module Example.Example where import Text.HTML.TagSoup import Text.StringLike(StringLike) import Text.HTML.Download import Control.Monad import Data.List import Data.Char import System.CPUTime import System.IO import qualified Data.ByteString.Lazy.Char8 as BS grab :: String -> IO () grab x = openItem x >>= putStr parse :: String -> IO () parse x = openItem x >>= putStr . show2 . parseTags where show2 [] = "[]" show2 xs = "[" ++ concat (intersperseNotBroken "\n," $ map show xs) ++ "\n]" -- the standard intersperse has a strictness bug which sucks! intersperseNotBroken :: a -> [a] -> [a] intersperseNotBroken _ [] = [] intersperseNotBroken sep (x:xs) = x : is xs where is [] = [] is (y:ys) = sep : y : is ys {-

Retrieved from "http://haskell.org/haskellwiki/Haskell"

This page has been accessed 507,753 times. This page was last modified 08:05, 24 January 2007. Recent content is available under a simple permissive license.

-} haskellHitCount :: IO () haskellHitCount = do tags <- liftM parseTags $ openURL "http://haskell.org/haskellwiki/Haskell" let count = fromFooter $ head $ sections (~== "
") tags putStrLn $ "haskell.org has been hit " ++ show count ++ " times" where fromFooter x = read (filter isDigit num) :: Int where num = ss !! (i - 1) Just i = findIndex (== "times.") ss ss = words s TagText s = sections (~== "

") x !! 1 !! 1 {- Blogger code of conduct proposed -} googleTechNews :: IO () googleTechNews = do tags <- liftM parseTags $ openURL "http://news.google.com/?ned=us&topic=t" let links = [ text | TagOpen "a" atts:TagOpen "b" []:TagText text:_ <- tails tags, ("id",'u':'-':_) <- atts] putStr $ unlines links spjPapers :: IO () spjPapers = do tags <- liftM parseTags $ openURL "http://research.microsoft.com/~simonpj/" let links = map f $ sections (~== "") $ takeWhile (~/= "") $ drop 5 $ dropWhile (~/= "") tags putStr $ unlines links where f :: [Tag String] -> String f = dequote . unwords . words . fromTagText . head . filter isTagText dequote ('\"':xs) | last xs == '\"' = init xs dequote x = x ndmPapers :: IO () ndmPapers = do tags <- liftM parseTags $ openURL "http://www-users.cs.york.ac.uk/~ndm/downloads/" let papers = map f $ sections (~== "

  • ") tags putStr $ unlines papers where f :: [Tag String] -> String f xs = fromTagText (xs !! 2) currentTime :: IO () currentTime = do tags <- liftM parseTags $ openURL "http://www.timeanddate.com/worldclock/city.html?n=136" let res = fromTagText (dropWhile (~/= "") tags !! 1) putStrLn res type Section = String data Package = Package {name :: String, desc :: String, href :: String} deriving Show hackage :: IO [(Section,[Package])] hackage = do tags <- liftM parseTags $ openURL "http://hackage.haskell.org/packages/archive/pkg-list.html" return $ map parseSect $ partitions (~== "

    ") tags where parseSect xs = (nam, packs) where nam = fromTagText $ xs !! 2 packs = map parsePackage $ partitions (~== "
  • ") xs parsePackage xs = Package (fromTagText $ xs !! 2) (drop 2 $ dropWhile (/= ':') $ fromTagText $ xs !! 4) (fromAttrib "href" $ xs !! 1) -- rssCreators Example: prints names of story contributors on -- sequence.complete.org. This content is RSS (not HTML), and the selected -- tag uses a different XML namespace "dc:creator". rssCreators :: IO () rssCreators = do tags <- liftM parseTags $ openURL "http://sequence.complete.org/node/feed" putStrLn $ unlines $ map names $ partitions (~== "") tags where names xs = fromTagText $ xs !! 1 validate :: String -> IO () validate x = putStr . unlines . g . f . parseTagsOptions opts =<< openItem x where opts = parseOptions{optTagPosition=True, optTagWarning=True} f :: [Tag String] -> [String] f (TagPosition row col:TagWarning warn:rest) = ("Warning (" ++ show row ++ "," ++ show col ++ "): " ++ warn) : f rest f (TagWarning warn:rest) = ("Warning (?,?): " ++ warn) : f rest f (_:rest) = f rest f [] = [] g xs = xs ++ [if n == 0 then "Success, no warnings" else "Failed, " ++ show n ++ " warning" ++ ['s'|n>1]] where n = length xs -- figure out how many characters per second it can parse, based on a small sample -- try it with a default of 100 repititions, figure out what a better length might be -- and try again -- want to measure a time of > 1 second sample :: String sample = " is " ++ " and some just random & test ><" pico :: Integer pico = 1000000000000 stringLength x = fromIntegral (BS.length x) :: Int stringReadFile = BS.readFile stringRep i s = BS.take (fromIntegral i) $ BS.concat $ repeat s stringPack = BS.pack timefile :: FilePath -> IO () timefile xs = do s <- stringReadFile xs let n = stringLength s r <- n `seq` timeN n s printCps n r time :: IO () time = do putStrLn "Timing parseTags" f 100 where str = stringPack sample f n = do let s = stringRep n str i <- stringLength s `seq` timeN n s let cps = fromIntegral n / i n2 = min (n*10) (abs $ floor $ (fromIntegral n*11) / (i*10)) if i > 1 then printCps n i else f n2 printCps :: Int -> Double -> IO () printCps n i = putStrLn $ "parseTags = " ++ showUnit (floor cps) ++ " characters/second" where cps = fromIntegral n / i -- number of repetitions, time in seconds timeN :: StringLike s => Int -> s -> IO Double timeN n str = do hSetBuffering stdout NoBuffering putStr $ show n ++ " repetitions = " start <- getCPUTime let res = parseTags str () <- length res `seq` return () end <- getCPUTime let time = fromInteger (1 + end - start) / fromInteger pico putStrLn $ show time ++ " seconds" return time showUnit :: Integer -> String showUnit x = num ++ unit where units = " KMGTPEZY" (use,skip) = splitAt 3 $ show x unit = [units !! ((length skip + 2) `div` 3)] dot = ((length skip - 1) `mod` 3) + 1 num = a ++ ['.' | b /= ""] ++ b where (a,b) = splitAt dot use