{- - = Creole syntax parser - This module provides wiki syntax parser which is **maybe** compatible with Creole 1.0.\\ - The grammer of Creole 1.0 is can be seen from following URL. - - * http://www.wikicreole.org/wiki/Creole1.0 - - == Download - - * http://return0.dyndns.org/hscreole/Creole.hs - - You might suspect why only the source code is distributed,\\ - but it is only because of the troublesome. - - == Usage - {{{ - import Creole - main = print $ parseHtml "= heading\nparagraph.\n\n* list\n* list\n\nhttp://example.org" - }}} - - and you will get following string. - - {{{ -

heading

-

paragraph.

- -

http://example.org

- }}} - - == More Information - - Read the source code. But it might be difficult to read the source code because this is the first Haskell software of me. - - ; Author - : Chikara Kuwata - ; License - : MIT - - -} module Creole (HTML, Node, Attr, parseHtml) where import Control.Monad.State import Text.ParserCombinators.Parsec data HTML = HTML [Node] data Node = Element String [Attr] [Node] | Text String | Ghost [Node] data Attr = Attr String String instance Show HTML where show (HTML []) = "" show (HTML (x:xs)) = showAll (x:xs) instance Show Node where show (Text t) = concatMap escapeChar t show (Element tag attrs []) = "<" ++ tag ++ (showAll attrs) ++ "/>" show (Element tag attrs nodes) = "<" ++ tag ++ (showAll attrs) ++ ">" ++ (showAll nodes) ++ " ">\n" "h2" -> ">\n" "h3" -> ">\n" "h4" -> ">\n" "h5" -> ">\n" "h6" -> ">\n" "ul" -> ">\n" "ol" -> ">\n" "li" -> ">\n" "p" -> ">\n" "dl" -> ">\n" "dd" -> ">\n" "dt" -> ">\n" "pre" -> ">\n" "table" -> ">\n" "tr" -> ">\n" _ -> ">" show (Ghost nodes) = showAll nodes instance Show Attr where show (Attr n v) = " " ++ n ++ "=" ++ "\"" ++ v ++ "\"" showAll :: (Show a) => [a] -> String showAll = concatMap show escapeChar :: Char -> String escapeChar '&' = "&" escapeChar '<' = "<" escapeChar '>' = ">" escapeChar '"' = """ escapeChar '\'' = "'" escapeChar c = [c] parseHtml :: String -> HTML parseHtml s = HTML $ parseBlock $ eofToLineFeed s eofToLineFeed :: String -> String eofToLineFeed s = reverse $ convert $ reverse s where convert s | length (s) == 0 || length (s) == 1 = "\n\n" ++ s | s !! 0 == '\n' && s !! 1 == '\n' = s | s !! 0 == '\n' && s !! 1 /= '\n' = "\n" ++ s | s !! 0 /= '\n' = "\n\n" ++ s parseBlock :: String -> [Node] parseBlock str = case parse block "" str of Right nodes -> nodes Left err -> [Text ("" ++ (show err))] block :: Parser [Node] block = many component where component = do ws <- try ignoreLineFeed return $ Text "" <|> do h <- try heading return $ Element (fst h) [] [Text (snd h)] <|> do pre <- try preformat return $ Element "pre" [] [Text pre] <|> do li <- try uList return $ Element "ul" [] li <|> do li <- try oList return $ Element "ol" [] li <|> do dl <- try defList return $ Element "dl" [] dl <|> do tbl <- try table return $ Element "table" [] tbl <|> do p <- paragraph return $ Element "p" [] (parseInline p) ignoreLineFeed :: Parser String ignoreLineFeed = do ws <- many1 $ char '\n' return ws heading :: Parser (String, String) heading = do lv <- many1 $ char '=' ws <- many1 $ char ' ' h <- manyTill anyChar $ try $ oneOf "=\n" ws2 <- option ' ' $ char ' ' rest <- many $ char '=' return ((headingLv (length lv)), (stripTail h)) where headingLv n | n <= 6 = "h" ++ (show n) | otherwise = "h6" stripTail s = reverse $ dropWhile (== ' ') $ dropWhile (== '=') $ reverse s preformat :: Parser String preformat = do st <- string "{{{\n" s <- manyTill anyChar $ try $ string "\n}}}\n" return s uList = listBlock "*" oList = listBlock "#" listBlock :: String -> Parser [Node] listBlock symbol = do i <- string symbol ws <- many1 $ char ' ' s <- manyTill anyChar $ try $ string "\n\n" lf <- option "" $ many $ char '\n' return $ parseList (s ++ "\n" ++ symbol ++ " ") symbol parseList :: String -> String -> [Node] parseList str symbol = case parse (listItem symbol) "" str of Right items -> items Left err -> [Text "error!"] listItem :: String -> Parser [Node] listItem symbol = many1 item where item = do s <- manyTill anyChar $ try $ string $ "\n" ++ symbol ++ " " return $ Element "li" [] (n s) where n s = case parse nestedList "" (s ++ "\n\n") of Right nodes -> nodes Left err -> [Text "error"] where nestedList :: Parser [Node] nestedList = many items where items = do nl <- try (listBlock (symbol ++ [symbol !! 0])) return $ Element (listType symbol) [] nl <|> do t <- manyTill anyChar $ try $ char '\n' return $ Ghost $ parseInline t where listType s = case s !! 0 of '*' -> "ul" '#' -> "ol" defList :: Parser [Node] defList = do h <- char ';' ws <- many1 $ char ' ' s <- manyTill anyChar $ try $ string "\n\n" return $ parseDlItem $ [h] ++ ws ++ s ++ "\n" parseDlItem :: String -> [Node] parseDlItem str = case parse dlItem "" str of Right nodes -> nodes Left err -> [Text "error!"] dlItem :: Parser [Node] dlItem = many1 item where item = do h <- choice [char ';', char ':'] ws <- many1 $ char ' ' s <- manyTill anyChar $ try $ char '\n' return $ Element (tag h) [] (parseInline s) where tag h = case h of ';' -> "dt" ':' -> "dd" table :: Parser [Node] table = do t <- char '|' s <- manyTill anyChar $ try $ string "\n\n" return $ map parseRows (lines s) parseRows :: String -> Node parseRows str = case parse tableCells "" str of Right nodes -> Element "tr" [] nodes Left err -> Text "error!" tableCells :: Parser [Node] tableCells = many1 cell where cell = do x <- option "" $ string "|" t <- option "" $ string "=" s <- manyTill anyChar $ try $ char '|' return $ Element (tag t) [] $ parseInline s where tag t = case t of "=" -> "th" "" -> "td" paragraph :: Parser String paragraph = do p <- manyTill anyChar $ try $ string "\n\n" lf <- option "" $ many $ char '\n' return p parseInline :: String -> [Node] parseInline str = case parse inline "" str of Right nodes -> nodes Left err -> [Text "error!"] inline :: Parser [Node] inline = many component where component = do c <- try $ string "~" n <- inlineComponent return $ case n of Text s -> Text $ c ++ s Element tag attr nodes -> Ghost ( [Text $ fst $ toCreoleSyntax n] ++ nodes ++ [Text $ snd $ toCreoleSyntax n]) where toCreoleSyntax elem = case elem of Element "tt" _ _ -> ("{{{", "}}}") Element "a" [(Attr "href" x)] [Text s] -> case x == s of True -> ("", "") False -> (("[[" ++ s ++ "|"), "]]") Element "a" [Attr "href" s] _ -> (("[[" ++ s ++ "|"), "]]") Element "strong" _ _ -> ("**", "**") Element "em" _ _ -> ("//", "//") Element "br" _ _ -> ("\\\\", "") Element "sup" _ _ -> ("^^", "^^") Element "sub" _ _ -> (",,", ",,") <|> do n <- inlineComponent return n delPostfix h x y | x == [] = h | x == y = h | otherwise = delPostfix (h ++ [head x]) (tail x) y sliceList ls si ei | si >= ei = [] | otherwise = ((ls !! si): (sliceList ls (si + 1) ei)) findSubSeqPos a b si | length a < length b = Nothing | length a < (length b) + si = Nothing | otherwise = case (sliceList a si ((length b) + si)) == b of True -> Just si False -> findSubSeqPos a b (si + 1) escapeTag nodes = map et nodes where et (Text s) = Text s et (Element tag attrs children) = Ghost children inlineComponent :: Parser Node inlineComponent = do pre <- try inlinePreformat return $ Element "tt" [] [Text pre] <|> do link <- try hyperLink return $ Element "a" [Attr "href" (fst link)] (parseInline (snd link)) <|> do url <- try urlStr return $ Element "a" [Attr "href" url] [Text url] <|> do t <- try bold return $ Element "strong" [] $ parseInline t <|> do t <- try $ italic return $ Element "em" [] $ parseInline t <|> do t <- try linebreak return $ Element "br" [] [] <|> do t <- try sups return $ Element "sup" [] $ parseInline t <|> do t <- try subs return $ Element "sub" [] $ parseInline t <|> do c <- anyChar return $ Text [c] urlStr :: Parser String urlStr = do a <- string "http" b <- option "" $ string "s" c <- string "://" d <- many1 urlChar return $ concat [a, b, c, d] urlChar :: Parser Char urlChar = alphaNum <|> oneOf "!@#$%^&*()-_=+~:;',./?" hyperLink :: Parser (String, String) hyperLink = do a <- string "[[" b <- urlStr c <- string "|" d <- many1 $ satisfy $ \c -> c /= ']' e <- string "]]" return $ (b, d) bold :: Parser String bold = do a <- string "**" b <- manyTill anyChar $ try $ string "**" return b italic :: Parser String italic = do a <- string "//" b <- manyTill anyChar $ try $ string "//" return b sups :: Parser String sups = do a <- string "^^" b <- manyTill anyChar $ try $ string "^^" return b subs :: Parser String subs = do a <- string ",," b <- manyTill anyChar $ try $ string ",," return b linebreak :: Parser String linebreak = do a <- string "\\\\" return "" inlinePreformat :: Parser String inlinePreformat = do st <- string "{{{" s <- manyTill anyChar $ try $ string "}}}" return s