From 9dee817b61268a4520cee359a5bc97d2dae296f4 Mon Sep 17 00:00:00 2001 From: Benjamin Barenblat Date: Wed, 11 Feb 2015 23:22:50 -0500 Subject: Rewrite parser, again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rewrite the parser with an actual grammar this time. It’s a bit ugly in places – use of the 'manyTill'' function is especially nasty and ought to be replaced with more <|> combinators, as in the 'literateFile' definition. Many thanks to Chelsea Voss for helping me figure out the grammar for this. --- src/Fragment.hs | 68 +++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 52 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/Fragment.hs b/src/Fragment.hs index 249ffb3..511ea8d 100644 --- a/src/Fragment.hs +++ b/src/Fragment.hs @@ -12,6 +12,15 @@ PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . -} +{- Grammar: + +S → ε | TEXT | TEXT block S | block S +block → << TEXT >>= code +code → @ | TEXT @ | TEXT reference code | reference code +reference → << TEXT >> + +TEXT -> any sequence of one or more Unicode code points -} + module Fragment ( Fragment(..) , CodeOrReference(..) , parseFragments) where @@ -25,6 +34,8 @@ import Control.Monad (void) import Text.Parsec import Text.Parsec.String +import Debug.Trace + data Fragment = Documentation String | BlockCode String [CodeOrReference] deriving (Eq, Show, Data, Typeable, Generic) @@ -36,36 +47,61 @@ data CodeOrReference = Code String parseFragments :: FilePath -> String -> Either String [Fragment] parseFragments path input = case parse literateFile path input of - Right result -> Right result + Right result -> traceShow result $ Right result Left err -> Left $ show err literateFile :: Parser [Fragment] -literateFile = many (blockCode <|> documentation) - -documentation :: Parser Fragment -documentation = do - body <- many1Till anyChar (eof <|> lookAhead (void blockCode)) - return $ Documentation body +literateFile = (:) <$> blockCode <*> literateFile + <|> do body <- try $ manyTill anyChar (lookAhead blockCode) + block <- blockCode + rest <- literateFile + return $ Documentation body : block : rest + <|> (:[]) . Documentation <$> manyTill anyChar eof blockCode :: Parser Fragment blockCode = do - void $ string "<<" - name <- many1Till (noneOf "\r\n") (try (string ">>=" "start of code block")) - body <- many1Till (reference <|> code) (char '@') + void $ try $ string "<<" + name <- many1Till (noneOf "\r\n") (try $ string ">>=") + body <- code return $ BlockCode name body -code :: Parser CodeOrReference -code = do - body <- many1Till anyChar (lookAhead $ (void (char '@')) - <|> (void reference)) - return $ Code body + +data CodeTerminator = AtSign + | BeginReference + deriving (Eq, Show, Data, Typeable, Generic) + +atSign :: Parser CodeTerminator +atSign = char '@' >> return AtSign + +beginReference :: Parser CodeTerminator +beginReference = lookAhead reference >> return BeginReference + +code :: Parser [CodeOrReference] +code = (:) <$> reference <*> code + <|> do (body, exitChar) <- manyTill' anyChar (atSign <|> beginReference) + case exitChar of + AtSign -> return [Code body] + BeginReference -> do + ref <- option [] $ (:[]) <$> reference + rest <- code + return $ Code body : ref ++ rest reference :: Parser CodeOrReference reference = do - void $ string "<<" + void $ try $ string "<<" name <- many1Till anyChar (try $ string ">>") return $ Reference name +manyTill' :: Stream s m t + => ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m ([a], end) +manyTill' p end = scan + where scan = do exit <- end + return ([], exit) + <|> + do x <- p + (xs, exit) <- scan + return (x:xs, exit) + many1Till :: Stream s m t => ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m [a] many1Till p end = (:) <$> p <*> manyTill p end -- cgit v1.2.3