diff options
author | Benjamin Barenblat <bbaren@mit.edu> | 2015-02-11 23:22:50 -0500 |
---|---|---|
committer | Benjamin Barenblat <bbaren@mit.edu> | 2015-02-11 23:22:50 -0500 |
commit | 9dee817b61268a4520cee359a5bc97d2dae296f4 (patch) | |
tree | 1591e41a4731836cf0a3ba513d8d25d95b864926 /src | |
parent | 31ef18834d4291b531e2e5a993216fc18f891205 (diff) |
Rewrite parser, again
Rewrite the parser with an actual grammar this time. It’s a bit ugly in
places – use of the 'manyTill'' function is especially nasty and ought
to be replaced with more <|> combinators, as in the 'literateFile'
definition.
Many thanks to Chelsea Voss <csvoss@mit.edu> for helping me figure out
the grammar for this.
Diffstat (limited to 'src')
-rw-r--r-- | src/Fragment.hs | 68 |
1 files changed, 52 insertions, 16 deletions
diff --git a/src/Fragment.hs b/src/Fragment.hs index 249ffb3..511ea8d 100644 --- a/src/Fragment.hs +++ b/src/Fragment.hs @@ -12,6 +12,15 @@ PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. -} +{- Grammar: + +S → ε | TEXT | TEXT block S | block S +block → << TEXT >>= code +code → @ | TEXT @ | TEXT reference code | reference code +reference → << TEXT >> + +TEXT -> any sequence of one or more Unicode code points -} + module Fragment ( Fragment(..) , CodeOrReference(..) , parseFragments) where @@ -25,6 +34,8 @@ import Control.Monad (void) import Text.Parsec import Text.Parsec.String +import Debug.Trace + data Fragment = Documentation String | BlockCode String [CodeOrReference] deriving (Eq, Show, Data, Typeable, Generic) @@ -36,36 +47,61 @@ data CodeOrReference = Code String parseFragments :: FilePath -> String -> Either String [Fragment] parseFragments path input = case parse literateFile path input of - Right result -> Right result + Right result -> traceShow result $ Right result Left err -> Left $ show err literateFile :: Parser [Fragment] -literateFile = many (blockCode <|> documentation) - -documentation :: Parser Fragment -documentation = do - body <- many1Till anyChar (eof <|> lookAhead (void blockCode)) - return $ Documentation body +literateFile = (:) <$> blockCode <*> literateFile + <|> do body <- try $ manyTill anyChar (lookAhead blockCode) + block <- blockCode + rest <- literateFile + return $ Documentation body : block : rest + <|> (:[]) . Documentation <$> manyTill anyChar eof blockCode :: Parser Fragment blockCode = do - void $ string "<<" - name <- many1Till (noneOf "\r\n") (try (string ">>=" <?> "start of code block")) - body <- many1Till (reference <|> code) (char '@') + void $ try $ string "<<" + name <- many1Till (noneOf "\r\n") (try $ string ">>=") + body <- code return $ BlockCode name body -code :: Parser CodeOrReference -code = do - body <- many1Till anyChar (lookAhead $ (void (char '@')) - <|> (void reference)) - return $ Code body + +data CodeTerminator = AtSign + | BeginReference + deriving (Eq, Show, Data, Typeable, Generic) + +atSign :: Parser CodeTerminator +atSign = char '@' >> return AtSign + +beginReference :: Parser CodeTerminator +beginReference = lookAhead reference >> return BeginReference + +code :: Parser [CodeOrReference] +code = (:) <$> reference <*> code + <|> do (body, exitChar) <- manyTill' anyChar (atSign <|> beginReference) + case exitChar of + AtSign -> return [Code body] + BeginReference -> do + ref <- option [] $ (:[]) <$> reference + rest <- code + return $ Code body : ref ++ rest reference :: Parser CodeOrReference reference = do - void $ string "<<" + void $ try $ string "<<" name <- many1Till anyChar (try $ string ">>") return $ Reference name +manyTill' :: Stream s m t + => ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m ([a], end) +manyTill' p end = scan + where scan = do exit <- end + return ([], exit) + <|> + do x <- p + (xs, exit) <- scan + return (x:xs, exit) + many1Till :: Stream s m t => ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m [a] many1Till p end = (:) <$> p <*> manyTill p end |