summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar Benjamin Barenblat <bbaren@mit.edu>2015-02-11 23:22:50 -0500
committerGravatar Benjamin Barenblat <bbaren@mit.edu>2015-02-11 23:22:50 -0500
commit9dee817b61268a4520cee359a5bc97d2dae296f4 (patch)
tree1591e41a4731836cf0a3ba513d8d25d95b864926 /src
parent31ef18834d4291b531e2e5a993216fc18f891205 (diff)
Rewrite parser, again
Rewrite the parser with an actual grammar this time. It’s a bit ugly in places – use of the 'manyTill'' function is especially nasty and ought to be replaced with more <|> combinators, as in the 'literateFile' definition. Many thanks to Chelsea Voss <csvoss@mit.edu> for helping me figure out the grammar for this.
Diffstat (limited to 'src')
-rw-r--r--src/Fragment.hs68
1 files changed, 52 insertions, 16 deletions
diff --git a/src/Fragment.hs b/src/Fragment.hs
index 249ffb3..511ea8d 100644
--- a/src/Fragment.hs
+++ b/src/Fragment.hs
@@ -12,6 +12,15 @@ PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>. -}
+{- Grammar:
+
+S → ε | TEXT | TEXT block S | block S
+block → << TEXT >>= code
+code → @ | TEXT @ | TEXT reference code | reference code
+reference → << TEXT >>
+
+TEXT -> any sequence of one or more Unicode code points -}
+
module Fragment ( Fragment(..)
, CodeOrReference(..)
, parseFragments) where
@@ -25,6 +34,8 @@ import Control.Monad (void)
import Text.Parsec
import Text.Parsec.String
+import Debug.Trace
+
data Fragment = Documentation String
| BlockCode String [CodeOrReference]
deriving (Eq, Show, Data, Typeable, Generic)
@@ -36,36 +47,61 @@ data CodeOrReference = Code String
parseFragments :: FilePath -> String -> Either String [Fragment]
parseFragments path input =
case parse literateFile path input of
- Right result -> Right result
+ Right result -> traceShow result $ Right result
Left err -> Left $ show err
literateFile :: Parser [Fragment]
-literateFile = many (blockCode <|> documentation)
-
-documentation :: Parser Fragment
-documentation = do
- body <- many1Till anyChar (eof <|> lookAhead (void blockCode))
- return $ Documentation body
+literateFile = (:) <$> blockCode <*> literateFile
+ <|> do body <- try $ manyTill anyChar (lookAhead blockCode)
+ block <- blockCode
+ rest <- literateFile
+ return $ Documentation body : block : rest
+ <|> (:[]) . Documentation <$> manyTill anyChar eof
blockCode :: Parser Fragment
blockCode = do
- void $ string "<<"
- name <- many1Till (noneOf "\r\n") (try (string ">>=" <?> "start of code block"))
- body <- many1Till (reference <|> code) (char '@')
+ void $ try $ string "<<"
+ name <- many1Till (noneOf "\r\n") (try $ string ">>=")
+ body <- code
return $ BlockCode name body
-code :: Parser CodeOrReference
-code = do
- body <- many1Till anyChar (lookAhead $ (void (char '@'))
- <|> (void reference))
- return $ Code body
+
+data CodeTerminator = AtSign
+ | BeginReference
+ deriving (Eq, Show, Data, Typeable, Generic)
+
+atSign :: Parser CodeTerminator
+atSign = char '@' >> return AtSign
+
+beginReference :: Parser CodeTerminator
+beginReference = lookAhead reference >> return BeginReference
+
+code :: Parser [CodeOrReference]
+code = (:) <$> reference <*> code
+ <|> do (body, exitChar) <- manyTill' anyChar (atSign <|> beginReference)
+ case exitChar of
+ AtSign -> return [Code body]
+ BeginReference -> do
+ ref <- option [] $ (:[]) <$> reference
+ rest <- code
+ return $ Code body : ref ++ rest
reference :: Parser CodeOrReference
reference = do
- void $ string "<<"
+ void $ try $ string "<<"
name <- many1Till anyChar (try $ string ">>")
return $ Reference name
+manyTill' :: Stream s m t
+ => ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m ([a], end)
+manyTill' p end = scan
+ where scan = do exit <- end
+ return ([], exit)
+ <|>
+ do x <- p
+ (xs, exit) <- scan
+ return (x:xs, exit)
+
many1Till :: Stream s m t
=> ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m [a]
many1Till p end = (:) <$> p <*> manyTill p end