1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
|
{- Expands template haskell splices
-
- First, the code must be built with a ghc that supports TH,
- and the splices dumped to a log. For example:
- cabal build --ghc-options=-ddump-splices 2>&1 | tee log
-
- Along with the log, a headers file may also be provided, containing
- additional imports needed by the template haskell code.
-
- This program will parse the log, and expand all splices therein,
- writing files to the specified destdir (which can be "." to modify
- the source tree directly). They can then be built a second
- time, with a ghc that does not support TH.
-
- Note that template haskell code may refer to symbols that are not
- exported by the library that defines the TH code. In this case,
- the library has to be modifed to export those symbols.
-
- There can also be other problems with the generated code; it may
- need modifications to compile.
-
-
- Copyright 2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
import Text.Parsec
import Text.Parsec.String
import Control.Applicative ((<$>))
import Data.Either
import Data.List
import Data.String.Utils
import Data.Char
import System.Environment
import System.FilePath
import System.Directory
import Control.Monad
import Utility.Monad
import Utility.Misc
import Utility.Exception
import Utility.Path
data Coord = Coord
{ coordLine :: Int
, coordColumn :: Int
}
deriving (Read, Show)
offsetCoord :: Coord -> Coord -> Coord
offsetCoord a b = Coord
(coordLine a - coordLine b)
(coordColumn a - coordColumn b)
data SpliceType = SpliceExpression | SpliceDeclaration
deriving (Read, Show, Eq)
data Splice = Splice
{ splicedFile :: FilePath
, spliceStart :: Coord
, spliceEnd :: Coord
, splicedExpression :: String
, splicedCode :: String
, spliceType :: SpliceType
}
deriving (Read, Show)
isExpressionSplice :: Splice -> Bool
isExpressionSplice s = spliceType s == SpliceExpression
number :: Parser Int
number = read <$> many1 digit
{- A pair of Coords is written in one of three ways:
- "95:21-73", "1:1", or "(92,25)-(94,2)"
-}
coordsParser :: Parser (Coord, Coord)
coordsParser = (try singleline <|> try weird <|> multiline) <?> "Coords"
where
singleline = do
line <- number
char ':'
startcol <- number
char '-'
endcol <- number
return $ (Coord line startcol, Coord line endcol)
weird = do
line <- number
char ':'
col <- number
return $ (Coord line col, Coord line col)
multiline = do
start <- fromparens
char '-'
end <- fromparens
return $ (start, end)
fromparens = between (char '(') (char ')') $ do
line <- number
char ','
col <- number
return $ Coord line col
indent :: Parser String
indent = many1 $ char ' '
restOfLine :: Parser String
restOfLine = newline `after` many (noneOf "\n")
indentedLine :: Parser String
indentedLine = indent >> restOfLine
spliceParser :: Parser Splice
spliceParser = do
file <- many1 (noneOf ":\n")
char ':'
(start, end) <- coordsParser
string ": Splicing "
splicetype <- tosplicetype
<$> (string "expression" <|> string "declarations")
newline
getthline <- expressionextractor
expression <- unlines <$> many1 getthline
indent
string "======>"
newline
getcodeline <- expressionextractor
realcoords <- try (Right <$> getrealcoords file) <|> (Left <$> getcodeline)
codelines <- many getcodeline
return $ case realcoords of
Left firstcodeline ->
Splice file start end expression
(unlines $ firstcodeline:codelines)
splicetype
Right (realstart, realend) ->
Splice file realstart realend expression
(unlines codelines)
splicetype
where
tosplicetype "declarations" = SpliceDeclaration
tosplicetype "expression" = SpliceExpression
tosplicetype s = error $ "unknown splice type: " ++ s
{- All lines of the indented expression start with the same
- indent, which is stripped. Any other indentation is preserved. -}
expressionextractor = do
i <- lookAhead indent
return $ try $ do
string i
restOfLine
{- When splicing declarations, GHC will output a splice
- at 1:1, and then inside the splice code block,
- the first line will give the actual coordinates of the
- line that was spliced. -}
getrealcoords file = do
indent
string file
char ':'
char '\n' `after` coordsParser
{- Extracts the splices, ignoring the rest of the compiler output. -}
splicesExtractor :: Parser [Splice]
splicesExtractor = rights <$> many extract
where
extract = try (Right <$> spliceParser) <|> (Left <$> compilerJunkLine)
compilerJunkLine = restOfLine
{- Modifies the source file, expanding the splices, which all must
- have the same splicedFile. Writes the new file to the destdir.
-
- Each splice's Coords refer to the original position in the file,
- and not to its position after any previous splices may have inserted
- or removed lines.
-
- To deal with this complication, the file is broken into logical lines
- (which can contain any String, including a multiline or empty string).
- Each splice is assumed to be on its own block of lines; two
- splices on the same line is not currently supported.
- This means that a splice can modify the logical lines within its block
- as it likes, without interfering with the Coords of other splices.
-
- When splicing in declarations, they are not placed on the line
- that defined them, because at least with Yesod, that line has another TH
- splice, and things would get mixed up. Since declarations are stand
- alone, they can go anywhere, and are added to the very end of the file.
-
- As well as expanding splices, this can add a block of imports to the
- file. These are put right before the first line in the file that
- starts with "import "
-}
applySplices :: FilePath -> Maybe String -> [Splice] -> IO ()
applySplices destdir imports splices@(first:_) = do
let f = splicedFile first
let dest = (destdir </> f)
lls <- map (++ "\n") . lines <$> readFileStrict f
createDirectoryIfMissing True (parentDir dest)
let newcontent = concat $ addimports $
expanddeclarations declarationsplices $
expandexpressions lls expressionsplices
oldcontent <- catchMaybeIO $ readFileStrict dest
when (oldcontent /= Just newcontent) $ do
putStrLn $ "splicing " ++ f
writeFile dest newcontent
where
(expressionsplices, declarationsplices) =
partition isExpressionSplice splices
expandexpressions lls [] = lls
expandexpressions lls (s:rest) =
expandexpressions (expandExpressionSplice s lls) rest
expanddeclarations [] lls = lls
expanddeclarations l lls = lls ++ map (mangleCode . splicedCode) l
addimports lls = case imports of
Nothing -> lls
Just v ->
let (start, end) = break ("import " `isPrefixOf`) lls
in if null end
then start
else concat
[ start
, [v]
, end
]
expandExpressionSplice :: Splice -> [String] -> [String]
expandExpressionSplice s lls = concat [before, spliced:padding, end]
where
cs = spliceStart s
ce = spliceEnd s
(before, rest) = splitAt (coordLine cs - 1) lls
(oldlines, end) = splitAt (1 + coordLine (offsetCoord ce cs)) rest
(splicestart, padding, spliceend) = case map expandtabs oldlines of
ss:r
| null r -> (ss, [], ss)
| otherwise -> (ss, take (length r) (repeat []), last r)
_ -> ([], [], [])
spliced = concat
[ joinsplice $ deqqstart $ take (coordColumn cs - 1) splicestart
, addindent (findindent splicestart) (mangleCode $ splicedCode s)
, deqqend $ drop (coordColumn ce) spliceend
]
{- coordinates assume tabs are expanded to 8 spaces -}
expandtabs = replace "\t" (take 8 $ repeat ' ')
{- splicing leaves $() quasiquote behind; remove it -}
deqqstart s = case reverse s of
('(':'$':rest) -> reverse rest
_ -> s
deqqend (')':s) = s
deqqend s = s
{- Prepare the code that comes just before the splice so
- the splice will combine with it appropriately. -}
joinsplice s
-- all indentation? Skip it, we'll use the splice's indentation
| all isSpace s = ""
-- function definition needs no preparation
-- ie: foo = $(splice)
| "=" `isSuffixOf` s' = s
-- nor does lambda definition or case expression
| "->" `isSuffixOf` s' = s
-- nor does a let .. in declaration
| "in" `isSuffixOf` s' = s
-- already have a $ to set off the splice
-- ie: foo $ $(splice)
| "$" `isSuffixOf` s' = s
-- need to add a $ to set off the splice
-- ie: bar $(splice)
| otherwise = s ++ " $ "
where
s' = filter (not . isSpace) s
findindent = length . takeWhile isSpace
addindent n = unlines . map (i ++) . lines
where
i = take n $ repeat ' '
{- Tweaks code output by GHC in splices to actually build. Yipes. -}
mangleCode :: String -> String
mangleCode = declaration_parens
. remove_declaration_splices
. nested_instances
. fix_bad_escape
. remove_package_version
where
{- GHC may incorrectly escape "}" within a multi-line string. -}
fix_bad_escape = replace " \\}" " }"
{- GHC may output this:
-
- instance RenderRoute WebApp where
- data instance Route WebApp
- ^^^^^^^^
- The marked word should not be there.
-
- FIXME: This is a yesod-specific hack, it should look for the
- outer instance.
-}
nested_instances = replace " data instance Route" " data Route"
{- GHC does not properly parenthesise generated data type
- declarations. -}
declaration_parens = replace "StaticR Route Static" "StaticR (Route Static)"
{- Outright hack: It should remove declaration splices, and does
- not, so here I remove the declaration splices used for yesod.
-}
remove_declaration_splices s
| "publicFiles" `isPrefixOf` s = ""
| "mkYesodData" `isPrefixOf` s = ""
| otherwise = s
{- GHC may add full package and version qualifications for
- symbols from unimported modules. We don't want these.
-
- Examples:
- "blaze-html-0.4.3.1:Text.Blaze.Internal.preEscapedText"
- "ghc-prim:GHC.Types.:"
-}
remove_package_version s = case parse findQualifiedSymbols "" s of
Left e -> s
Right symbols -> concat $
map (either (\c -> [c]) mangleSymbol) symbols
findQualifiedSymbols :: Parser [Either Char String]
findQualifiedSymbols = many $
try (Right <$> qualifiedSymbol) <|> (Left <$> anyChar)
qualifiedSymbol :: Parser String
qualifiedSymbol = do
token
char ':'
token
token :: Parser String
token = many1 $ satisfy isAlphaNum <|> oneOf "-.'"
mangleSymbol "GHC.Types." = ""
mangleSymbol s = s
main :: IO ()
main = go =<< getArgs
where
go (destdir:log:header:[]) = run destdir log (Just header)
go (destdir:log:[]) = run destdir log Nothing
go _ = error "usage: EvilSplicer destdir logfile [headerfile]"
run destdir log mheader = do
r <- parseFromFile splicesExtractor log
case r of
Left e -> error $ show e
Right splices -> do
let groups = groupBy (\a b -> splicedFile a == splicedFile b) splices
imports <- maybe (return Nothing) (catchMaybeIO . readFile) mheader
mapM_ (applySplices destdir imports) groups
|