summaryrefslogtreecommitdiff
path: root/Build/EvilSplicer.hs
blob: 442ac5952270edd4a312157d84ce4b0c24591d09 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
{- Expands template haskell splices
 -
 - You should probably just use http://hackage.haskell.org/package/zeroth
 - instead. I wish I had known about it before writing this.
 -
 - First, the code must be built with a ghc that supports TH,
 - and the splices dumped to a log. For example:
 -   cabal build --ghc-options=-ddump-splices 2>&1 | tee log
 -
 - Along with the log, a headers file may also be provided, containing
 - additional imports needed by the template haskell code.
 -
 - This program will parse the log, and expand all splices therein,
 - writing files to the specified destdir (which can be "." to modify
 - the source tree directly). They can then be built a second
 - time, with a ghc that does not support TH.
 -
 - Note that template haskell code may refer to symbols that are not
 - exported by the library that defines the TH code. In this case,
 - the library has to be modifed to export those symbols.
 -
 - There can also be other problems with the generated code; it may
 - need modifications to compile.
 -
 -
 - Copyright 2013 Joey Hess <joey@kitenet.net>
 -
 - Licensed under the GNU GPL version 3 or higher.
 -}

module Main where

import Text.Parsec
import Text.Parsec.String
import Control.Applicative ((<$>))
import Data.Either
import Data.List
import Data.String.Utils
import Data.Char
import System.Environment
import System.FilePath
import System.Directory
import Control.Monad

import Utility.Monad
import Utility.Misc
import Utility.Exception
import Utility.Path

data Coord = Coord
	{ coordLine :: Int
	, coordColumn :: Int
	}
	deriving (Read, Show)

offsetCoord :: Coord -> Coord -> Coord
offsetCoord a b = Coord
	(coordLine a - coordLine b)
	(coordColumn a - coordColumn b)

data SpliceType = SpliceExpression | SpliceDeclaration
	deriving (Read, Show, Eq)

data Splice = Splice
	{ splicedFile :: FilePath
	, spliceStart :: Coord
	, spliceEnd :: Coord
	, splicedExpression :: String
	, splicedCode :: String
	, spliceType :: SpliceType
	}
	deriving (Read, Show)

isExpressionSplice :: Splice -> Bool
isExpressionSplice s = spliceType s == SpliceExpression

number :: Parser Int
number = read <$> many1 digit

{- A pair of Coords is written in one of three ways:
 - "95:21-73", "1:1", or "(92,25)-(94,2)"
 -}
coordsParser :: Parser (Coord, Coord)
coordsParser = (try singleline <|> try weird <|> multiline) <?> "Coords"
  where
  	singleline = do
		line <- number
		char ':'
		startcol <- number
		char '-'
		endcol <- number
		return $ (Coord line startcol, Coord line endcol)

	weird = do
		line <- number
		char ':'
		col <- number
		return $ (Coord line col, Coord line col)

	multiline = do
		start <- fromparens
		char '-'
		end <- fromparens
		return $ (start, end)

	fromparens = between (char '(') (char ')') $ do
		line <- number
		char ','
		col <- number
		return $ Coord line col

indent :: Parser String
indent = many1 $ char ' '

restOfLine :: Parser String
restOfLine = newline `after` many (noneOf "\n")

indentedLine :: Parser String
indentedLine = indent >> restOfLine

spliceParser :: Parser Splice
spliceParser = do
	file <- many1 (noneOf ":\n")
	char ':'
	(start, end) <- coordsParser
	string ": Splicing "
	splicetype <- tosplicetype
		<$> (string "expression" <|> string "declarations")
	newline

	getthline <- expressionextractor
	expression <- unlines <$> many1 getthline

	indent
	string "======>"	
	newline

	getcodeline <- expressionextractor
	realcoords <- try (Right <$> getrealcoords file) <|> (Left <$> getcodeline)
	codelines <- many getcodeline
	return $ case realcoords of
		Left firstcodeline -> 
			Splice file start end expression
				(unlines $ firstcodeline:codelines)
				splicetype
		Right (realstart, realend) ->
			Splice file realstart realend expression
				(unlines codelines)
				splicetype
  where
  	tosplicetype "declarations" = SpliceDeclaration
	tosplicetype "expression" = SpliceExpression
	tosplicetype s = error $ "unknown splice type: " ++ s

	{- All lines of the indented expression start with the same
	 - indent, which is stripped. Any other indentation is preserved. -}
	expressionextractor = do
		i <- lookAhead indent
		return $ try $ do
			string i
			restOfLine
	
	{- When splicing declarations, GHC will output a splice
	 - at 1:1, and then inside the splice code block,
	 - the first line will give the actual coordinates of the
	 - line that was spliced. -}
	getrealcoords file = do
		indent
		string file
		char ':'
		char '\n' `after` coordsParser

{- Extracts the splices, ignoring the rest of the compiler output. -}
splicesExtractor :: Parser [Splice]
splicesExtractor = rights <$> many extract
  where
  	extract = try (Right <$> spliceParser) <|> (Left <$> compilerJunkLine)
	compilerJunkLine = restOfLine

{- Modifies the source file, expanding the splices, which all must
 - have the same splicedFile. Writes the new file to the destdir.
 -
 - Each splice's Coords refer to the original position in the file,
 - and not to its position after any previous splices may have inserted
 - or removed lines.
 -
 - To deal with this complication, the file is broken into logical lines
 - (which can contain any String, including a multiline or empty string).
 - Each splice is assumed to be on its own block of lines; two
 - splices on the same line is not currently supported.
 - This means that a splice can modify the logical lines within its block
 - as it likes, without interfering with the Coords of other splices.
 -
 - As well as expanding splices, this can add a block of imports to the
 - file. These are put right before the first line in the file that
 - starts with "import "
 -}
applySplices :: FilePath -> Maybe String -> [Splice] -> IO ()
applySplices destdir imports splices@(first:_) = do
	let f = splicedFile first
	let dest = (destdir </> f)
	lls <- map (++ "\n") . lines <$> readFileStrict f
	createDirectoryIfMissing True (parentDir dest)
	let newcontent = concat $ addimports $ expand lls splices
	oldcontent <- catchMaybeIO $ readFileStrict dest
	when (oldcontent /= Just newcontent) $ do
		putStrLn $ "splicing " ++ f
		writeFile dest newcontent
  where
  	expand lls [] = lls
  	expand lls (s:rest)
		| isExpressionSplice s = expand (expandExpressionSplice s lls) rest
		| otherwise = expand (expandDeclarationSplice s lls) rest

	addimports lls = case imports of
		Nothing -> lls
		Just v ->
			let (start, end) = break ("import " `isPrefixOf`) lls
			in if null end
				then start
				else concat
					[ start
					, [v]
					, end
					]

{- Declaration splices are expanded to replace their whole line. -}
expandDeclarationSplice :: Splice -> [String] -> [String]
expandDeclarationSplice s lls = concat [before, [splice], end]
  where
	cs = spliceStart s
	ce = spliceEnd s

	(before, rest) = splitAt (coordLine cs - 1) lls
	(_oldlines, end) = splitAt (1 + coordLine (offsetCoord ce cs)) rest
	splice = mangleCode $ splicedCode s

{- Expression splices are expanded within their line. -}
expandExpressionSplice :: Splice -> [String] -> [String]
expandExpressionSplice s lls = concat [before, spliced:padding, end]
  where
	cs = spliceStart s
	ce = spliceEnd s

	(before, rest) = splitAt (coordLine cs - 1) lls
	(oldlines, end) = splitAt (1 + coordLine (offsetCoord ce cs)) rest
	(splicestart, padding, spliceend) = case map expandtabs oldlines of
		ss:r
			| null r -> (ss, [], ss)
			| otherwise -> (ss, take (length r) (repeat []), last r)
		_ -> ([], [], [])
	spliced = concat
		[ joinsplice $ deqqstart $ take (coordColumn cs - 1) splicestart
		, addindent (findindent splicestart) (mangleCode $ splicedCode s)
		, deqqend $ drop (coordColumn ce) spliceend
		]

	{- coordinates assume tabs are expanded to 8 spaces -}
	expandtabs = replace "\t" (take 8 $ repeat ' ')

	{- splicing leaves $() quasiquote behind; remove it -}
	deqqstart s = case reverse s of
		('(':'$':rest) -> reverse rest
		_ -> s
	deqqend (')':s) = s
	deqqend s = s

	{- Prepare the code that comes just before the splice so
	 - the splice will combine with it appropriately. -}
	joinsplice s
		-- all indentation? Skip it, we'll use the splice's indentation
		| all isSpace s = ""
		-- function definition needs no preparation
		-- ie: foo = $(splice)
		| "=" `isSuffixOf` s' = s
		-- nor does lambda definition or case expression
		| "->" `isSuffixOf` s' = s
		-- nor does a let .. in declaration
		| "in" `isSuffixOf` s' = s
		-- already have a $ to set off the splice
		-- ie: foo $ $(splice)
		| "$" `isSuffixOf` s' = s
		-- need to add a $ to set off the splice
		-- ie: bar $(splice)
		| otherwise = s ++ " $ "
	  where
	  	s' = filter (not . isSpace) s

	findindent = length . takeWhile isSpace
	addindent n = unlines . map (i ++) . lines
	  where
	  	i = take n $ repeat ' '

{- Tweaks code output by GHC in splices to actually build. Yipes. -}
mangleCode :: String -> String
mangleCode = flip_colon
	. remove_unnecessary_type_signatures
	. lambdaparenhack
	. lambdaparens
	. declaration_parens
	. case_layout
	. case_layout_multiline
	. yesod_url_render_hack
	. text_builder_hack
	. nested_instances 
	. collapse_multiline_strings
	. remove_package_version
	. emptylambda
  where
  	{- Lambdas are often output without parens around them.
	 - This breaks when the lambda is immediately applied to a
	 - parameter.
	 - 
	 - For example:
	 -
	 - renderRoute (StaticR sub_a1nUH)
	 -   = \ (a_a1nUI, b_a1nUJ)
	 -       -> (((pack "static") : a_a1nUI),
	 -            b_a1nUJ)
	 -       (renderRoute sub_a1nUH)
	 -
	 - There are sometimes many lines of lambda code that need to be
	 - parenthesised. Approach: find the "->" and scan down the
	 - column to the first non-whitespace. This is assumed
	 - to be the expression after the lambda.
	 -
	 - Runs recursively on the body of the lambda, to handle nested
	 - lambdas.
	 -}
	lambdaparens = parsecAndReplace $ do
		-- skip lambdas inside tuples or parens
		prefix <- noneOf "(, \n"
		preindent <- many1 $ oneOf " \n"
		string "\\ "
		lambdaparams <- restofline
		continuedlambdaparams <- many $ try $ do
			indent <- many1 $ char ' '
			p <- satisfy isLetter
			aram <- many $ satisfy isAlphaNum <|> oneOf "_"
			newline
			return $ indent ++ p:aram ++ "\n"
		indent <- many1 $ char ' '
		string "-> "
		firstline <- restofline
		lambdalines <- many $ try $ do
			string indent
			char ' '
			l <- restofline
			return $ indent ++ " " ++ l
		return $ concat 
			[ prefix:preindent
			, "(\\ " ++ lambdaparams ++ "\n"
			, concat continuedlambdaparams
			, indent ++ "-> "
			, lambdaparens $ intercalate "\n" (firstline:lambdalines)
			, ")\n"
			]
	
	{- Hack to add missing parens in a specific case in yesod
	 - static route code.
	 -
	 -     StaticR
	 -     yesod_dispatch_env_a4iDV
	 -     (\ p_a4iE2 r_a4iE3
	 -        -> r_a4iE3 {Network.Wai.pathInfo = p_a4iE2}
	 -        xrest_a4iDT req_a4iDW)) }
	 -
	 - Need to add another paren around the lambda, and close it
	 - before its parameters. lambdaparens misses this one because
	 - there is already one paren present.
	 -
	 - FIXME: This is a hack. lambdaparens could just always add a
	 - layer of parens even when a lambda seems to be in parent.
	 -}
	lambdaparenhack = parsecAndReplace $ do
		indent <- many1 $ char ' '
		staticr <- string "StaticR"
		newline
		string indent
		yesod_dispatch_env <- restofline
		string indent
		lambdaprefix <- string "(\\ "
		l1 <- restofline
		string indent
		lambdaarrow <- string "   ->"
		l2 <- restofline
		return $ unlines
			[ indent, staticr
			, indent, yesod_dispatch_env
			, indent, "(", lambdaprefix, l1
			, indent, lambdaarrow, l2, ")"
			]

	restofline = manyTill (noneOf "\n") newline

  	{- For some reason, GHC sometimes doesn't like the multiline
	 - strings it creates. It seems to get hung up on \{ at the
	 - start of a new line sometimes, wanting it to not be escaped.
	 -
	 - To work around what is likely a GHC bug, just collapse
	 - multiline strings. -}
	collapse_multiline_strings = parsecAndReplace $ do
		string "\\\n"
		many1 $ oneOf " \t"
		string "\\"
		return "\\n"

	{- GHC outputs splices using explicit braces rather than layout.
	 - For a case expression, it does something weird:
	 -
	 - case foo of {
	 -   xxx -> blah
	 -   yyy -> blah };
	 -
	 - This is not legal Haskell; the statements in the case must be
	 - separated by ';'
	 -
	 - To fix, we could just put a semicolon at the start of every line
	 - containing " -> " ... Except that lambdas also contain that.
	 - But we can get around that: GHC outputs lambas like this:
	 -
	 - \ foo
	 -   -> bar
	 -
	 - Or like this:
	 -
	 - \ foo -> bar
	 -
	 - So, we can put the semicolon at the start of every line
	 - containing " -> " unless there's a "\ " first, or it's
	 - all whitespace up until it.
	 -}
	case_layout = parsecAndReplace $ do
		newline
		indent <- many1 $ char ' '
		prefix <- manyTill (noneOf "\n") (try (string "-> "))
		if length prefix > 10
			then unexpected "too long a prefix"
			else if "\\ " `isInfixOf` prefix
				then unexpected "lambda expression"
				else if null prefix
					then unexpected "second line of lambda"
					else return $ "\n" ++ indent ++ "; " ++ prefix ++ " -> "
	{- Sometimes cases themselves span multiple lines:
	 -
	 - Nothing
	 -   -> foo
	 -}
	case_layout_multiline = parsecAndReplace $ do
		newline
		indent <- many1 $ char ' '
		firstline <- restofline

		string indent
		indent2 <- many1 $ char ' '
		string "-> "
		if "\\ " `isInfixOf` firstline
			then unexpected "lambda expression"
			else return $ "\n" ++ indent ++ "; " ++ firstline ++ "\n"
				++ indent ++ indent2 ++ "-> "

	{- (foo, \ -> bar) is not valid haskell, GHC.
	 - Change to (foo, bar)
	 -
	 - (Does this ever happen outside a tuple? Only saw
	 - it inside them..
	 -}
	emptylambda = replace ", \\ -> " ", "

	{- GHC may output this:
	 -
	 - instance RenderRoute WebApp where
	 -   data instance Route WebApp
	 -        ^^^^^^^^
	 - The marked word should not be there.
	 -
	 - FIXME: This is a yesod-specific hack, it should look for the
	 - outer instance.
	 -}
	nested_instances = replace "  data instance Route" "  data Route" 

	{- GHC does not properly parenthesise generated data type
	 - declarations. -}
	declaration_parens = replace "StaticR Route Static" "StaticR (Route Static)"

	{- A type signature is sometimes given for an entire lambda,
	 - which is not properly parenthesized or laid out. This is a
	 - hack to remove one specific case where this happens and the
	 - signature is easily inferred, so is just removed.
	 -}
	remove_unnecessary_type_signatures = parsecAndReplace $ do
		string " ::"
		newline
		many1 $ char ' '
		string "Text.Css.Block Text.Css.Resolved"
		newline
		return ""

	{- GHC may add full package and version qualifications for
	 - symbols from unimported modules. We don't want these.
	 -
	 - Examples:
	 -   "blaze-html-0.4.3.1:Text.Blaze.Internal.preEscapedText" 
	 -   "ghc-prim:GHC.Types.:"
	 -}
	remove_package_version = parsecAndReplace $
		mangleSymbol <$> qualifiedSymbol

	mangleSymbol "GHC.Types." = ""
	mangleSymbol "GHC.Tuple." = ""
	mangleSymbol s = s

	qualifiedSymbol :: Parser String
	qualifiedSymbol = do
		s <- token
		char ':'
		if length s < 5
			then unexpected "too short to be a namespace"
			else do
				token

	token :: Parser String
	token = do
		t <- satisfy isLetter
		oken <- many $ satisfy isAlphaNum <|> oneOf "-.'"
		return $ t:oken

	{- This works when it's "GHC.Types.:", but we strip
	 - that above, so have to fix up after it here. 
	 - The ; is added by case_layout. -}
	flip_colon = replace "; : _ " "; _ : "

{- This works around a problem in the expanded template haskell for Yesod
 - type-safe url rendering.
 -
 - It generates code like this:
 - 
 -                                  (toHtml
 -                                     (\ u_a2ehE -> urender_a2ehD u_a2ehE []
 -                                        (CloseAlert aid)))));
 -
 - Where urender_a2ehD is the function returned by getUrlRenderParams.
 - But, that function that only takes 2 params, not 3.
 - And toHtml doesn't take a parameter at all!
 - 
 - So, this modifes the code, to look like this:
 - 
 -                                  (toHtml
 -                                     (flip urender_a2ehD []
 -                                        (CloseAlert aid)))));
 - 
 - FIXME: Investigate and fix this properly.
 -}
yesod_url_render_hack :: String -> String
yesod_url_render_hack = parsecAndReplace $ do
	string "(toHtml"
	whitespace
	string "(\\"
	whitespace
	wtf <- token
	whitespace
	string "->"
	whitespace
	renderer <- token
	whitespace
	string wtf
	whitespace
	return $ "(toHtml (flip " ++ renderer ++ " "
  where
	whitespace :: Parser String
	whitespace = many $ oneOf " \t\r\n"

	token :: Parser String
	token = many1 $ satisfy isAlphaNum <|> oneOf "_"

{- Use exported symbol. -}
text_builder_hack :: String -> String
text_builder_hack = replace "Data.Text.Lazy.Builder.Internal.fromText" "Data.Text.Lazy.Builder.fromText"

{- Given a Parser that finds strings it wants to modify,
 - and returns the modified string, does a mass 
 - find and replace throughout the input string.
 - Rather slow, but crazy powerful. -}
parsecAndReplace :: Parser String -> String -> String
parsecAndReplace p s = case parse find "" s of
	Left e -> s
	Right l -> concatMap (either (\c -> [c]) id) l
  where
  	find :: Parser [Either Char String]
	find = many $ try (Right <$> p) <|> (Left <$> anyChar)

main :: IO ()
main = go =<< getArgs
  where
	go (destdir:log:header:[]) = run destdir log (Just header)
	go (destdir:log:[]) = run destdir log Nothing
  	go _ = error "usage: EvilSplicer destdir logfile [headerfile]"

	run destdir log mheader = do
		r <- parseFromFile splicesExtractor log
		case r of
			Left e -> error $ show e
			Right splices -> do
				let groups = groupBy (\a b -> splicedFile a == splicedFile b) splices
				imports <- maybe (return Nothing) (catchMaybeIO . readFile) mheader
				mapM_ (applySplices destdir imports) groups