diff options
author | Benjamin Jones <bjones@galois.com> | 2013-02-11 19:31:26 -0800 |
---|---|---|
committer | Benjamin Jones <bjones@galois.com> | 2013-02-11 19:54:19 -0800 |
commit | 53c8d9316886fc5704aa638ef9d696029618caff (patch) | |
tree | 3ab4bcb6a01f6a3e4cf80d3702cfba3303430591 /exampleData/headlessRuns/wikipediaRun.json | |
parent | 6ad504fcb173503f530296bf54330b7fb841f1a5 (diff) |
cleaned up exampleData/headlessRuns/
Diffstat (limited to 'exampleData/headlessRuns/wikipediaRun.json')
-rw-r--r-- | exampleData/headlessRuns/wikipediaRun.json | 14 |
1 files changed, 14 insertions, 0 deletions
diff --git a/exampleData/headlessRuns/wikipediaRun.json b/exampleData/headlessRuns/wikipediaRun.json
new file mode 100644
index 0000000..98f871b
--- /dev/null
+++ b/exampleData/headlessRuns/wikipediaRun.json
@@ -0,0 +1,14 @@
+/*
+ * This run crawls Wikipedia, starting at the main page, going to a depth
+ * of 2, with a total of 20 pages maximum crawled, and with a 1000 ms delay
+ * between queries (for politeness).
+ */
+{
+  'rulePath' : '../ruleSets/',
+  'crawlType' : '2 20 1000 *wikipedia.org*',
+  'runs': [
+    { 'url': 'http://en.wikipedia.org/wiki/Main_Page', 'ruleSet': 'headingRules.json' },
+    { 'url': 'http://en.wikipedia.org/wiki/Main_Page', 'ruleSet': 'miscRules.json' },
+    { 'url': 'http://en.wikipedia.org/wiki/Main_Page', 'ruleSet': 'textRules.json' }
+  ]
+}