aboutsummaryrefslogtreecommitdiff
path: root/exampleData/headlessRuns/wikipediaRun.json
diff options
context:
space:
mode:
authorGravatar Benjamin Jones <bjones@galois.com>2013-02-11 19:31:26 -0800
committerGravatar Benjamin Jones <bjones@galois.com>2013-02-11 19:54:19 -0800
commit53c8d9316886fc5704aa638ef9d696029618caff (patch)
tree3ab4bcb6a01f6a3e4cf80d3702cfba3303430591 /exampleData/headlessRuns/wikipediaRun.json
parent6ad504fcb173503f530296bf54330b7fb841f1a5 (diff)
cleaned up exampleData/headlessRuns/
Diffstat (limited to 'exampleData/headlessRuns/wikipediaRun.json')
-rw-r--r--exampleData/headlessRuns/wikipediaRun.json14
1 files changed, 14 insertions, 0 deletions
diff --git a/exampleData/headlessRuns/wikipediaRun.json b/exampleData/headlessRuns/wikipediaRun.json
new file mode 100644
index 0000000..98f871b
--- /dev/null
+++ b/exampleData/headlessRuns/wikipediaRun.json
@@ -0,0 +1,14 @@
+/*
+ * This run crawls Wikipedia from the main page to a depth of 2, fetching at
+ * most 20 pages in total, with a 1000 ms delay between requests (for
+ * politeness). These limits are the leading numbers in 'crawlType' below.
+ */
+{
+ 'rulePath' : '../ruleSets/',
+ 'crawlType' : '2 20 1000 *wikipedia.org*',
+ 'runs': [
+ { 'url': 'http://en.wikipedia.org/wiki/Main_Page', 'ruleSet': 'headingRules.json' },
+ { 'url': 'http://en.wikipedia.org/wiki/Main_Page', 'ruleSet': 'miscRules.json' },
+ { 'url': 'http://en.wikipedia.org/wiki/Main_Page', 'ruleSet': 'textRules.json' }
+ ]
+}