changeset 0:3b37d71af924 default tip

initial
author dwinter
date Tue, 26 Feb 2013 15:50:30 +0100
parents
children
files .classpath .gitignore .project .settings/org.eclipse.jdt.core.prefs CHANGES.txt KEYS LICENSE.txt NOTICE.txt README.txt build.xml conf/adaptive-mimetypes.txt conf/automaton-urlfilter.txt conf/automaton-urlfilter.txt.template conf/configuration.xsl conf/domain-suffixes.xml conf/domain-suffixes.xsd conf/domain-urlfilter.txt conf/domainblacklist-urlfilter.txt conf/host-urlnormalizer.txt conf/httpclient-auth.xml conf/httpclient-auth.xml.template conf/log4j.properties conf/mpiwg-dom-parser.xml conf/mpiwg-parser.xml conf/nutch-conf.xsl conf/nutch-default.xml conf/nutch-site.out.xml conf/nutch-site.xml conf/nutch-site.xml.template conf/parse-plugins.dtd conf/parse-plugins.xml conf/prefix-urlfilter.txt conf/prefix-urlfilter.txt.template conf/regex-normalize.xml conf/regex-normalize.xml.template conf/regex-urlfilter.txt conf/regex-urlfilter.txt.template conf/schema-solr4.xml conf/schema.xml conf/solrindex-mapping.xml conf/subcollections.xml conf/subcollections.xml.template conf/suffix-urlfilter.txt conf/suffix-urlfilter.txt.template default.properties src/plugin/build-plugin.xml src/plugin/build.xml src/plugin/parse-MPIWG-metaTag/build.xml src/plugin/parse-MPIWG-metaTag/ivy.xml src/plugin/parse-MPIWG-metaTag/plugin.xml src/plugin/parse-MPIWG-metaTag/src/java/de/mpiwg/itgroup/indexer/urlmeta/URLMetaIndexingFilter.java src/plugin/parse-MPIWG-metaTag/src/java/de/mpiwg/itgroup/scoring/urlmeta/URLMetaScoringFilter.java src/plugin/parse-mpiwg/README.txt src/plugin/parse-mpiwg/build.xml src/plugin/parse-mpiwg/ivy.xml src/plugin/parse-mpiwg/plugin.xml src/plugin/parse-mpiwg/sample/testMetatags.html src/plugin/parse-mpiwg/src/java/de/mpiwg/itgroup/mpiwg/parse/MPIWGDomFilter.java src/plugin/parse-mpiwg/src/java/de/mpiwg/itgroup/mpiwg/parse/MPIWGDomParser.java src/plugin/parse-mpiwg/src/java/de/mpiwg/itgroup/mpiwg/parse/MPIWGFilter.java src/plugin/parse-mpiwg/src/java/de/mpiwg/itgroup/mpiwg/parse/MPIWGParser.java 
src/plugin/parse-mpiwg/src/test/org/apache/nutch/parse/html/TestMetatagParser.java src/plugin/plugin.dtd urls/seed.txt
diffstat 63 files changed, 18461 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.classpath	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,55 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+	<classpathentry exported="true" kind="lib" path="conf"/>
+	<classpathentry exported="true" kind="lib" path="lib/nekohtml.jar"/>
+	<classpathentry exported="true" kind="lib" path="lib/tagsoup-1.2.jar"/>
+	<classpathentry exported="true" kind="lib" path="lib/automaton.jar"/>
+	<classpathentry exported="true" kind="lib" path="lib/rome-0.9.jar"/>
+	<classpathentry kind="src" path="src/java"/>
+	<classpathentry kind="src" path="src/plugin/parse-MPIWG-metaTag/src/java"/>
+	<classpathentry kind="src" path="src/plugin/urlfilter-validator/src/java"/>
+	<classpathentry kind="src" path="src/plugin/headings/src/java"/>
+	<classpathentry kind="src" path="src/plugin/parse-zip/src/java"/>
+	<classpathentry kind="src" path="src/plugin/protocol-file/src/java"/>
+	<classpathentry kind="src" path="src/plugin/urlfilter-domain/src/java"/>
+	<classpathentry kind="src" path="src/plugin/urlnormalizer-basic/src/java"/>
+	<classpathentry kind="src" path="src/plugin/index-anchor/src/java"/>
+	<classpathentry kind="src" path="src/plugin/parse-tika/src/java"/>
+	<classpathentry kind="src" path="src/plugin/microformats-reltag/src/java"/>
+	<classpathentry kind="src" path="src/plugin/index-basic/src/java"/>
+	<classpathentry kind="src" path="src/plugin/urlfilter-automaton/src/java"/>
+	<classpathentry kind="src" path="src/plugin/scoring-opic/src/java"/>
+	<classpathentry kind="src" path="src/plugin/urlnormalizer-pass/src/java"/>
+	<classpathentry kind="src" path="src/plugin/urlfilter-domainblacklist/src/java"/>
+	<classpathentry kind="src" path="src/plugin/parse-swf/src/java"/>
+	<classpathentry kind="src" path="src/plugin/scoring-link/src/java"/>
+	<classpathentry kind="src" path="src/plugin/protocol-ftp/src/java"/>
+	<classpathentry kind="src" path="src/plugin/tld/src/java"/>
+	<classpathentry kind="src" path="src/plugin/urlmeta/src/java"/>
+	<classpathentry kind="src" path="src/plugin/scoring-depth/src/java"/>
+	<classpathentry kind="src" path="src/plugin/lib-http/src/java"/>
+	<classpathentry kind="src" path="src/plugin/parse-ext/src/java"/>
+	<classpathentry kind="src" path="src/plugin/feed/src/java"/>
+	<classpathentry kind="src" path="src/plugin/protocol-http/src/java"/>
+	<classpathentry kind="src" path="src/plugin/urlnormalizer-regex/src/java"/>
+	<classpathentry kind="src" path="src/plugin/index-static/src/java"/>
+	<classpathentry kind="src" path="src/plugin/subcollection/src/java"/>
+	<classpathentry kind="src" path="src/plugin/urlnormalizer-host/src/java"/>
+	<classpathentry kind="src" path="src/plugin/urlfilter-suffix/src/java"/>
+	<classpathentry kind="src" path="src/plugin/urlfilter-prefix/src/java"/>
+	<classpathentry kind="src" path="src/plugin/parse-html/src/java"/>
+	<classpathentry kind="src" path="src/plugin/parse-js/src/java"/>
+	<classpathentry kind="src" path="src/plugin/urlfilter-regex/src/java"/>
+	<classpathentry kind="src" path="src/plugin/index-more/src/java"/>
+	<classpathentry kind="src" path="src/plugin/protocol-httpclient/src/java"/>
+	<classpathentry kind="src" path="src/plugin/language-identifier/src/java"/>
+	<classpathentry kind="src" path="src/plugin/index-metadata/src/java"/>
+	<classpathentry kind="src" path="src/plugin/parse-metatags/src/java"/>
+	<classpathentry kind="src" path="src/plugin/lib-regex-filter/src/java"/>
+	<classpathentry kind="src" path="src/plugin/creativecommons/src/java"/>
+	<classpathentry kind="src" path="src/plugin/parse-mpiwg/src/java"/>
+	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7"/>
+	<classpathentry kind="lib" path="src/plugin/parse-swf/lib/javaswf.jar"/>
+	<classpathentry kind="con" path="org.apache.ivyde.eclipse.cpcontainer.IVYDE_CONTAINER/?project=nutch-mpiwg-plugins&amp;ivyXmlPath=ivy%2Fivy.xml&amp;confs=*"/>
+	<classpathentry kind="output" path="bin"/>
+</classpath>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.gitignore	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,7 @@
+conf/*.txt
+conf/*.xml
+conf/hadoop-env.sh
+conf/slaves
+build/
+runtime/
+logs/
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.project	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>nutch-mpiwg-plugins</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.eclipse.jdt.core.javabuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.eclipse.jdt.core.javanature</nature>
+		<nature>org.apache.ivyde.eclipse.ivynature</nature>
+	</natures>
+</projectDescription>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.settings/org.eclipse.jdt.core.prefs	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,11 @@
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
+org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
+org.eclipse.jdt.core.compiler.compliance=1.7
+org.eclipse.jdt.core.compiler.debug.lineNumber=generate
+org.eclipse.jdt.core.compiler.debug.localVariable=generate
+org.eclipse.jdt.core.compiler.debug.sourceFile=generate
+org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
+org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
+org.eclipse.jdt.core.compiler.source=1.7
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CHANGES.txt	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,1851 @@
+Nutch Change Log
+
+(trunk): Current Development
+
+* NUTCH-1274 Fix [cast] javac warnings (Tejas Patil via lewismc)
+
+* NUTCH-1494 RSS feed plugin seems broken (Sourajit Basak, Tejas Patil and lewismc)
+
+* NUTCH-1127 JUnit test for urlfilter-validator (Tejas Patil via lewismc)
+
+* NUTCH-1119 JUnit test for index-static (Tejas Patil via lewismc)
+
+* NUTCH-1510 Upgrade to Hadoop 1.1.1 (markus)
+
+* NUTCH-1118 JUnit test for index-basic (Tejas Patil via lewismc)
+
+* NUTCH-1331 limit crawler to defined depth (jnioche)
+
+Release 1.6 - 23/11/2012
+
+* NUTCH-1370 Expose exact number of urls injected @runtime (snagel via lewismc)
+
+* NUTCH-1117 JUnit test for index-anchor (lewismc)
+
+* NUTCH-1451 Upgrade automaton jar to 1.11-8 (lewismc)
+
+* NUTCH-1488 bin/nutch to run junit from any directory (snagel via lewismc)
+
+* NUTCH-1493 Error adding field 'contentLength'='' during solrindex using index-more (Nathan Gass via lewismc)
+
+* NUTCH-1491 Strip UTF-8 non-character codepoints in title (Nathan Gass via markus)
+
+* NUTCH-1421 RegexURLNormalizer to only skip rules with invalid patterns (snagel)
+
+* NUTCH-1341 NotModified time set to now but page not modified (markus)
+
+* NUTCH-1215 UpdateDB should not require segment as input (markus)
+
+* NUTCH-1383 IndexingFiltersChecker to show error message instead of null pointer exception (snagel)
+
+* NUTCH-1476 SegmentReader getStats should set parsed = -1 if no parsing took place (snagel)
+
+* NUTCH-1252 SegmentReader -get shows wrong data (snagel)
+
+* NUTCH-1344 BasicURLNormalizer to normalize https same as http (snagel)
+
+* NUTCH-706 Url regex normalizer: pattern for session id removal not to match "newsId" (Meghna Kukreja via snagel)
+
+* NUTCH-1415 release packages to contain top level folder apache-nutch-x.x (snagel)
+
+* NUTCH-1441 AnchorIndexingFilter should use plain HashSet (ferdy via lewismc)
+
+* NUTCH-1470 Ensure test files are included for runtime testing (lewismc)
+
+* NUTCH-1434 Indexer to delete robots noindex (markus)
+
+* NUTCH-1443 Solr schema version is invalid (markus)
+
+* NUTCH-1417 Remove o.a.n.metadata.Office (lewismc)
+
+* NUTCH-1376 Add description parameter to every ant task (lewismc)
+
+* NUTCH-1440 reconfigure non-existent stopwords_en.txt in schema-solr4.xml (shekhar sharma via lewismc)
+
+* NUTCH-1439 Define boost field as type float in schema-solr4.xml (shekhar sharma via lewismc)
+
+* NUTCH-1433 Upgrade to Tika 1.2 (jnioche)
+
+* NUTCH-1388 Optionally maintain custom fetch interval despite AdaptiveFetchSchedule (markus)
+
+* NUTCH-1430 Freegenerator records overwrite CrawlDB records with AdaptiveFetchSchedule (markus)
+
+* NUTCH-1087 Deprecate crawl command and replace with example script (jnioche)
+
+* NUTCH-1306 Add option to not commit and clarify existing solr.commit.size (ferdy)
+
+* NUTCH-1405 Allow to overwrite CrawlDatum's with injected entries (markus)
+
+* NUTCH-1412 Upgrade commons lang (markus)
+
+* NUTCH-1251 SolrDedup to use proper Lucene catch-all query (Arkadi Kosmynin via markus)
+
+* NUTCH-1407 BasicIndexingFilter to optionally add domain field (markus)
+
+* NUTCH-1408 RobotRulesParser main doesn't take URL's (markus)
+
+* NUTCH-1300 Indexer to filter normalize URL's (markus)
+
+* NUTCH-1330 WebGraph OutlinkDB to preserve back up (markus)
+
+* NUTCH-1319 HostNormalizer plugin (markus)
+
+* NUTCH-1386 Headings filter not to add empty values (markus)
+
+* NUTCH-1356 ParseUtil use ExecutorService instead of manual thread handling (ferdy via markus)
+
+* NUTCH-1352 Improve regex urlfilters/normalizers synchronization (ferdy via markus)
+
+* NUTCH-1024 Dynamically set fetchInterval by MIME-type (markus)
+
+* NUTCH-1364 Add a counter in Generator for malformed urls (lewismc)
+
+* NUTCH-1262 Map `duplicating` content-types to a single type (markus)
+
+* NUTCH-1385 More robust plug-in order properties in nutch-site.xml (Andy Xue via markus)
+
+* NUTCH-1336 Optionally not index db_notmodified pages (markus)
+
+* NUTCH-1346 Follow outlinks to ignore external (markus)
+
+* NUTCH-1320 IndexChecker and ParseChecker choke on IDN's (markus)
+
+* NUTCH-1351 DomainStatistics to aggregate by TLD (markus)
+
+* NUTCH-1381 Allow to override default subcollection field name (markus)
+
+* NUTCH-XX Commit to add configuration for separation of ant distribution targets (lewismc + jnioche)
+
+Release 1.5.1 - 07/10/2012
+
+* NUTCH-1404 Nutch script fails to find job file in deploy mode (sidabatra, jnioche)
+
+* NUTCH-1415 release packages to contain top level folder apache-nutch-x.x (snagel via lewismc)
+
+* NUTCH-1400 Remove developer -core option for bin/nutch (jnioche)
+
+* NUTCH-1384 Typo in ParseSegment's run-method (Matthias Agethle via markus)
+
+* NUTCH-1398 Upgrade to Hadoop 1.0.3 (jnioche)
+
+Release 1.5 - 04/15/2012
+
+* NUTCH-1208 Don't include KEYS file in bin distribution (jnioche)
+
+* NUTCH-1234 Upgrade to Tika 1.1 (jnioche, markus)
+
+* NUTCH-809 Parse-metatags plugin (jnioche)
+
+* NUTCH-1310 Nutch to send HTTP-accept header (markus)
+
+* NUTCH-1305 Domain(blacklist)URLFilter to trim entries (markus)
+
+* NUTCH-1307 Improve formatting of ant targets for clearer project help (lewismc)
+
+* NUTCH-1299 LinkRank inverter to ignore records without Node (markus)
+
+* NUTCH-1258 MoreIndexingFilter should be able to read Content-Type from both parse metadata and content metadata (jnioche, markus)
+
+* NUTCH-1293 IndexingFiltersChecker to store detected content type in crawldatum metadata (markus)
+
+* NUTCH-1291 Fetcher to stringify exception on // unexpected exception (markus)
+
+* NUTCH-965 Skip parsing for truncated documents (alexis, lewismc, ferdy)
+
+* NUTCH-1210 DomainBlacklistFilter (markus)
+
+* NUTCH-1193 Incorrect url transform to lowercase: parameter solr (Eduardo dos Santos Leggiero via lewismc)
+
+* NUTCH-1272 Wrong property name for index-static in nutch-default.xml (Daniel Baur via jnioche)
+
+* NUTCH-1259 Store detected content-type in crawldatum metadata (jnioche, markus)
+
+* NUTCH-1266 Subcollection to optionally write to configured fields (markus)
+
+* NUTCH-1005 Parse headings plugin (markus)
+
+* NUTCH-1264 Configurable indexing plugin index-metadata (jnioche)
+
+* NUTCH-1242 Allow disabling of URL Filters in ParseSegment (Edward Drapkin via markus)
+
+* NUTCH-1256 WebGraph to dump host + score (markus)
+
+* NUTCH-1260 Fetcher should log fetching of redirects (Sebastian Nagel via markus)
+
+* NUTCH-1255 Change ivy.xml of all plugins to remove "nutch.root" property (ferdy)
+
+* NUTCH-1248 Generator to select on status (markus)
+
+* NUTCH-1177 Generator to select on retry interval (markus)
+
+* NUTCH-1246 Upgrade to Hadoop 1.0.0 (jnioche)
+
+* NUTCH-1139 Indexer to delete gone documents (markus)
+
+* NUTCH-1244 CrawlDBDumper to filter by regex (markus)
+
+* NUTCH-1237 Improve javac arguments for more verbose output (lewismc)
+
+* NUTCH-1236 Add link to site documentation to download older versions of Nutch (lewismc)
+
+* NUTCH-1146 Prevent generation of _SUCCESS files in output (jnioche)
+
+* NUTCH-1232 Remove site field from index-basic (markus)
+
+* NUTCH-1239 Webgraph should remove deleted pages from segment input (markus)
+
+* NUTCH-1238 Fetcher throughput threshold must start before feeder finished (markus)
+
+* NUTCH-1138 remove LogUtil from trunk and nutch gora (lewismc)
+
+* NUTCH-1231 Upgrade to Tika 1.0 (markus)
+
+* NUTCH-1230 MimeType API deprecated and breaks with Tika 1.0 (markus)
+
+* NUTCH-1235 Upgrade to new Hadoop 0.20.205.0 (markus)
+
+* NUTCH-1217 Update NOTICE.txt to drop some copyrights (lewismc)
+
+* NUTCH-1129 Add freegenerator, domainstats and crawldbscanner to log4j (markus)
+
+* NUTCH-1184 Fetcher to parse and follow Nth degree outlinks (markus)
+
+* NUTCH-1221 Migrate DomainStatistics to MapReduce API (markus)
+
+* NUTCH-1216 Add trivial comment to lib/native/README.txt (lewismc)
+
+* NUTCH-1214 DomainStats tool should be named for what it's doing (markus)
+
+* NUTCH-1213 Pass additional SolrParams when indexing to Solr (ab)
+
+* NUTCH-1211 URLFilterChecker command line help doesn't inform user of 
+  STDIN requirements (mattmann)
+
+* NUTCH-1209 Output from ParserChecker Url missing a newline (mattmann)
+
+* NUTCH-1207 ParserChecker to output signature (markus)
+
+* NUTCH-1090 InvertLinks should inform when ignoring internal links (Marek Backmann via markus)
+
+* NUTCH-1174 Outlinks are not properly normalized (markus)
+
+* NUTCH-1203 ParseSegment to show number of milliseconds per parse (markus)
+
+* NUTCH-1185 Decrease solr.commit.size to 250 (markus)
+
+* NUTCH-1180 UpdateDB to backup previous CrawlDB (markus)
+
+* NUTCH-1173 DomainStats doesn't count db_not_modified (markus)
+
+* NUTCH-1155 Host/domain limit in generator is generate.max.count+1 (markus)
+
+* NUTCH-1061 Migrate MoreIndexingFilter from Apache ORO to java.util.regex (markus)
+
+* NUTCH-1178 Incorrect CSV header CrawlDatumCsvOutputFormat (markus)
+
+* NUTCH-1142 Normalization and filtering in WebGraph (markus)
+
+* NUTCH-1153 LinkRank not to log all keys and not to write Hadoop _SUCCESS file (markus)
+
+Release 1.4 - 11/4/2011
+
+* NUTCH-1195 Add Solr 4x (trunk) example schema (ab)
+
+* NUTCH-1192 Add '/runtime' to svn ignore (ferdy)
+
+* NUTCH-1097 application/xhtml+xml should be enabled in plugin.xml of parse-html; allow multiple mimetypes for plugin.xml (Ferdy via lewismc)
+
+* NUTCH-797 Fix parse-tika and parse-html to use relative URL resolution per RFC-3986
+  (Robert Hohman, ab)
+
+* NUTCH-1154 Upgrade to Tika 0.10. NOTE: Tika's new RTF parser may ignore more
+  text in malformed documents than previously - see TIKA-748 for details. (ab)
+
+* NUTCH-1109 Add Sonar targets to Ant build.xml (lewismc) 
+
+* NUTCH-1152 Upgrade SolrJ to version 3.4.0 (ab)
+
+* NUTCH-1136 Ant pmd target is broken (lewismc)
+
+* NUTCH-1058 Upgrade Solr schema to version 1.4 (markus)
+
+* NUTCH-1137 LinkDB invertlinks other options ignored when using -dir option (Sebastian Nagel, markus)
+
+* NUTCH-1141 Configurable Fetcher queue depth (jnioche)
+
+* NUTCH-1091 Remove commons logging dependency from Nutch branch and trunk (lewismc)
+
+* NUTCH-672 allow unit tests to be run from bin/nutch (Todd Lipton via lewismc)
+
+* NUTCH-937 Put plugins in classes/plugins in job file (Claudio Martella, Ferdy Galema, jnioche)
+
+* NUTCH-623 Change plugin source directory "languageidentifier" to "language-identifier" (lewismc)
+
+* NUTCH-1074 topN is ignored with maxNumSegments and generate.max.count (Robert Thomson via markus)
+
+* NUTCH-1078 Upgrade all instances of commons logging to slf4j (with log4j backend) (lewismc)
+
+* NUTCH-1115 Option to disable fixing embedded URL parameters in DomContentUtils (markus)
+
+* NUTCH-1114 Attr file missing in domain filter (markus)
+
+* NUTCH-1067 Configure minimum throughput for fetcher (markus)
+
+* NUTCH-1102 Fetcher to rely on fetcher.parse directive (markus)
+
+* NUTCH-1110 UpdateDB must not write _success file (markus)
+
+* NUTCH-1105 Max content length option for index-basic (markus)
+
+* NUTCH-940 static field plugin (Claudio Martella via lewismc)
+
+* NUTCH-914 Implement Apache Project Branding Requirements (lewismc)
+
+* NUTCH-1095 remove i18n from Nutch site to archive and legacy section of wiki (lewismc)
+
+* NUTCH-1101 Option to purge db_gone records with updatedb (markus)
+
+* NUTCH-1096 Empty (not null) ContentLength results in failure of fetch (Ferdy Galema via jnioche)
+
+* NUTCH-1073 Rename parameters 'fetcher.threads.per.host.by.ip' and 'fetcher.threads.per.host' (jnioche)
+
+* NUTCH-1089 Short compressed pages caused exception in protocol-httpclient (Simone Frenzel via jnioche)
+
+* NUTCH-1085 Nutch script does not require HADOOP_HOME (jnioche)
+
+* NUTCH-1075 Delegate language identification to Tika (jnioche)
+
+* NUTCH-1049 Add classes to bin/nutch script (markus)
+
+* NUTCH-1051 Export WebGraph node scores for Solr.ExternalFileField (markus)
+
+* NUTCH-1083 ParserChecker implements Tools (jnioche)
+
+* NUTCH-1082 IndexingFiltersChecker utility does not list multi valued fields (markus)
+
+* NUTCH-1004 Do not index empty values for title field (markus)
+
+* NUTCH-914 Implement Apache Project Branding Requirements (lewismc via jnioche)
+
+* NUTCH-1069 Readlinkdb broken on Hadoop > 0.20 (markus)
+
+* NUTCH-1044 Redirected URLs and possibly all of their outlinked URLs have invalid scores (jnioche)
+
+* NUTCH-1028 Log urls when parsing (markus)
+
+* NUTCH-1065 New mvn.template (lewismc)
+
+* NUTCH-1072 Display number and size of queues in Fetcher status (jnioche)
+
+* NUTCH-1071 Crawldb update displays total number of URLs per status (jnioche)
+
+* NUTCH-1045 MimeUtil to rely on default config provided by Tika (jnioche)
+
+* NUTCH-1057 Fetcher thread time out configurable (markus)
+
+* NUTCH-1037 Option to deduplicate anchors prior to indexing (markus)
+
+* NUTCH-1050 Add segmentDir option to WebGraph (markus)
+
+* NUTCH-1055 upgrade package.html file in language identifier plugin (lewismc)
+
+* NUTCH-1059 Remove convdb command from /bin/nutch (lewismc)
+
+* NUTCH-1019 Edit comment in org.apache.nutch.crawl.Crawl to reflect removal of legacy (lewismc)
+
+* NUTCH-1023 Trivial error in error message for org.apache.nutch.crawl.LinkDbReader (lewismc)
+
+* NUTCH-1043 Add pattern for filtering .js in default url filters (jnioche)
+
+* NUTCH-1054 LinkDB optional during indexing (jnioche)
+
+* NUTCH-1029 Readdb throws EOFException (markus)
+
+* NUTCH-1036 Solr jobs should increment counters in Reporter (markus)
+
+* NUTCH-987 Support HTTP auth for Solr communication (markus)
+
+* NUTCH-1027 Degrade log level of `can't find rules for scope` (markus)
+
+* NUTCH-783 IndexingFiltersChecker utility (jnioche via markus)
+
+* NUTCH-1030 WebgraphDB program requires manually added directories (markus)
+
+* NUTCH-1011 Normalize duplicate slashes in URL's (markus)
+
+* NUTCH-993 NullPointerException at FetcherOutputFormat.checkOutputSpecs (Christian Guegi via jnioche)
+
+* NUTCH-1013 Migrate RegexURLNormalizer from Apache ORO to java.util.regex (markus)
+
+* NUTCH-1016 Strip UTF-8 non-character codepoints and add logging for SolrWriter (markus)
+
+* NUTCH-1012 Cannot handle illegal charset $charset (markus)
+
+* NUTCH-1022 Upgrade version number of Nutch agent in conf (markus)
+
+* NUTCH-295 Description for fetcher.threads.fetch property (kubes via markus)
+
+* NUTCH-1000 Add option not to commit to Solr (markus)
+
+* NUTCH-1006 MetaEquiv with single quotes not accepted (markus)
+
+* NUTCH-1010 ContentLength not trimmed (markus)
+
+Release 1.3 - 6/4/2011
+
+* NUTCH-995 Generate POM file using the Ivy makepom task (mattmann, jnioche, Gabriele Kahlout)
+
+* NUTCH-1003 task 'package' does not reflect the new organisation of the code (jnioche)
+
+* NUTCH-994 Fine tune Solr schema (markus)
+
+* NUTCH-997 IndexingFilters to store Date objects instead of Strings (jnioche)
+
+* NUTCH-996 Indexer adds solr.commit.size+1 docs (markus)
+
+* NUTCH-983 Upgrade SolrJ to 3.1 (markus, jnioche)
+
+* NUTCH-989 Index-basic plugin and Solr schema now use date fieldType for tstamp field (markus)
+
+* NUTCH-888 Remove parse-rss and add tests for rss to parse-tika (jnioche)
+
+* NUTCH-991 SolrDedup must issue a commit (markus)
+
+* NUTCH-986 SolrDedup fails due to incorrect date format (markus)
+
+* NUTCH-977 SolrMappingReader uses hardcoded configuration parameter name for mapping file (markus)
+
+* NUTCH-976 Rename properties solrindex.* to solr.* (markus)
+
+* NUTCH-890 Fix IllegalAccessError with slf4j used in Solrj (markus)
+
+* NUTCH-891 Subcollection plugin won't require blacklist any more (markus)
+
+* NUTCH-972 CrawlDbMerger doesn't break on non-existent input (Gabriele Kahlout via jnioche)
+
+* NUTCH-967 Upgrade to Tika 0.9 (jnioche)
+
+* NUTCH-975 Fix missing/wrong headers in source files (markus)
+
+* NUTCH-963 Add support for deleting Solr documents with STATUS_DB_GONE in CrawlDB (Claudio Martella, markus)
+
+* NUTCH-825 Publish nutch artifacts to central maven repository (mattmann, jnioche)
+
+* NUTCH-962 max. redirects not handled correctly: fetcher stops at max-1 redirects (Sebastian Nagel via ab)
+
+* NUTCH-921 Reduce dependency of Nutch on config files (ab)
+
+* NUTCH-876 Remove remaining robots/IP blocking code in lib-http (ab)
+
+* NUTCH-872 Change the default fetcher.parse to FALSE (ab)
+
+* NUTCH-564 External parser supports encoding attribute (Antony Bowesman, mattmann)
+
+* NUTCH-964 Upgraded Xerces to 2.9.1, ERROR conf.Configuration - Failed to set setXIncludeAware (markus)
+
+* NUTCH-927 Fetcher.timelimit.mins is invalid when depth is greater than 1 (Wade Lau via jnioche)
+
+* NUTCH-824 Crawling - File Error 404 when fetching file with an hexadecimal character in the file name (Michela Becchi via jnioche)
+
+* NUTCH-954 Strict application of Content-Length limit for http protocols (Alexis Detreglode via jnioche)
+
+* NUTCH-950 DomainURLFilter throws NPE on bogus urls (Alexis Detreglode via jnioche)
+
+* NUTCH-935 basicurlnormalizer removes unnecessary /./ in URLs (Stondet via markus)
+
+* NUTCH-912 MoreIndexingFilter does not parse docx and xlsx date formats (Markus Jelsma, jnioche)
+
+* NUTCH-886 A .gitignore file for Nutch (dogacan)
+
+* NUTCH-930 Remove remaining dependencies on Lucene API (ab)
+
+* NUTCH-883 Remove unused parameters from nutch-default.xml (jnioche) 
+
+* NUTCH-936 LanguageIdentifier should not set empty lang field on NutchDocument (Markus Jelsma via jnioche)
+
+* NUTCH-787 ScoringFilters should not override the injected score (jnioche)
+
+* NUTCH-949 Conflicting ANT jars in classpath (jnioche)
+
+* NUTCH-863 Benchmark and a testbed proxy server (ab)
+
+* NUTCH-844 Improve NutchConfiguration (ab)
+
+* NUTCH-845 Native hadoop libs not available through maven (ab)
+
+* NUTCH-843 Separate the build and runtime environments (ab)
+
+* NUTCH-821 Use ivy in nutch builds (Enis Soztutar, jnioche)
+
+* NUTCH-837 Remove search servers and Lucene dependencies (ab)
+
+* NUTCH-836 Remove deprecated parse plugins (jnioche)
+
+* NUTCH-939 Added -dir command line option to SolrIndexer (Claudio Martella via ab)
+
+* NUTCH-948 Remove Lucene dependencies (ab)
+
+Release 1.2 - 09/18/2010
+
+* NUTCH-901 Make index-more plug-in configurable (Markus Jelsma via mattmann)
+
+* NUTCH-908 Infinite Loop and Null Pointer Bugs in Searching (kubes via mattmann)
+
+* NUTCH-906 Nutch OpenSearch sometimes raises DOMExceptions (Asheesh Laroia via ab)
+
+* NUTCH-862 HttpClient null pointer exception (Sebastian Nagel via ab)
+
+* NUTCH-905 Configurable file protocol parent directory crawling (Thorsten Scherler, mattmann, ab)
+
+* NUTCH-877 Allow setting of slop values for non-quote phrase queries on query-basic plugin (kubes via jnioche)
+
+* NUTCH-716 Make subcollection index field multivalued (Dmitry Lihachev via jnioche)
+
+* NUTCH-878 ScoringFilters should not override the injected score 
+
+* NUTCH-870 Injector should add the metadata before calling injectedScore (jnioche via mattmann)
+
+* NUTCH-858 No longer able to set per-field boosts on lucene documents (ab)
+
+* NUTCH-869 Add parse-html back (jnioche)
+
+* NUTCH-871 MoreIndexingFilter missing date format (Max Lynch via mattmann)
+
+* NUTCH-696 Timeout for Parser (ab, jnioche)
+
+* NUTCH-857 DistributedBeans should not close their RPC counterparts (kubes)
+  
+* NUTCH-855 ScoringFilter and IndexingFilter: To allow for the propagation of URL Metatags 
+  and their subsequent indexing (Scott Gonyea via mattmann)
+
+* NUTCH-677 Segment merge filtering based on segment content (Marcin Okraszewski via mattmann)
+
+* NUTCH-774 Retry interval in crawl date is set to 0 (Reinhard Schwab via mattmann)
+
+* NUTCH-697 Generate log output for solr indexer and dedup (Dmitry Lihachev, Jeroen van Vianen via mattmann)
+
+* NUTCH-850 SolrDeleteDuplicates needs to clone the SolrRecord objects (jnioche)
+
+* NUTCH-838 Add timing information to all Tool classes (Jeroen van Vianen, mattmann)
+
+* NUTCH-835 Document deduplication failed using MD5Signature (Sebastian Nagel via ab)
+
+* NUTCH-831 Allow configuration of how fields crawled by Nutch are stored / indexed / 
+  tokenized (Jeroen van Vianen via mattmann)
+
+* NUTCH-278 Fetcher-status might need clarification: kbit/s instead of kb/s shown (Alex McLintock via mattmann)
+
+* NUTCH-833 Website is still Lucene branded (mattmann, Alex McLintock)
+
+* NUTCH-832 Website menu has lots of broken links - in particular the API docs (Alex McLintock via mattmann)
+
+Release 1.1 - 2010-06-06
+
+* NUTCH-819 Included Solr schema.xml and solrindex-mapping.xml don't play together (ab)
+
+* NUTCH-818 Bugfix : Parse-tika uses minorCodes instead of majorCodes in ParseStatus (jnioche)
+
+* NUTCH-816 Add zip target to build.xml (mattmann)
+
+* NUTCH-732 Subcollection plugin not working (Filipe Antunes, ab)
+
+* NUTCH-815 Invalid blank line before If-Modified-Since header (Pascal Dimassimo via ab)
+
+* NUTCH-814 SegmentMerger bug (Rob Bradshaw, ab)
+
+* NUTCH-812 Crawl.java incorrectly uses the Generator API resulting in NPE (Phil Barnett via mattmann and ab)
+
+* NUTCH-810 Upgrade to Tika 0.7 (jnioche)
+
+* NUTCH-785 Copy metadata from origin URL when redirecting in Fetcher + call scfilters.initialScore on newly created URL (jnioche)
+
+* NUTCH-779 Mechanism for passing metadata from parse to crawldb (jnioche)
+
+* NUTCH-784 CrawlDBScanner (jnioche)
+
+* NUTCH-762 Generator can generate several segments in one parse of the crawlDB (jnioche)
+
+* NUTCH-740 Configuration option to override default language for fetched pages (Marcin Okraszewski via jnioche)
+
+* NUTCH-803 Upgrade to Hadoop 0.20.2 (ab)
+
+* NUTCH-787 Upgrade Lucene to 3.0.1. (Dawid Weiss via ab)
+
+* NUTCH-796 Zero results problems difficult to troubleshoot due to lack of logging (ab)
+
+* NUTCH-801 Remove RTF and MP3 parse plugins (jnioche)
+
+* NUTCH-798 Upgrade to SOLR1.4 and its dependencies (jnioche)
+
+* NUTCH-799 SOLRIndexer to commit once all reducers have finished (jnioche)
+
+* NUTCH-782 Ability to order htmlparsefilters (jnioche)
+
+* NUTCH-719 fetchQueues.totalSize incorrect in Fetcher (Steven Denny via jnioche) 
+
+* NUTCH-790 Some external javadoc links are broken (siren)
+
+* NUTCH-766 Tika parser (jnioche via mattmann)
+
+* NUTCH-786 Improvement to the list of suffix domains (jnioche)
+
+* NUTCH-775 Enhance searcher interface (siren)
+
+* NUTCH-781 Update Tika to v0.6 (jnioche)
+
+* NUTCH-269 CrawlDbReducer: OOME because no upper-bound on inlinks count (stack + jnioche)
+
+* NUTCH-655 Injecting Crawl metadata (jnioche)
+
+* NUTCH-658 Use counters to report fetching and parsing status (jnioche)
+
+* NUTCH-777 Upgrading to jetty6 broke unit tests (mattmann)
+
+* NUTCH-767 Update Tika to v0.5 for the MimeType detection (Julien Nioche via ab)
+
+* NUTCH-769 Fetcher to skip queues for URLS getting repeated exceptions
+  (Julien Nioche via ab)
+
+* NUTCH-768 - Upgrade Nutch 1.0 to use Hadoop 0.20.1, also upgrades Xerces to 
+  version 2.9.1. (kubes)
+  
+* NUTCH-712 ParseOutputFormat should catch java.net.MalformedURLException
+  coming from normalizers (Julien Nioche via ab)
+
+* NUTCH-741 Job file includes multiple copies of nutch config files
+  (Kirby Bohling via ab)
+
+* NUTCH-739 SolrDeleteDuplicates too slow when using hadoop (Dmitry Lihachev via ab)
+
+* NUTCH-738 Close SegmentUpdater when FetchedSegments is closed
+  (Martina Koch, Kirby Bohling via ab)
+
+* NUTCH-746 NutchBeanConstructor does not close NutchBean upon contextDestroyed,
+  causing resource leak in the container. (Kirby Bohling via ab)
+
+* NUTCH-772 Upgrade Nutch to use Lucene 2.9.1 (ab)
+
+* NUTCH-760 Allow field mapping from Nutch to Solr index (David Stuart, ab)
+
+* NUTCH-761 Avoid cloning CrawlDatum in CrawlDbReducer (Julien Nioche, ab)
+
+* NUTCH-753 Prevent new Fetcher from retrieving the robots twice (Julien Nioche via ab)
+
+* NUTCH-773 - Some minor bugs in AbstractFetchSchedule (Reinhard Schwab via ab)
+
+* NUTCH-765 - Allow Crawl class to call Either Solr or Lucene Indexer (kubes)
+
+* NUTCH-735 - crawl-tool.xml must be read before nutch-site.xml when
+  invoked using crawl command (Susam Pal via dogacan)
+
+* NUTCH-721 - Fetcher2 Slow (Julien Nioche via dogacan)
+
+* NUTCH-702 - Lazy Instantiation of Metadata in CrawlDatum (Julien Nioche via dogacan)
+
+* NUTCH-707 - Generation of multiple segments in multiple runs returns only 1 segment
+  (Michael Chen, ab)
+
+* NUTCH-730 - NPE in LinkRank if no nodes with which to create the WebGraph
+  (Dennis Kubes via ab)
+
+* NUTCH-731 - Redirection of robots.txt in RobotRulesParser (Julien Nioche via ab)
+
+* NUTCH-757 - RequestUtils getBooleanParameter() always returns false
+  (Niall Pemberton via ab)
+
+* NUTCH-754 - Use GenericOptionsParser instead of FileSystem.parseArgs() (Julien
+  Nioche via ab)
+
+* NUTCH-756 - CrawlDatum.set() does not reset Metadata if it is null (Julien Nioche
+  via ab)
+
+* NUTCH-679 - Fetcher2 implementing Tool (Julien Nioche via ab)
+
+* NUTCH-758 - Set subversion eol-style to "native" (Niall Pemberton via ab)
+
+Release 1.0 - 2009-03-23
+
+ 1. NUTCH-474 - Fetcher2 crawlDelay and blocking fix (Dogacan Guney via ab)
+
+ 2. NUTCH-443 - Allow parsers to return multiple Parse objects.
+    (Dogacan Guney et al, via ab)
+
+ 3. NUTCH-393 - Indexer should handle null documents returned by filters.
+    (Eelco Lempsink via ab)
+
+ 4. NUTCH-456 - Parse msexcel plugin speedup (Heiko Dietze via siren)
+
+ 5. NUTCH-446 - RobotRulesParser should ignore Crawl-delay values of other
+    bots in robots.txt (Dogacan Guney via siren)
+
+ 6. NUTCH-482 - Remove redundant plugin lib-log4j (siren)
+ 
+ 7. NUTCH-483 - Remove redundant commons-logging jar from ontology plugin
+    (siren)
+
+ 8. NUTCH-161 - Change Plain text parser to
+    use parser.character.encoding.default property for fall back encoding
+    (KuroSaka TeruHiko, siren)
+
+ 9. NUTCH-61 - Support for adaptive re-fetch interval and detection of
+    unmodified content. (ab)
+
+10. NUTCH-392 - OutputFormat implementations should pass on Progressable.
+    (cutting via ab)
+
+11. NUTCH-495 - Unnecessary delays in Fetcher2 (dogacan)
+
+12. NUTCH-443 - allow parsers to return multiple Parse objects, this will speed 
+    up the rss parser (dogacan via mattmann). This update is a fix and semantics
+    change from the original patch for NUTCH-443. The original patch did not tell
+    the  Indexer to read crawl_parse too so that it can pickup sub-urls' fetch 
+    datums. This patch addresses that issue. Now, if Fetcher gets a null content, 
+    instead of pushing an empty content, it filters the null content.
+    
+13. NUTCH-485 - Change HtmlParseFilter's to return ParseResult object instead of 
+    Parse object. (Gal Nitzan via dogacan)
+
+14. NUTCH-489 - URLFilter-suffix management of the url path when the url contains 
+    some query parameters. (Emmanuel Joke via dogacan)
+
+15. NUTCH-502 - Bug in SegmentReader causes infinite loop. 
+    (Ilya Vishnevsky via dogacan)
+    
+16. NUTCH-444 Possibly use a different library to parse RSS feed for improved 
+    performance and compatibility. This patch introduced a new plugin, feed,
+    that includes an index filter and a parse plugin for feeds that uses ROME.
+    There was discussion to remove parse-rss, in light of the feed plugin, 
+    however, this patch does not explicitly remove parse-rss. (dogacan, mattmann)
+
+17. NUTCH-471 - Fix synchronization in NutchBean creation. 
+    (Enis Soztutar via dogacan)
+
+18. Upgrade to Lucene 2.2.0 and Hadoop 0.12.3. (ab)
+
+19. NUTCH-468 - Scoring filter should distribute score to all outlinks at 
+    once. (dogacan)
+
+20. NUTCH-504 - NUTCH-443 broke parsing during fetching. (dogacan)
+
+21. NUTCH-497 -  Extreme Nested Tags causes StackOverflowException in 
+	DomContentUtils...Spider Trap. (kubes)
+
+22. NUTCH-434 - Replace usage of ObjectWritable with something based on 
+    GenericWritable. (dogacan)
+
+23. NUTCH-499 - Refactor LinkDb and LinkDbMerger to reuse code. (dogacan)
+
+24. NUTCH-498 - Use Combiner in LinkDb to increase speed of linkdb generation.
+    (Espen Amble Kolstad via dogacan)
+
+25. NUTCH-507 - lib-lucene-analyzers jar definition is wrong in plugin.xml.
+    (Emmanuel Joke via dogacan)
+
+26. NUTCH-503 - Generator exits incorrectly for small fetchlists. 
+    (Vishal Shah via dogacan)
+
+27. NUTCH-505 - Outlink urls should be validated. (dogacan)
+
+28. NUTCH-510 - IndexMerger delete working dir. (Enis Soztutar via dogacan)
+
+29. NUTCH-513 - suffix-urlfilter.txt does not have a template. (dogacan)
+
+30. NUTCH-515 - Next fetch time is set incorrectly. (dogacan)
+
+30. NUTCH-506 - Nutch should delegate compression to Hadoop. (dogacan)
+
+31. NUTCH-517 - build encoding should be UTF-8. (Enis Soztutar via dogacan).
+
+32. NUTCH-518 - Fix OpicScoringFilter to respect scoring filter chaining.
+    (Enis Soztutar via dogacan)
+
+33. NUTCH-516 - Next fetch time is not set when it is a 
+    CrawlDatum.STATUS_FETCH_GONE. (Emmanuel Joke via dogacan)
+
+34. NUTCH-525 - DeleteDuplicates generates ArrayIndexOutOfBoundsException 
+    when trying to rerun dedup on a segment. (Vishal Shah via dogacan)
+
+35. NUTCH-514 - Indexer should only index pages with fetch status SUCCESS.
+    (dogacan) Note: There is a bigger problem, i.e how to deal
+    with redirected pages, and this issue can be considered as a band-aid 
+    for the time being. See NUTCH-273 and NUTCH-353 for more details. 
+
+36. NUTCH-533 - LinkDbMerger: url normalized is not updated in the key and 
+    inlinks list. (Emmanuel Joke via dogacan)
+
+37. NUTCH-535 - ParseData's contentMeta accumulates unnecessary values during 
+    parse. (dogacan)
+
+38. NUTCH-522 - Use URLValidator in the Injector. (Emmanuel Joke, dogacan)
+
+39. NUTCH-536 - Reduce number of warnings in nutch core. (dogacan)
+
+40. NUTCH-439 - Top Level Domains Indexing / Scoring. Also adds 
+    domain-related utilities. (Enis Soztutar via dogacan)
+
+41. NUTCH-544 - Upgrade Carrot2 clustering plugin to the newest stable 
+    release (2.1). (Dawid Weiss via dogacan)
+
+42. NUTCH-545 - Configuration and OnlineClusterer get initialized in every
+    request. (Dawid Weiss via dogacan)
+
+43. NUTCH-532 - CrawlDbMerger: wrong computation of last fetch time. 
+    (Emmanuel Joke via dogacan)
+
+44. NUTCH-550 - Parse fails if db.max.outlinks.per.page is -1. (dogacan)
+
+45. NUTCH-546 - file URL are filtered out by the crawler. (dogacan)
+
+46. NUTCH-554 - Generator throws IOException on invalid urls.
+    (Brian Whitman via ab)
+
+47. NUTCH-529 - NodeWalker.skipChildren doesn't work for more than 1 child.
+    (Emmanuel Joke via dogacan)
+
+48. NUTCH-25 - needs 'character encoding' detector.
+    (Doug Cook, dogacan, Marcin Okraszewski, Renaud Richardet via dogacan)
+
+49. NUTCH-508 - ${hadoop.log.dir} and ${hadoop.log.file} are not propagated
+    to the tasktracker. (Mathijs Homminga, Emmanuel Joke via dogacan)
+    
+50. NUTCH-562 - Port mime type framework to use Tika mime detection framework.
+    (mattmann)
+    
+51. NUTCH-488 - Avoid parsing unnecessary links and get a more relevant outlink 
+    list. (Emmanuel Joke, Marcin Okraszewski via kubes)
+
+52. NUTCH-501 -  Implement a different caching mechanism for objects cached in
+    configuration. (dogacan)
+
+53. NUTCH-552 - Upgrade Nutch to Hadoop 0.15.x. (kubes)
+
+54. NUTCH-565 - Arc File to Nutch Segments Converter. (kubes)
+
+55. NUTCH-547 - Redirection handling: YahooSlurp's algorithm.
+    (dogacan, kubes via dogacan)
+
+56. NUTCH-548 - Move URLNormalizer from Outlink to ParseOutputFormat.
+    (Emmanuel Joke via dogacan)
+
+57. NUTCH-538 - Delete unused classes under o.a.n.util. (dogacan)
+
+58. NUTCH-494 - FindBugs: CrawlDbReader and DeleteDuplicates. (dogacan)
+
+59. NUTCH-574 - Including inlink anchor text in index can create irrelevant 
+    search results.  Created index-anchor plugin, removed functionality from 
+    index-basic plugin. For backwards compatibility, add index-anchor plugin to 
+    nutch-site.xml plugin.includes. (kubes)
+
+60. NUTCH-581 - DistributedSearch does not update search servers added to 
+    search-servers.txt on the fly.  (Rohan Mehta via kubes)
+
+61. NUTCH-586 - Add option to run compiled classes without job file
+    (enis via ab)
+
+62. NUTCH-559 - NTLM, Basic and Digest Authentication schemes for web/proxy
+    server. (Susam Pal via dogacan)
+
+63. NUTCH-534 - SegmentMerger: add -normalize option (Emmanuel Joke via ab)
+
+64. NUTCH-528 - CrawlDbReader: add some new stats + dump into a CSV format
+    (Emmanuel Joke via ab)
+
+65. NUTCH-597 - NPE in Fetcher2 (Remco Verhoef via ab)
+
+66. NUTCH-584 - urls missing from fetchlist (Ruslan Ermilov, ab)
+
+67. NUTCH-580 - Remove deprecated hadoop api calls (FS) (siren)
+
+68. NUTCH-587 - Upgrade to Hadoop 0.15.3 (kubes)
+
+69. NUTCH-604 - Upgrade to Lucene 2.3.0 (ab)
+
+70. NUTCH-602 - Allow configurable number of handlers for search servers
+    (hartbecke via kubes)
+
+71. NUTCH-607 - Update build.xml to include tika jar when building war (kubes)
+
+72. NUTCH-608 - Upgrade nutch to use released apache-tika-0.1-incubating (mattmann)
+
+73. NUTCH-606 - Refactoring of Generator, run all urls through checks (kubes)
+
+74. NUTCH-605 - Change deprecated configuration methods for Hadoop (kubes)
+
+75. NUTCH-603 - Add more default url normalizations (kubes)
+
+76. NUTCH-611 - Upgrade Nutch to use Hadoop 0.16 (kubes)
+
+77. NUTCH-44 - Too many search results, limits max results returned from a 
+    single search. (Emilijan Mirceski and Susam Pal via kubes)
+
+78. NUTCH-567 - Proper (?) handling of URIs in TagSoup. TagSoup library is
+    updated to 1.2 version. (dogacan)
+
+79. NUTCH-613 - Empty summaries and cached pages (kubes via ab)
+
+80. NUTCH-612 - URL filtering was disabled in Generator when invoked
+    from Crawl (Susam Pal via ab)
+
+81. NUTCH-601 - Recrawling on existing crawl directory (Susam Pal via ab)
+
+82. NUTCH-575 - NPE in OpenSearchServlet (John H. Lee via ab)
+
+83. NUTCH-126 - Fetching https does not work with a proxy (Fritz Elfert via ab)
+
+84. NUTCH-615 - Redirected URL-s fetched without setting fetchInterval.
+    Guard against reprUrl being null. (Emmanuel Joke, ab)
+
+85. NUTCH-616 - Reset Fetch Retry counter when fetch is successful (Emmanuel
+    Joke, ab)
+
+86. NUTCH-220 - Upgrade to PDFBox 0.7.3 (ab)
+
+87. NUTCH-223 - Crawl.java uses Integer.MAX_VALUE (Jeff Ritchie via ab)
+
+88. NUTCH-598 - Remove deprecated use of ToolBase. Use generics in Hadoop API.
+    (Emmanuel Joke, dogacan, ab)
+
+89. NUTCH-620 - BasicURLNormalizer should collapse runs of slashes with a
+    single slash. (Mark DeSpain via ab)
+
+90. NUTCH-500 - Add hadoop masters configuration file into conf folder. 
+    (Emmanuel Joke via kubes)
+
+91. NUTCH-596 - ParseSegments parse content even if it's not
+    CrawlDatum.STATUS_FETCH_SUCCESS (dogacan)
+    
+92. NUTCH-618 - Tika error "Media type alias already exists" (mattmann,kubes)
+
+93. NUTCH-634 - Upgrade Nutch to Hadoop 0.17.1 (Michael Gottesman, Lincoln
+    Ritter, ab)
+
+94. NUTCH-641 - IndexSorter incorrectly copies stored fields (ab)
+
+95. NUTCH-645 - Parse-swf unit test failing (ab)
+
+96. NUTCH-642 - Unit tests fail when run in non-local mode (ab)
+
+97. NUTCH-639 - Change LuceneDocumentWrapper visibility from
+    private to _public_ (Guillaume Smet via dogacan)
+
+98. NUTCH-651 - Remove bin/{start|stop}-balancer.sh from svn
+    tracking. (dogacan)
+
+99. NUTCH-375 - Add support for Content-Encoding: deflated
+    (Pascal Beis, ab)
+
+100. NUTCH-633 - ParseSegment no longer allow reparsing.
+     (dogacan)
+
+101. NUTCH-653 - Upgrade to hadoop 0.18. (dogacan)
+
+102. NUTCH-621 - Nutch needs to declare its crypto usage (mattmann)
+
+103. NUTCH-654 - urlfilter-regex's main does not work.
+     (dogacan)
+
+104. NUTCH-640 - confusing description "set it to Integer.MAX_VALUE".
+     (dogacan)
+     
+105. NUTCH-662 - Upgrade Nutch to use Lucene 2.4. (kubes)
+
+106. NUTCH-663 - Upgrade Nutch to use Hadoop 0.19 (kubes)
+
+107. NUTCH-647 - Resolve URLs tool (kubes)
+
+108. NUTCH-665 - Search Load Testing Tool (kubes)
+
+109. NUTCH-667 - Input Format for working with Content in Hadoop Streaming
+                 (kubes)
+
+110. NUTCH-635 -  LinkAnalysis Tool for Nutch. (kubes)
+
+111. NUTCH-646 -  New Indexing Framework for Nutch. (kubes)
+
+112. NUTCH-668 -  Domain URL Filter. (kubes)
+
+113. NUTCH-594 -  Serve Nutch search results in multiple formats including 
+                  XML and JSON. (kubes)
+
+114. NUTCH-442 - Integrate Solr/Nutch. (dogacan, original version by siren) 
+
+115. NUTCH-652 - AdaptiveFetchSchedule#setFetchSchedule doesn't calculate
+                 fetch interval correctly. (dogacan)
+
+116. NUTCH-627 - Minimize host address lookup (Otis Gospodnetic)
+
+117. NUTCH-678 - Hadoop 0.19 requires an update of jets3t.
+                 (julien nioche via dogacan)
+
+118. NUTCH-681 - parse-mp3 compilation problem. 
+                 (Wildan Maulana via dogacan)
+
+119. NUTCH-676 - MapWritable is written inefficiently and confusingly.
+                 (dogacan)
+
+120. NUTCH-579 - Feed plugin only indexes one post per feed due to identical
+                 digest. (dogacan)
+
+121. NUTCH-571 - parse-mp3 plugin doesn't always index album of mp3.
+                 (Joseph Chen, dogacan)
+
+122. NUTCH-682 - SOLR indexer does not set boost on the document.
+                 (julien nioche via dogacan)
+
+123. NUTCH-279 - Additions to urlnormalizer-regex (Stefan Neufeind, ab)
+
+124. NUTCH-671 - JSP errors in Nutch searcher webapp (Edwin Chu via ab)
+
+125. NUTCH-643 - ClassCastException in PDF parser (Guillaume Smet, ab)
+
+126. NUTCH-636 - Httpclient plugin https doesn't work on IBM JRE
+     (Curtis d'Entremont, ab)
+
+127. NUTCH-683 - NUTCH-676 broke CrawlDbMerger. (dogacan)
+
+128. NUTCH-631 - MoreIndexingFilter fails with NoSuchElementException
+     (Stefan Will, siren)
+     
+129. NUTCH-691 - Update jakarta poi jars to the most relevant version
+     (Dmitry Lihachev via siren)
+
+130. NUTCH-563 - Include custom fields in BasicQueryFilter
+     (Julien Nioche via siren)
+     
+131. NUTCH-695 - Incorrect mime type detection by MoreIndexingFilter plugin
+     (Dmitry Lihachev via siren)
+     
+132. NUTCH-694 - Distributed Search Server fails (siren)
+
+133. NUTCH-626 - Fetcher2 breaks out the domain with db.ignore.external.links
+     set at cross domain redirects (Remco Verhoef, dogacan via siren)
+
+134. NUTCH-247 - Robot parser to restrict (kubes, siren)
+
+135. NUTCH-698 - CrawlDb is corrupted after a few crawl cycles (dogacan
+     via siren)
+     
+136. NUTCH-699 - Add an "official" solr schema for solr integration (dogacan,
+     Dmitry Lihachev via siren)
+
+137. NUTCH-703 - Upgrade to Hadoop 0.19.1 (ab)
+
+138. NUTCH-419 - Unavailable robots.txt kills fetch (Carsten Lehmann,
+     Doug Cook via ab)
+     
+139. NUTCH-700 - Neko1.9.11 goes into a loop (Julien Nioche, siren)
+
+140. NUTCH-669 - Consolidate code for Fetcher and Fetcher2 (siren)
+
+141. NUTCH-711 - Indexer failing after upgrade to Hadoop 0.19.1 (ab)
+
+142. NUTCH-684 - Dedup support for Solr. (dogacan)
+
+143. NUTCH-715 - Subcollection plugin doesn't work with default
+     subcollections.xml file (Dmitry Lihachev via siren)
+     
+144. NUTCH-722 - Nutch contains JAI jars that we cannot redistribute
+
+Release 0.9 - 2007-04-02
+
+ 1. Changed log4j configuration to log to stdout on commandline
+    tools (siren)
+
+ 2. NUTCH-344 - Fix for thread blocking issue (Greg Kim via siren)
+ 
+ 3. NUTCH-260 - Update hadoop version to 0.5.0 (Renaud Richardet,
+    siren)
+
+ 4. Optionally skip pages with abnormally large values of Crawl-Delay
+    (Dennis Kubes via ab)
+
+ 5. Change readdb -stats to use CombiningCollector (ab)
+
+ 6. NUTCH-348 - Fix Generator to select highest scoring pages (Chris
+    Schneider and Stefan Groschupf via ab)
+
+ 7. NUTCH-347 - Adjust plugin build script not to emit warnings when copying
+    dependent jars (siren)
+    
+ 8. NUTCH-338 - Remove the text parser as an option for parsing PDF files
+    in parse-plugins.xml (Chris A. Mattmann via siren)
+    
+ 9. NUTCH-105 - Network error during robots.txt fetch causes file to
+    be ignored (Greg Kim via siren)
+    
+10. NUTCH-367 - DistributedSearch throws ClassCastException (siren)
+
+11. NUTCH-332 - Fix the problem of doubling scores caused by links pointing
+    to the current page (e.g. anchors). (Stefan Groschupf via ab)
+
+12. NUTCH-365 - Flexible URL normalization (ab)
+
+13. NUTCH-336 - Differentiate between newly discovered pages and newly
+    injected pages (Chris Schneider via ab) NOTE: this changes the
+    scoring API, filter implementations need to be updated.
+
+14. NUTCH-337 - Fetcher ignores the fetcher.parse value (Stefan Groschupf
+    via ab)
+
+15. NUTCH-350 - Urls blocked by http.max.delays incorrectly marked as GONE
+    (Stefan Groschupf via ab)
+
+16. NUTCH-374 - when http.content.limit is set to -1 and
+    Response.CONTENT_ENCODING is gzip or x-gzip, nothing can be fetched
+    (King Kong via pkosiorowski)
+
+17. NUTCH-383 - upgrade to Hadoop 0.7.1 and Lucene 2.0.0. (ab)
+
+  ****************************** WARNING !!! ********************************
+  * This upgrade breaks data format compatibility. A tool 'convertdb'       *
+  * was added to migrate existing CrawlDb-s to the new format. Segment data *
+  * can be partially migrated using 'mergesegs', however segments will      *
+  * require re-parsing (and consequently re-indexing).                      *
+  ****************************** WARNING !!! ********************************
+
+18. NUTCH-371 - DeleteDuplicates now correctly implements both parts of
+    the algorithm. (ab)
+
+19. NUTCH-391 - ParseUtil logs file contents to log file when it cannot
+    find parser (siren)
+
+20. NUTCH-379 - ParseUtil does not pass through the content's URL to the
+    ParserFactory (Chris A. Mattmann via siren)
+
+21. NUTCH-361, NUTCH-136 - When jobtracker is 'local' generate only one
+    partition. (ab)
+
+22. NUTCH-399 - Change CommandRunner to use concurrent api from jdk (siren)
+
+23. NUTCH-395 - Increase fetching speed (siren)
+
+24. NUTCH-388 - nutch-default.xml has outdated example for urlfilter.order
+    (reported by Jared Dunne)
+
+25. NUTCH-404 - Fix LinkDB Usage - implementation mismatch (siren)
+
+26. NUTCH-403 - Make URL filtering optional in Generator (siren)
+
+27. NUTCH-405 - Content object is not properly initialized in map method
+    of ParseSegment (siren)
+
+28. NUTCH-362 - Remove parse-text from unsupported filetypes in
+    parse-plugins.xml (siren)
+    
+29. NUTCH-305 - Update crawl and url filter lists to exclude
+    jpeg|JPEG|bmp|BMP, suffix-urlfilter.txt (contributed by Stefan
+    Neufeind) is also updated (siren)
+    
+30. NUTCH-406 - Metadata tries to write null values (mattmann)
+
+31. NUTCH-415 - Generator should mark selected records in CrawlDb. 
+    Due to increased resource consumption this step is optional. 
+    Application-level locking has been added to prevent concurrent
+    modification of databases. (ab)
+
+32. NUTCH-416 - CrawlDatum status and CrawlDbReducer refactoring. It is
+    now possible to correctly update CrawlDb from multiple segments.
+    Introduce new status codes for temporary and permanent
+    redirection. (ab)
+
+33. NUTCH-322 - Fix Fetcher to store redirected pages and to store
+    protocol-level status. This also should fix NUTCH-273. (ab)
+
+34. Change default Fetcher behavior not to follow redirects immediately.
+    Instead Fetcher will record redirects as new pages to be added to CrawlDb.
+    This also partially addresses NUTCH-273. (ab)
+
+35. Detect and report when Generator creates 0-sized segments. (ab)
+
+36. Fix Injector to preserve already existing CrawlDatum if the seed list
+    being injected also contains such URL. (ab)
+
+37. NUTCH-425, NUTCH-426 - Fix anchors pollution. Continue after
+    skipping bad URLs. (Michael Stack via ab)
+
+38. NUTCH-325 - UrlFilters.java throws NPE in case urlfilter.order contains
+    Filters that are not in plugin.includes (Stefan Groschupf, siren)
+    
+39. NUTCH-421 - Allow predeterminate running order of indexing filters
+    (Alan Tanaman, siren)
+
+40. When indexing pages with redirection, drop all intermediate pages and
+    index only the final page. (ab)
+
+41. Upgrade to Hadoop 0.10.1. (ab)
+
+42. NUTCH-420 - Fix a bug in DeleteDuplicates where results depended on the
+    order in which IndexDoc-s are processed. (Dogacan Guney via ab)
+
+43. NUTCH-428 - NullPointerException thrown when agent name is not
+    configured properly. Changed to throw RuntimeException instead.
+    (siren)
+
+44. NUTCH-430 - Integer overflow in HashComparator.compare (siren)
+
+45. NUTCH-68 - Add a tool to generate arbitrary fetchlists. (ab)
+
+46. NUTCH-433 - java.io.EOFException in newer nightlies in mergesegs
+    or indexing from hadoop.io.DataOutputBuffer (siren)
+
+47. NUTCH-339 - Fetcher2: a queue-based fetcher implementation. (ab)
+
+48. NUTCH-390 - Javadoc warnings (mattmann)
+
+49. NUTCH-449 - Make junit output format configurable. (nigel via cutting)
+
+50. NUTCH-432 - Fix a bug where platform name with spaces would break the
+    bin/nutch script. (Brian Whitman via ab)
+
+51. Upgrade to Hadoop 0.11.2 and Lucene 2.1.0 release. (ab)
+
+52. NUTCH-167 - Observation of robots "noarchive" directive. (ab)
+
+53. NUTCH-384 - Protocol-file plugin does not allow the parse plugins
+    framework to operate properly (Heiko Dietze via mattmann)
+
+54. NUTCH-233 - Wrong regular expression hangs reduce process forever (Stefan
+    Groschupf via kubes)
+    
+55. NUTCH-436 - Incorrect handling of relative paths when the embedded URL 
+    path is empty (kubes)
+
+56. Upgrade to Hadoop 0.12.1 release. (ab)
+
+57. NUTCH-246 - Incorrect segment size being generated due to time
+    synchronization issue (Stefan Groschupf via ab)
+
+58. Upgrade to Hadoop 0.12.2 release. (ab)
+
+59. NUTCH-333 - SegmentMerger and SegmentReader should use NutchJob. (Michael
+    Stack and Dogacan Guney via kubes)
+
+Release 0.8 - 2006-07-25
+
+ 0. Totally new architecture, based on hadoop
+    [http://lucene.apache.org/hadoop] (cutting)
+
+ 1. NUTCH-107 - Typo in plugin/urlfilter-*/plugin.xml. (Stephen Cross).
+
+ 2. NUTCH-108 - Log hosts that exceed generate.max.per.host.
+    (Rod Taylor via cutting)
+
+ 3. NUTCH-88 - Enhance ParserFactory plugin selection policy
+    (jerome)
+
+ 4. NUTCH-124 - Protocol-httpclient does not follow redirects when 
+    fetching robots.txt (cutting)
+
+ 5. NUTCH-130 - Be explicit about target JVM when building (1.4.x?)
+    (stack@archive.org, cutting)
+
+ 6. NUTCH-114 -	Getting number of urls and links from crawldb
+    (Stefan Groschupf via ab)
+
+ 7. NUTCH-112 - Link in cached.jsp page to cached content is an 
+    absolute link (Chris A. Mattmann via jerome)
+
+ 8. NUTCH-135 - Http header meta data are case insensitive in the
+    real world (Stefan Groschupf via jerome)
+
+ 9. NUTCH-145 - Build of war file fails on Chinese (zh) .xml files due
+    to UTF-8 BOM (KuroSaka TeruHiko via siren)
+
+10. NUTCH-121 - SegmentReader for mapred (Rod Taylor via ab)
+
+11. Added support for OpenSearch (cutting)
+
+12. NUTCH-142 - NutchConf should use the thread context classloader
+    (Mike Cannon-Brookes via pkosiorowski)
+
+13. NUTCH-160 - Use standard Java Regex library rather than
+    org.apache.oro.text.regex (Rod Taylor via cutting)
+
+14. NUTCH-151 - CommandRunner can hang after the main thread exec is
+    finished and has inefficient busy loop (Paul Baclace via cutting)
+
+15. NUTCH-174 - Problem encountered with ant during compilation
+
+16. NUTCH-190 - ParseUtil drops reason for failed parse
+    (stack@archive.org via ab)
+
+17. NUTCH-169 - Remove static NutchConf (Marko Bauhardt via ab)
+
+18. NUTCH-194 - Nutch-169 introduced two tiny bugs (Marko Bauhardt via ab)
+
+19. NUTCH-178 - in search.jsp must be session creation "false"
+    (YourSoft via siren)
+
+20. NUTCH-200 - OpenSearch Servlet is broken
+    (Marko Bauhardt via siren)
+
+21. NUTCH-81 - Webapp only works when deployed in root
+    (AJ Banck, Michael Nebel via siren)
+
+22. NUTCH-139 - Standard metadata property names in the ParseData
+    metadata (Chris A. Mattmann, jerome)
+
+23. NUTCH-192 - Meta data support for CrawlDatum
+    (Stefan Groschupf via ab)
+    
+24. NUTCH-52 - Parser plugin for MS Excel files
+    (Rohit Kulkarni via jerome)
+
+25. NUTCH-53 - 	Parser plugin for Zip files
+    (Rohit Kulkarni via jerome)
+
+26. NUTCH-137 - footer is not displayed in search result page
+    (KuroSaka TeruHiko via siren)
+
+27. NUTCH-118 - FAQ link points to invalid URL
+    (Steve Betts via siren)
+
+28. NUTCH-184 - Serbian (sr, Cyrillic) and Serbo-Croatian (sh, Latin)
+    translation (Ivan Sekulovic via siren)
+
+29. NUTCH-211 - FetchedSegments leave readers open (Stefan Groschupf
+    via cutting)
+
+30. NUTCH-140 - Add alias capability in parse-plugins.xml file that
+    allows mimeType->extensionId mapping (Chris A. Mattmann via jerome)
+
+31. NUTCH-214 - Added Links to web site to search mailing list
+    (Jake Vanderdray via jerome)
+
+32. NUTCH-204 - Multiple field values in HitDetails
+    (Stefan Groschupf via jerome)
+
+33. NUTCH-219 - file.content.limit & ftp.content.limit should be changed
+    to -1 to be consistent with http (jerome)
+    
+34. NUTCH-221 - Prepare nutch for upcoming lucene 2.0 (siren)
+
+35. NUTCH-91 - Empty encoding causes exception (Michael Nebel via
+    pkosiorowski)
+
+36. NUTCH-228 - Clustering plugin descriptor broken (Dawid Weiss via
+    jerome)
+
+37. NUTCH-229 - Improved handling of plugin folder configuration
+    (Stefan Groschupf via ab)
+
+38. NUTCH-206 - Search server throws InstantiationException (ab)
+    
+39. NUTCH-203 - ParseSegment throws InstantiationException (Marko Bauhardt
+    via ab)
+
+40. NUTCH-3 - Multi values of header discarded (Stefan Groschupf via ab)
+
+41. Update to lucene 1.9.1 (cutting)
+
+42. NUTCH-235 - Duplicate Inlink values (ab)
+
+43. NUTCH-234 - Clustering extension code cleanups and a real
+    JUnit test case for the current implementation (Dawid Weiss via ab)
+    
+44. NUTCH-210 - Context.xml file for Nutch web application
+    (Chris A. Mattmann via jerome)
+
+45. NUTCH-231 - Invalid CSS entries (AJ Banck via jerome)
+
+46. NUTCH-232 - Search.jsp has multiple search forms creating
+    invalid html / incorrect focus function (jerome)
+    
+47. NUTCH-196 - lib-xml and lib-log4j plugins (ab, jerome)
+
+48. NUTCH-244 - Inconsistent handling of property values
+    boundaries / unable to set db.max.outlinks.per.page to
+    infinite (jerome)
+    
+49. NUTCH-245 -	DTD for plugin.xml configuration files
+    (Chris A. Mattmann via jerome)
+
+50. NUTCH-250 - Generate to log truncation caused by
+    generate.max.per.host (Rod Taylor via cutting)
+    
+51. NUTCH-125 - OpenOffice Parser plugin (ab)
+
+52. Switch from using java.io.File to org.apache.hadoop.fs.Path.
+    (cutting)
+
+53. NUTCH-240 - Scoring API: extension point, scoring filters and
+    an OPIC plugin (ab)
+    
+54. NUTCH-134 - Summarizer doesn't select the best snippets (jerome)
+
+55. NUTCH-268 - Generator and lib-http use different definitions of
+    "unique host" (ab)
+    
+56. NUTCH-280 - Url query causes NullPointerException (Grant Glouser
+    via siren)
+
+57. NUTCH-285 - LinkDb Fails rename doesn't create parent directories
+    (Dennis Kubes via ab)
+
+58. NUTCH-201 - Add support for subcollections
+    (siren)
+
+59. NUTCH-298 - If a 404 for a robots.txt is returned a NPE is thrown
+    (Stefan Groschupf via jerome)
+
+60. NUTCH-275 - Fetcher not parsing XHTML-pages at all (jerome)
+
+61. NUTCH-301 - CommonGrams loads analysis.common.terms.file for each query
+    (Stefan Groschupf via jerome)
+
+62. NUTCH-110 - OpenSearchServlet outputs illegal xml characters
+    (stack@archive.org via siren)
+
+63. NUTCH-292 - OpenSearchServlet: OutOfMemoryError: Java heap space
+    (Stefan Neufeind via siren)
+
+64. NUTCH-307 - Wrong configured log4j.properties (jerome)
+
+65. NUTCH-303 - Logging improvements (jerome)
+
+66. NUTCH-308 - Maximum search time limit (ab)
+
+67. NUTCH-306 - DistributedSearch.Client liveAddresses concurrency
+    problem (Grant Glouser via siren)
+
+68. Update to hadoop-0.4 (Milind Bhandarkar, cutting)
+
+69. NUTCH-317 - Clarify what the queryLanguage argument of
+    Query.parse(...) means (jerome)
+
+70. Added alternative experimental web gui in contrib containing
+    extensions like subcollection, keymatch, user preferences,
+    caching, implemented mainly using tiles and jstl (siren)
+
+71. NUTCH-320 DmozParser does not output list of urls to stdout
+    but to a log file instead. Original functionality restored.
+
+72. NUTCH-271 - Add ability to limit crawling to the set of initially
+    injected hosts (db.ignore.external.links) (Philippe Eugene,
+    Stefan Neufeind via ab)
+
+73. NUTCH-293 - Support for Crawl-Delay (Stefan Groschupf via ab)
+
+74. NUTCH-327 - Fixed logging directory on cygwin (siren)
+
+Release 0.7 - 2005-08-17
+
+ 1. Added support for "type:" in queries. Search results are limited/qualified
+    by mimetype or its primary type or sub type. For example,
+    (1) searching with "type:application/pdf" restricts results
+    to pages which were identified to be of mimetype "application/pdf".
+    (2) with "type:application", nutch will return pages of
+    primary type "application".
+    (3) with "type:pdf", only pages of sub type "pdf" will be listed.
+    (John Xing, 20050120)
+
+ 2. Added support for "date:" in queries. Last-Modified is indexed.
+    Search results are restricted by lower and upper date (inclusive)
+    as date:yyyymmdd-yyyymmdd. For example, date:20040101-20041231
+    only returns pages with Last-Modified in year 2004.
+    (John Xing, 20050122)
+
+ 3. Add URLFilter plugin interface and convert existing url filters into
+    plugins. (John Xing, 20050206)
+
+ 4. Add UpdateSegmentsFromDb tool, which updates the scores and
+    anchors of existing segments with the current values in the web
+    db.  This is used by CrawlTool, so that pages are now only fetched
+    once per crawl.  (Doug Cutting, 20050221)
+
+ 5. Moved code into org.apache.nutch sub-packages.  Changed license to
+    Apache 2.0.  Removed jar files whose licenses do not permit
+    redistribution by Apache.  Disabled compilation of plugins which
+    require these libraries.  (Doug Cutting 20050301)
+
+ 6. Index host and title in separate fields.  Host was indexed
+    previously only as a part of the URL.  Title was indexed as an
+    anchor.  Now boosts for matching these fields may be adjusted
+    separately from boosts for matching anchors and url.  Also: move
+    site indexing to index-basic plugin to minimize the number of
+    times the URL needs to be parsed; and, stop using anchor analyzer
+    for anything but anchors.  (Piotr Kosiorowski via Doug Cutting
+    20050323)
+
+ 7. Add servlet Cached.java that serves cached Content of any mime type.
+    Slightly modified are web.xml and cached.jsp.
+    (John Xing, 20050401)
+
+ 8. Add skipCompressedByteArray() to WritableUtils.java.
+    (John Xing, 20050402)
+
+ 9. Fixes to jsp and static web pages.  These now use relative links,
+    so that the Nutch webapp file can be used in places other than at
+    the root.  Also fixed links to the about and help pages.  Bug #32.
+    (Jerome Charron via cutting, 20050404)
+
+10. Added some features to DistributedSearch: new segments can be added
+    to searchservers without restarting the frontend, defective search
+    servers are not queried until they come back online, watchdog keeps
+    an eye on your searchservers and writes simple statistics.
+    (Sami Siren, 20050407)
+    
+11. Fix for bug #4 - Unbalanced quote in query eats all resources.
+	(Piotr Kosiorowski, Sami Siren, 20050407)
+
+12. Close Issue #33 - MIME content type detector (using magic char sequences).
+    (Jerome Charron and Hari Kodungallur via John Xing, 20050416)
+
+13. Add a servlet that implements A9's OpenSearch RSS web service.
+    (cutting, 20050418)
+
+14. Remove references to link analysis from tutorial, and enable
+    scoring by link count when generating fetchlists and searching.
+    (cutting, 20040419)
+
+15. Make query boosts for host, title, anchor and phrase matches
+    configurable.  (Piotr Kosiorowski via cutting, 20050419)
+
+16. Add support for sorting search results and search-time deduping by
+    fields other than site.
+
+17. Automatically convert range queries into cached range filters.
+    This improves the performance and scalability of, e.g., date range
+    searching.
+
+18. Several methods have been renamed due to misspellings.  The old
+    methods have been deprecated and will be removed before the 1.0
+    release.
+
+
+Release 0.6
+
+ 1. Added clustering-carrot2 plugin, together with introduction of clustering
+    api and modification to search jsp. (Dawid Weiss via John Xing, 20040809)
+
+ 2. Make a number of changes to NDFS (Nutch Distributed File System)
+    to fix bugs, add admin tools, etc.
+
+    Also, modify all command line tools so you can indicate whether to
+    use NDFS or the local filesystem.  If you indicate nothing, then
+    it defaults to the local fs.
+
+    I've used this to do a 35m page crawl via NDFS, distributed over a
+    dozen machines.  (Mike Cafarella)
+
+ 3. Add support for BASE tags in HTML.  Outlinks are now correctly
+    extracted when a BASE tag is present.  (cutting)
+
+ 4. Fix two bugs in result pagination.  When the last hit on a page
+    was the last hit overall, the "next" button was sometimes shown
+    when the "show all" button should be shown instead.  Also, in
+    certain cases, the "show all" button would be shown when the
+    "next" button should have been shown.  (cutting)
+
+ 5. Add config parameter "indexer.max.tokens" that determines the
+    maximum number of tokens indexed per field.  (Andy Hedges via cutting)
+
+ 6. Add parser for mp3 files.  (Andy Hedges via cutting)
+
+ 7. Add RegexUrlNormalizer.  This is useful for things like stripping
+    out session IDs from URLs.  To use it, add values for
+    urlnormalizer.class and urlnormalizer.regex.file to your
+    nutch-site.xml.  The RegexUrlNormalizer class extends the
+    BasicUrlNormalizer, and does basic normalization as well.
+    (Luke Baker via cutting)
+
+ 8. Added Swedish translation (Stefan Verzel via Sami Siren, 20040910)
+
+ 9. Added Polish translation (Andrzej Bialecki, 20040911)
+ 
+10. Added 3 more language profiles to language identifier (ru,hu,pl).
+	Other changes to language identifier: Profiles converted to utf8,
+	added some test cases, changed the similarity calculation.
+	(Sami Siren, 20040925)
+
+11. Added plugin parse-rtf (Andy Hedges via John Xing, 20040929)
+
+12. Added plugin index-more and more.jsp (John Xing, 20041003)
+
+13. Added "View as Plain Text" feature. A new op OP_PARSETEXT is introduced
+    in DistributedSearch.java. text.jsp is added. (John Xing, 20041006)
+
+14. Fixed a bug that fails cached.jsp, explain.jsp, anchors.jsp and text.jsp
+    (but not search.jsp) with NullPointerException in distributed search.
+    It seems that this bug appears after "hits per site" stuff is added.
+    The fix is done in Hit.java, making sure String site is never null.
+    Hope this fix does not have a bad effect on "hits per site" code.
+    (John Xing, 20041006)
+
+15. Fixed a bug that fails fullyDelete() in FileUtil.java for
+    LocalFileSystem.java. This bug also exposes possible incompleteness
+    of NDFSFile.java, where a few methods are not supported, including
+    delete(). Nothing changed in NDFSFile.java though. Leave it for future
+    improvement (John Xing, 20041022).
+
+16. Introduced option -noParsing to Fetcher.java and added ParseSegment.java.
+    A new status code CANT_PARSE is added to FetcherOutput.java.
+    Without option -noParsing, no change in fetcher behavior. With
+    option -noParsing, fetcher does crawling only, no parsing is carried out.
+    Then, ParseSegment.java should be used to parse in separate pass.
+    (John Xing, 20041025)
+
+17. Added ontology plugin. Currently it is used for query refinement, as
+    exemplified in refine-query-init.jsp and refine-query.jsp. By default,
+    query refinement is disabled in search.jsp. Please check
+    ./src/plugin/ontology/README.txt for further description.
+    Ontology plugin certainly can be used for many other things.
+    (Michael J. Pan via John Xing, 20041129)
+ 
+18. Changed fetcher.server.delay to be a float, so that sub-second
+    delays can be specified.  (cutting)
+
+19. Added plugin.includes config parameter that determines which
+    plugins are included.  By default now only http, html and basic
+    indexing and search plugins are enabled, rather than all plugins.
+    This should make default performance more predictable and reliable
+    going forward. (cutting)
+
+20. Cleaned up some filesystem code, including:
+
+    - Replaced BufferedRandomAccessFile with two simpler utilities,
+      NFSDataInputStream and NFSDataOutputStream.
+
+    - Fixed the bug where SequenceFiles were no longer flushed when
+      created, so that, when fetches crashed, segments were
+      unreadable.  Now segments are always readable after crashes.
+      Only the contents of the last buffer is lost.
+
+    - Simplified the FSOutputStream API to not include seek().  We
+      should never need that functionality.
+
+    - Simplified LocalFileSystem's implementations of FSInputStream
+      and FSOutputStream and optimized FSInputStream.seek().
+
+    (cutting)
+
+21. Fixed BasicUrlNormalizer to better handle relative urls.  The file
+    part of a URL is normalized in the following manner:
+
+      1. "/aa/../" will be replaced by "/" This is done step by step until
+	 the url doesn't change anymore. So we ensure that
+	 "/aa/bb/../../" will be replaced by "/", too
+
+      2. leading "/../" will be replaced by "/"
+
+    (Sven Wende via cutting)
+
+22. Fix Page constructors so that next fetch date is less likely to be
+    misconstrued as a float.  This patches a problem in WebDBInjector,
+    where new pages were added to the db with nextScore set to the
+    intended nextFetch date.  This, in turn, confused link analysis.
+
+23. In ndfs code, replace addLocalFile(), putToLocalFile() with
+    copyFromLocalFile(), moveFromLocalFile(), copyToLocalFile() and
+    moveToLocalFile(). (John Xing, 20041217)
+
+24. Added new config parameter fetcher.threads.per.host.  This is used
+    by the Http protocol.  When this is one, behavior is as before.
+    When this is greater than one then multiple threads are permitted
+    to access a host at once.  Note that fetcher.server.delay is no
+    longer consistently observed when this is greater than one.
+    (Luke Baker via Doug Cutting)
+
+Release 0.5
+
+ 1. Changed plugin directory to be a list of directories.
+
+ 2. Permit Plugin to be the default plugin implementation.
+
+ 3. Added pluggable interface for network protocols in new package
+    net.nutch.protocol.  Moved http code from core into a plugin.
+
+ 4. Added pluggable interface for content parsing in new package
+    net.nutch.parse.  Moved html parsing code from core into a
+    plugin.
+
+ 5. Fixed a bug in NutchAnalysis where 16-bit characters were not
+    processed correctly.
+
+ 6. Fixed bug #971731: random summaries on result page.
+    (Daniel Naber via cutting)
+
+ 7. Made Nutch logo transparent. (Daniel Naber via cutting)
+
+ 8. Added file protocol plugin.  (John Xing via cutting)
+
+ 9. Added ftp protocol plugin.  (John Xing via cutting)
+
+10. Added pdf and msword parser plugins.  (John Xing via cutting)
+
+11. Added pluggable indexing interface.  By default, url, content,
+    anchors and title are indexed, as before, but now one can easily
+    alter this to, e.g., index metadata.  A demonstration is provided
+    which extracts and indexes Creative Commons license urls. (cutting)
+
+12. Add language identification plugin. 
+
+    The process of identification is as follows:
+
+    1. html (html only, HTML 4.0 "lang" attribute)
+    2. meta tags (html only, http-equiv, dc.language)
+    3. http header (Content-Language)
+    4. if all above fail "statistical analysis"
+
+    1 & 2 are run during the fetching phase and 3 & 4 are run on
+    indexing phase.
+
+    Currently supported languages (in "statistical analysis") are
+    da,de,el,en,es,fi,fr,it,nl,sv and pt. The corpus used was grabbed
+    from http://www.isi.edu/~koehn/europarl/ and the profiles were
+    built with the tool supplied in the patch.
+
+    After indexing the language can be found from field named "lang"
+
+    It's not 100% accurate but it's a start.
+    (Sami Siren)
+
+13. Added SegmentMergeTool and "mergesegs" command, to remove
+    duplicated or otherwise not used content from several segments and
+    joining them together into a single new segment.  The tool also
+    optionally performs several other steps required for proper
+    operation of Nutch - such as indexing segments, deleting
+    duplicates, merging indices, and indexing the new single segment.
+    (Andrzej Bialecki)
+
+14. Add the ability to retrieve ParseData of a search hit. ParseData
+    contains many valuable properties of a search hit.
+
+    This is required (among others) to properly display the cached
+    content because it's not possible to determine the character
+    encoding from the output of the getContent() method (which returns
+    byte[]). The symptoms are that for HTML pages using non-latin1 or
+    non-UTF8 encodings the cached preview will almost certainly look
+    broken. Using the attached patch it is possible to determine the
+    character encoding from the ParseData (for HTTP: Content-Type
+    metadata), and encode the content accordingly. (Andrzej Bialecki)
+
+15. Add a pluggable query interface.  By default, the content, anchor
+    and url fields are searched as before.  A sample plugin indexes
+    the host name and adds a "site:" keyword to query parsing.
+
+16. Added support for "lang:" in queries.  For example, searching with
+    "lang:en" restricts results to pages which were identified to
+    be in English.
+
+17. Automatically optimize field queries to use cached Lucene filters.
+    This makes, for example, searches restricted by languages or sites
+    that are very common much faster.
+
+18. Improved charset handling in jsp pages.  (jshin by cutting)
+
+19. Permit topic filtering when injecting DMOZ pages.  (jshin by cutting)
+
+20. When parsing crawled pages, interpret charset specifications in
+    html meta tags.  (jshin by cutting)
+
+21. Added support for "cc:licensed" in queries, which searches for documents
+    released under Creative Commons licenses.  Attributes of the
+    license may also be queried, with, e.g., "cc:by" for
+    attribution-required licenses, "cc:nc" for non-commercial
+    licenses, etc.
+
+22. Relative paths named in plugin.folders are now searched for on the
+    classpath.  This makes, e.g., deployment in a war file much simpler.
+
+23. Modifications to Fetcher.java.
+
+    1. Make sure it works properly with regard to creation and initialization
+    of plugin instances. The problem was that multiple threads race to
+    startUp() or shutDown() plugin instances. It was solved by synchronizing
+    certain codes in PluginRepository.java and Extension.java.
+    (Stefan Groschupf via John Xing)
+
+    2. Added code to explicitly shutDown() plugins. Otherwise FetcherThreads
+    may never return (quit) if there are still data or other structures
+    (e.g., persistent socket connections) associated with plugins. (John Xing)
+    
+    3. Fixed one type of Fetcher "hang" problems by monitoring named
+    FetcherThreads. If all FetcherThreads are gone (finished),
+    Fetcher.java is considered done. The problem was: there could be
+    runaway threads started by external libs via FetcherThreads.
+    Those threads never return, thus keep Fetcher from exiting normally.
+    (John Xing)
+
+24. Eliminate excessive hits from sites.  This is done efficiently by
+    adding the site name to Hit instances, and, when needed,
+    re-querying with too-frequent sites prohibited in the query.
+
+
+Release 0.4
+
+ 1. Http class refactored.  (Kevin Smith via Tom Pierce)
+
+ 2. Add Finnish translation. (Sampo Syreeni via Doug Cutting)
+
+ 3. Added Japanese translation. (Yukio Andoh via Doug Cutting)
+
+ 4. Updated Dutch translation. (Ype Kingma via Doug Cutting)
+
+ 5. Initial version of Distributed DB code.  (Mike Cafarella)
+
+ 6. Make things more tolerant of crashed fetcher output files.
+    (Doug Cutting)
+
+ 7. New skin for website. (Frank Henze via Doug Cutting)
+
+ 8. Added Spanish translation. (Diego Basch via Doug Cutting)
+
+ 9. Add FTP support to fetcher.  (John Xing via Doug Cutting)
+
+10. Added Thai translation. (Pichai Ongvasith via Doug Cutting)
+
+11. Added Robots.txt & throttling support to Fetcher.java.  (Mike
+    Cafarella)
+
+12. Added nightly build. (Doug Cutting)
+
+13. Default all link scores to 1.0. (Doug Cutting)
+
+14. Permit one to keep internal links. (Doug Cutting)
+
+15. Fixed dedup to select shortest URL. (Doug Cutting)
+
+16. Changed index merger so that merged index is written to named
+    directory, rather than to a generated name in that directory.
+    (Doug Cutting)
+
+17. Disable coordination weighting of query clauses and other minor
+    scoring improvements. (Doug Cutting)
+
+18. Added a new command, crawl, that constructs a database, injects a
+    url file and performs a few rounds of generate/fetch/updatedb.
+    This simplifies use for intranet sites.  Changed some defaults to
+    be more intranet friendly.  (Doug Cutting)
+
+19. Fixed a bug where Fetcher.java didn't construct correct relative
+    links when a page was redirected.  (Doug Cutting)
+
+20. Fixed a query parser problem with lookahead over plusses and minuses.
+    (Doug Cutting)
+
+21. Add support for HTTP proxy servers.  (Sami Siren via Doug Cutting)
+
+22. Permit searching while fetching and/or indexing.
+    (Sami Siren via Doug Cutting)
+
+23. Fix a bug when throttling is disabled.  (Sami Siren via Doug Cutting)
+
+24. Updated Bahasa Malaysia translation.  (Michael Lim via Doug Cutting)
+
+25. Added Catalan translation.  (Xavier Guardiola via Doug Cutting)
+
+26. Added Brazilian Portuguese translation.
+    (A. Moreir via Doug Cutting)
+
+27. Added a French translation.  (Julien Nioche via Doug Cutting)
+
+28. Updated to Lucene 1.4RC3.  (Doug Cutting)
+
+29. Add capability to boost by link count & use it in crawl tool.
+    (Doug Cutting)
+
+30. Added plugin system.  (Stefan Groschupf via Doug Cutting)
+
+31. Add this change log file, for recording significant changes to
+    Nutch.  Populate it with changes from the last few months.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/KEYS	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,301 @@
+This file contains the PGP keys of various developers.
+Please don't use them for email unless you have to. Their main
+purpose is code signing.
+
+Examples of importing this file in your keystore:
+ gpg --import KEYS.txt
+ (need pgp and other examples here)
+
+Examples of adding your key to this file:
+ pgp -kxa <your name> and append it to this file.
+ (pgpk -ll <your name> && pgpk -xa <your name>) >> this file.
+ (gpg --list-sigs <your name>
+     && gpg --armor --export <your name>) >> this file.
+
+pub   1024D/A7239D59 2005-10-12
+      Key fingerprint = 4B96 409A 098D BD51 1DF2  BC18 DBAF 69BE A723 9D59
+uid                  Doug Cutting (Lucene guy) <cutting@apache.org>
+sig 3        A7239D59 2005-10-12  Doug Cutting (Lucene guy) <cutting@apache.org>
+sub   2048g/ADDE5978 2005-10-12
+sig          A7239D59 2005-10-12  Doug Cutting (Lucene guy) <cutting@apache.org>
+
+-----BEGIN PGP PUBLIC KEY BLOCK-----
+Version: GnuPG v1.4.1 (FreeBSD)
+
+mQGiBENNR5oRBAC2ZzxD2fXYht8qkfT/6tjWJxLG4KH2dLEWSYEzku8ZtJ7eA6X7
+/hcvZdhjGH0aA6MAEVSxh6LO1hmRARE2e2Br68j4TjwbQ0J5BOgkMMAArmQe7w6B
+RjKUI3H74Qbfjuk4Ebf1fNkRkpwuw+JxZu5pqpACqwv6nPhcSDDjbuA/1wCgj+++
+uxVSQMF4Xrd0hApOSYGHL8kD/jCU+vM3ILuFVTCgfC5RehmqwQo/f6KEv99jJSxX
+ClcksiLquOH8vMc3MV1YWOe4u93DI7iAYzCylS1s2Wn0bLEBrbdGKLMH4hSSMDRC
+pjnyvzvnEMhMU+Jn3LK6lQw4nHH+aDGFcYZ2pQen7JAcYz7l6QeTsvMnRV+v13K1
+/zRjA/9QUxrgg2N5WQnEhMegIWBKVhxQV6a2mSfeNd0ApxzdqdoHZNkUD+pKMB0F
+oQ9aP55KbtvFosurFgEmvwLIoMnQohxjIhdk0Hx3xMT17CtYl04F0C+QNxeXpWr7
+/B0kq8nALn17hXz5A1bFaiMHX86QmvNyMTDUC2VrVbkV251dlLQuRG91ZyBDdXR0
+aW5nIChMdWNlbmUgZ3V5KSA8Y3V0dGluZ0BhcGFjaGUub3JnPoheBBMRAgAeBQJD
+TUeaAhsDBgsJCAcDAgMVAgMDFgIBAh4BAheAAAoJENuvab6nI51ZjRAAoIZ96gYE
+f8QCDpXkBQqtNgRiF4t5AJ9JKMrN/Ow+Kyl75FU9U2KWyPoMk7kCDQRDTUejEAgA
+m3UdcglfOdgqI7Z9XUX38yqiFzNozSvTdOt3j6evIVvjJ3e0P87tUQlrdsbMcaXd
++PAc7EA5LE0eJlE9jR1/18tsIlYi/n1hxz1lWtaZ+9he3yTB12QmAf4MMTXaRBkI
+ZqwdwZxmL5V+2TmhFT2bIzPLgrMHNsA4dtQuBak41GC+VXovqitS9Xzse2Ki+U9u
+SiRPsD7x5DcgJm9sg/zqCNrvDN8vOC8iHa/CIqsZr3xaPgfQLZp6Xk3doHLc6IJ9
+6knDAZvzJFgfj8MGCQoOExE/1XoNGTWcgoiy0D30ADG+rtIbaRT8tdQ6m19/ytqd
+Zm7ibB7b78/pyfvvcB5tKwADBgf9GwdUdHUPjezlFpcCI/K3XHKdPLi00HJ2L1O8
+5pErBjDyZ5ey7vAMuYB5O31dB7pncSVsTdt9RRQHS+iLrv9aJjvYhV4yQU0ADkgC
+9qEvxm7wpn76AT+Z1LIay/vNoQPxnfWq+uZD/Lnku1VcnMZ5teSG6uJzApBGYsgN
+xpPPsobKKvclZdhO5NhhZLFZ0taWh4pna2jpDTLmyRa4kO7p7rIixsKxFfLUUc33
+2RqBomnm9eRlSvC4BBCq6M7YPLG0Rv5WmzuuWpc865EaMoBEtwPQBb4+qcMN69Lp
+3x6EaymTWmHx1o8aUjAxhORE/miy53eGPzIXY+csjMyAmSxDG4hJBBgRAgAJBQJD
+TUejAhsMAAoJENuvab6nI51ZlTIAn0oHlUPw+v1gVUJ8D2Nu26knOqJKAJ4spe/k
+Sc2xRlsNP3tZiO+jYMAFSg==
+=goQx
+-----END PGP PUBLIC KEY BLOCK-----
+pub  1024D/7C491924 2006-03-30 Piotr Kosiorowski <pkosiorowski@apache.org>
+sig 3       7C491924 2006-03-30   Piotr Kosiorowski <pkosiorowski@apache.org>
+sub  2048g/4A70BB35 2006-03-30
+sig         7C491924 2006-03-30   Piotr Kosiorowski <pkosiorowski@apache.org>
+
+-----BEGIN PGP PUBLIC KEY BLOCK-----
+Version: GnuPG v1.2.7 (GNU/Linux)
+
+mQGiBEQrfF8RBACblz5gaIolsKdJgtdy913C+k/QXvaeg3R+8dXXkgVgC5vvRbUk
+Ei6UBRMU5H0cNE76d0XlMYP1MccqdowsfPfWxl04VViW6p+KHmBa2ICIWvq1PQXL
+XhocuRZn6dzfnxcFjsJlsKXtX+okzL9rc1AHiPsb+14XFQtd0/uxs/qeswCgyM68
+hxpwMZU4U0Q7yYkB8usVjbcD/iC65v+8DPhVgxp4o66JJqTYkBZ73mS4f/DDlQsL
+9qCj8h9rLYHmV85hSx3pBBDuz/HjIzu5ruj+l78H++WISXE82hj++OS0bpKnb+nV
+x/iN+b/Y0W0CzMEms+42LcNz1azvLL6ZBgLwnUePT1mBnOy6UgFW1XZGow/XO4Lr
+2py7BACP2WrV+rAzc8RcelmvE3eaAj0DJhAl2Brkdl7B4KDNpBTYZM2TaJ5G5pDK
+EhzH3O6IZP4dRh4iEipl+qcJ0eC5OlKHxqyXXbQYH5jzqkl+4cAQRkCliWuFrGcO
+o3XaOFE54dpY8FZbineEJLrg4Ynh592gO731IcP4gm401ORGv7QrUGlvdHIgS29z
+aW9yb3dza2kgPHBrb3Npb3Jvd3NraUBhcGFjaGUub3JnPoheBBMRAgAeBQJEK3xf
+AhsDBgsJCAcDAgMVAgMDFgIBAh4BAheAAAoJEEsO4ix8SRkkWLwAoLrn6dtn38yI
+8dja2k2lJJ7PVpOoAJ9qZO+QfOfJRf1H+1L6qOuviiDkR7kCDQREK3xpEAgAklbu
+2ctaceFu6nolNd3cnKNqDNppvSRSwDzZZytXjzV10E5VW7fYlN1+huOSV9nRLAIL
+stNloFiOdQGElT0t8Xi9N9X1BuzSkxWMKqDHaTOSnKNupCuDzz9F3oYXVMbLwZBG
+GJAMezd6WuCl+KyhsJgt0GD/H2Ucyck2CqTQRZFPOPOPB2urZbmw8F5bTI3u9J1Q
+ElwApNTrHS04HyNEq5o9j/iTMvvunnkliQFI0Z/flvfHaV6go3/ZhMeVkLU7m/mq
+bPh467HN0MTN5O+znak164nBumxcqD8yUF5TiWD42dykNffbN2ajZzgVvTxWerVV
+mqVMTetbhl3Hoaff0wADBQf/d+XRxh7etS3IO5Jvv85de9QvQPFm5JZpnTNfdnil
+b9G3WRjZIsdmAG2khtJNmlUMUegK0ej6jsCFmsWTqg8cbCG7TBcYySWKSTGklELu
+N69g9VaG60GUX6EOoEmfRMrINlq/5egRSs8gukb1qYC0+0ZpaiWu5+PDx7ocADOq
+FwZUcsp9k0c4y2QUxvzvuRNCjJftTNQT5k+r1RxBnk5RYxiL2ga/UfUXZ3gXZvPV
+sTeSDXiMAAHvCqnAKGyGK7boST+zAD3qkBLH7nL44rS+9H+piERCmavSLcxI0CoZ
+oJb9uniYB8HMLDhOc1HwN3L5QzEJ68JdGmdEapvxE0r6s4hJBBgRAgAJBQJEK3xp
+AhsMAAoJEEsO4ix8SRkk2WUAn1/AYISfGPmMKuppjMZmAcs8Svm0AKCWsjGPGKSi
+gKskqkY17BkNKkbBtQ==
+=j0Ns
+-----END PGP PUBLIC KEY BLOCK-----
+pub   1024D/0B7E6CFA 2006-07-06
+uid                  Sami Siren <siren@apache.org>
+sig 3        0B7E6CFA 2006-07-06  Sami Siren <siren@apache.org>
+sig          E222DE4F 2007-05-02  Mathias Herberts <Mathias.Herberts@iroise.net>
+sig          911203E4 2007-05-02  Mathias Herberts <Mathias.Herberts@gicm.fr>
+sig          302DA568 2007-05-03  Rodent of Unusual Size (DSA) <Ken@Coar.Org>
+sig          2C312D2F 2007-05-03  Rodent of Unusual Size (DSS) <Ken@Coar.Org>
+sig          F12F6072 2007-05-05  Fred Vos <fred.vos@gmail.com>
+sig 3        990ED4AA 2007-05-02  Knut Anders Hatlen <kahatlen@apache.org>
+sig 3        311A3DE5 2007-05-05  Ruediger Pluem <rpluem@apache.org>
+sig          A99F75DD 2007-05-03  Rodent of Unusual Size <Ken@Coar.Org>
+sig          5F298824 2007-05-06  Simon Pepping <spepping@leverkruid.eu>
+sig          4358C584 2007-05-06  Vincent Hennebert <vhennebert@apache.org>
+sig          4CEED75F 2007-05-07  Nick Burch <nick@gagravarr.org>
+sig          C874155C 2007-05-07  Thilo Goetz (home key) <twgoetz@gmx.de>
+sig 3        88817402 2007-05-06  Thomas Vandahl <thomas@vandahl.org>
+sig          01530235 2007-05-02  Luc Maisonobe (general purpose) <Luc.Maisonobe@free.fr>
+sig          40581837 2007-05-08  Nick Kew <nick@webthing.com>
+sig          5F6B8B72 2007-05-12  Stefan Bodewig <bodewig@apache.org>
+sub   2048g/A3A3EC3F 2006-07-06
+sig          0B7E6CFA 2006-07-06  Sami Siren <siren@apache.org>
+
+-----BEGIN PGP PUBLIC KEY BLOCK-----
+Version: GnuPG v1.4.7 (GNU/Linux)
+
+mQGiBESs8FMRBADhMg5ONjSVuSVJoYbOL8vvoygjO9qH/MS21Ue2Hx2qLf8xB1/W
+baVL5kEH0ixkeg6H+qO4gGpyJ/cdww0v0CjbxRZw2R2QP1PtpZgioGv4YYNstUis
++rx6+4x4ny6M9mlYEH9QsoqCrfZSSUKxuSTLRQOU97Bm1kPv9b7L2D9UbwCgwNvU
+T5cfQz0bZNtXpOfXhYQezK8EAN/djxVjIbpNEgGxSO7dkR3OH9He+FJcn1vWRBMm
++mNq5hzO2rAlJhpsL0tKh7+02PYoMeB9HpRfrxhE9NST7Pk6KaQ1H1HEJgTJQ3NH
+Oe1ZC2pmmaVOysFsVUbAA6WSwCcEPr/PWYxBpDXprny3M4AT4c2hBZ+yGweOLT6+
+tjsbA/0X2j6lgbyZe4/VmBdfGeTLxkb5SBXYfz83AN2Fp++VsiPSWTn5ilUzwY2e
+XotvX7Bm/noh2liOmiN83ZQrChBOlmYA/vJ7HWCI5rZkvxU+xkRFrwh+pSHBk2Ar
+GmZ8Q7LjYOnDyNIh+igVifkrlUlNKh3k8BVEXsH1OxffO28LzLQdU2FtaSBTaXJl
+biA8c2lyZW5AYXBhY2hlLm9yZz6IYAQTEQIAIAUCRKzwUwIbAwYLCQgHAwIEFQII
+AwQWAgMBAh4BAheAAAoJEAKlpgULfmz6vl0An0KCSRbIZjNFyQoDTR7Y/21tw94h
+AJ93zAzfB8woj0MuqiOtUZ29OX/m+4hGBBARAgAGBQJGOOPMAAoJEBVFs/7iIt5P
+au8AoJBhBjsv6RD1sYBsfhbaBsZyaENHAJoD/2IhBBMaBV3fNsTCbQilad2YAIhG
+BBARAgAGBQJGOOh1AAoJEGPQra6REgPkOL8An3TWNp6bYNIRwWRKUYsCEHWQ4BMu
+AJ9ix4bnBUf6R33seqwNz7Gp7z8eDohGBBARAgAGBQJGOghLAAoJEFCOrsUwLaVo
+azcAoIaOwIwqXgW+4xZ7GYPfJEFrHGPbAKCG+gIdVX4NmNaxERZPj2qLycs9w4hG
+BBARAgAGBQJGOghLAAoJEN26ZLosMS0vazcAoNgtdYT1uCNRLTdGaYhPvjGfVr04
+AJ9MhDZ+LUm6/+k783wrph1mRU2iY4hGBBARAgAGBQJGPMSSAAoJEJhw7/PxL2By
+uIoAnjog0y6x+vqOqJV+AWDbM99ZrOH+AKCC4u8eDndLGM9XwAp5Tl7jVr5oqIhG
+BBMRAgAGBQJGORnaAAoJEOHh8rCZDtSqY8YAn0n0/gjvZKp7/bwoIj9T7jBkjpbZ
+AJ40MSZ32QBcRnt2vP6vK7/SXpshE4hGBBMRAgAGBQJGPE+UAAoJEEwEKBgxGj3l
+9OQAn145exS7RQZNTU8+BjBzSmRBGL9BAKCJ66ln9ObH2GwEHEhlS0fhEaJAD4ic
+BBABAgAGBQJGOghLAAoJEJrNPMCpn3XdE8kD+wQBy+g+4TS8IVraka2wfibUpuqo
+6UdRXiOO0CUWGBNq1jPE7LthT7tSf76Scfk7p2OiG0DfmkCBhi6hD1TgESOUOuG6
+QJM/VTwNg8KwvKXMgEd0drh/waktIIZoo/PS+LGYsyiLEKk43FL86vt2X+JKlMzq
+YMf4gBSUH0zOY76JiEYEEBECAAYFAkY+F+IACgkQPipvwl8piCRhxwCfUFugC5pP
+Wl+slCQHx5UFurZz9gkAoKBiE3Bioq4HFjJuv0D/qgZyI7lsiEYEEBECAAYFAkY9
+9qwACgkQoHLU0ENYxYSDbwCeI+tmaYaX+U0MRVgpcqxFQ2vl3S0AnAs+6O9AODcJ
+klKO3e0thKM2G2fXiEYEEBECAAYFAkY/KaQACgkQ9cJgFkzu11+tDwCgiK9ECsIi
+fAVeSCQNaKGxD+FBhcUAnjOQQiHi3KMEjFl9gVpgKfONxoqOiEYEEBECAAYFAkY/
+YEcACgkQVCINLMh0FVxGHwCfcxoMj6mG0n7D32LL/ZPpwOzL0fMAnA5/eSfX2hRV
+UidFb9vDrDHAiO3tiEYEExECAAYFAkY9oqAACgkQLrlGgoiBdAIV4wCg/2DcoBRE
+vX/jwva03HUxwo9KnFwAoJjyklqFD7bUaOk7erRTDZl5E/e5iEYEEBECAAYFAkY4
++qUACgkQc92MFgFTAjXKNQCfdegQg1dlZmv0VWZiptomzki5npcAn1ZtbYVdwDcp
+iNkvIO7NgoqeEj2fiEYEEBECAAYFAkZAuoEACgkQbQvHOkBYGDdl1QCfSItgGzht
+wJcShzNc+OOBicoSOrkAmwWW0//qdVR8AGr73QnBSgruFhp0iEYEEBECAAYFAkZG
+EbYACgkQohFa4V9ri3IIzgCgs7keksKkYWvxLEuU10a7I/sSs6oAoIcj+YpsPrgu
+oXXnGKrRA/sBOyBUuQINBESs8GwQCADGOIvlRuk94D+TY/VCO0sB/8lOobiU3qQF
+tFu8FsA4BgjQ9i6yR2wLEiZ0Cil4kWc6b19TSAarhqCAJ4GVmGIaJI3zrkab9HDA
+Dklu2XSFsTllxzOFoXuJKwoTCu6LUBNyJjLjfZkfbwelZWJmQE/Ml53Cm1324ivN
+JZro4oH7E0u8+tgyZI8e+noJE9ErIRLvUimBebDxR3Lzyrk+Qx3EzQso8A2/rwM/
+HUhZECJkZO3f/TMmv2XFbdHjBoi8XWsHnj1U9wuX9nWRedWvPgHtK4wwy3MMmwUP
+sXTcr2PFhwgH5PJKZCHsLvu4XPe8fhfC+IG2u8SLauHuYZbuDB0rAAMGCACvTgSx
++5IRPX4FOZWELvKn4PL80TiTN3XwjgvhU93a+EPbLQGtUUrhDjlMlXYlC8xkf+si
+PSNFUwLtwpPCbY/SKB//uvHD/K87wOMJhVXAjXNq2DmVJSI3EOiHY7WDVyj0ZABN
+t0yDhhxPeKHpOA/LLXRWmrUPXLNYTtJ3xuWUljmK94rXvtBDUgVa2sq/hPZMJ1MS
+23yWQ8e+9D86hznl4aDvkM5R7/slId1bTGG3Wyp6sSgVB3UzuFjmyzQwb6qcw6tq
+qFN7nhHwt7zMPLGH4C6Qh6jg+7I9J0zKgkDFwmrRisi00jniwBznQ8gh+vTuGjSl
+ZaKrFZaarjsg9rl7iEkEGBECAAkFAkSs8GwCGwwACgkQAqWmBQt+bPqj2wCfaWWf
+EKr9nFpVPXbnb+DUiMJwkYgAoKCkkWjinFVPzMJ8/NYuewG7t0s3
+=paEI
+-----END PGP PUBLIC KEY BLOCK-----
+pub   1024D/57163A4D 2007-03-08 [expires: 2008-03-07]
+uid                  Dennis E. Kubes <kubes@apache.org>
+sig 3        57163A4D 2007-03-08  Dennis E. Kubes <kubes@apache.org>
+sub   2048g/009F61B1 2007-03-08 [expires: 2008-03-07]
+sig          57163A4D 2007-03-08  Dennis E. Kubes <kubes@apache.org>
+
+-----BEGIN PGP PUBLIC KEY BLOCK-----
+Version: GnuPG v1.4.5 (Cygwin)
+
+mQGiBEXwe5MRBACdKnn1SIT/2LzFndKHZ2aK8G80FyebrbeeiiyiLpvggdvsOLKn
+VHvXkoxyw+hPTxq3voTrKMrkbWTc/df0bbkpMfHpcrUrWNo1oNTZ9VJ5n2PxAP6F
+6BHXMEWNJn2/4kZrDHCzkxOLz7KgfsCK5Xk+HDabj+9d8YVevnA8HGuoTwCglciu
+2/k8O59IAi0zdZXNsn7vMTsD/1H6gDjmJvrwnX6t1BXG/3tiU8yjJBkJ0bY9N2hK
+Dn5B9gH5mrZbxEaMYjDDwvPRqnjTQYt6DUdIlJI6xFTPwJ6120sTcX5O+vw9BCzX
+fzBKzAs90ZrDOreOoCtXnack6plw9Uuouf7R+O5sT7I4BV0u85lTizPl9I6MVTif
+e8GfA/0ah6yf48Zvm2cOFSb7/S8UvvKmoFCr2YTD31/+QEbkrbbQIynW+Fpsfrek
+YrriUr4vzYg3GKM3hWgJqrd9vxwmkDBdK3ECYocDGhsNxoT46Wb8fdz4eAisqEvh
+otsQkFk5AxydcUv5NeLjMfpgdaBAHsQgZxLIQ0IJjY9a5KcstLQiRGVubmlzIEUu
+IEt1YmVzIDxrdWJlc0BhcGFjaGUub3JnPohmBBMRAgAmBQJF8HuTAhsDBQkB4TOA
+BgsJCAcDAgQVAggDBBYCAwECHgECF4AACgkQFtbDe1cWOk3nwwCfe4qUYh6m6Bep
+xNVQLlZiiVZ9WLEAn130AJJsLfGdZNKvoxn4Y8H9kNL6uQINBEXwe6EQCACtnr9V
+u3daWEr2ytkRnhAW555eJw8CfIYO3kKmPoQljNJESETfphVU53BxJsjH1EMCviCx
+mKH7tJ0cPK8ASI285QfN2MU3Wq+AaysGKKVe1PHwJ5onA1/7x7/1BIMkAzH7EuN4
+CBeC+Y7LiuvcOTL7llKwomOFJQmW6LDP1bsvofeBSPfI+oXKoLZiVcEdZEn5fWT7
+GkbL0OYBecByca6Tp3UkHFbl1/2l2Bk1iL2BRsLMBKRrI0jRVUERIewu+SKU9+N/
+Efr52P7h9Hp3QSu9fhoGaAkYDmMXZGnjJm4IjuTG81kGrbwSZxtMgM7HO4jV32G2
+U4kq9ci4u280Hj7LAAMGCACrOWBPfugEHGYL6WZ3Mi2twgv/NScyYyDs9NfqSOUO
+lNWiOQx4cXQYtXOlDXcQxnGqfsxnUwvn3Wo6aIQ3JSVTsyKP4eH/kKc7hXyIF/8n
+0wLwfBDW9z7pWCcJVxgCKLAspV6QRhNc3eMEm8SuTC/7APygYS2wqgsqhOon8xYT
+JEAETOPC6gv5jkgKLqMqT+QePrtRSwr2VxnmMHcZ0kZkymheoZEg01+/VGyyIQvL
+nRJSiTQ0ZG5R+arzOQdmhCl8epbJsNrDGgcat2L/jFuwu7fFMNMHzk9lM+pVrxdo
+IQJOwul5awUhn6HZcMMH8IlfmRwlOqBFLgcp4hYVOwV2iE8EGBECAA8FAkXwe6EC
+GwwFCQHhM4AACgkQFtbDe1cWOk3PAQCfaPo9dh6yCyKRrjdxqL7PyrY85K0An0fB
+dvHkWSOQtUJdKA7zGg7lzvIJ
+=hSsV
+-----END PGP PUBLIC KEY BLOCK-----
+pub   1024D/24BCF054 2007-03-27
+uid                  Chris A. Mattmann <mattmann@apache.org>
+sub   4096g/20DC0E32 2007-03-27
+-----BEGIN PGP PUBLIC KEY BLOCK-----
+Version: GnuPG v1.4.7 (Darwin)
+
+mQGiBEYIb+oRBACFKKBk/2cevERru7wgEgRZvJSlIwEEEc37Bk9bQrcfXa2VwcZo
+Mbeu/c4NaFCHRracwYQ2he0o7/15WWpbSoQsVmiM/wBw1iXjvU6y14we/y31ApJD
+BhsbqLA7LrOoK6jb9F/IOiN2wApbfGHbfOM5xa1wtlNn9V4WZ46cMtbxIwCg+YLk
+fP6EWaw8BUbWHCGok/tTnMED/Ag/xzMj0BLqUJAngT+WuLUP7a9XyICZvRoKFn+L
+hIoPOcgubKpC1rb0AbG43pzMQ3UVIg+fMxMnVjxPFMn7UINLoYb027DA8Anwqq+m
+c96ChQaKENDtx0aJ9/02B8HhfS4lX9uEzGGPzkri8yvRC+UkxsCuGlf220o4tiG5
+e0cMA/4gZ5ES/iaQUxU0UlQA+FrAAnCoLBZFEavLlp/PPjkIrrheOYffFhznZrbl
+ao+QlNIeLiFsfbNlcHWXrodkaWYwxHdPpm451x0wrkpqbgREaK4wvB4kFXYOf6ay
+serEWBO9Xec+cKysQlQDPXnhzRjumJbPZ6LBtqN5TNbzuenwpLQnQ2hyaXMgQS4g
+TWF0dG1hbm4gPG1hdHRtYW5uQGFwYWNoZS5vcmc+iGAEExECACAFAkYIb+oCGwMG
+CwkIBwMCBBUCCAMEFgIDAQIeAQIXgAAKCRCPIVxaJLzwVCVWAJ0fkwWjoFJCX4rD
+Bn8uyyHBiL1+tQCfUQpFuoVRddzGFIKunoPSUYSvSmi5BA0ERghv6hAQALWZlRly
+d1qIEsHJUfjGTnX40D79iFznkV/2900Ex0GpP53YiL/LGw8hLN8a+Ko420LHwro7
+UsgHaNO5BH5mwWeYSyKR23xKlhuQrtdfS3dLYTfjJ1yANWWeWgw5QW5D7ZeGpjc7
+dGeEKpmi9rQ/VdqPmJLqaC0gmEDiFolWuqXEerwboYZunKXMWZTGbjRei1h4hjm5
+htq5jSSB4IcQpeCTu99P6x3o33YODStG4SDP/J/9LMg2XzuUJ+xd8xM9l3EO4Zb0
+hITqojpi3K3wy+v0xPYZ/RLkb5Ef7cbP8YMIU2CRid7tnpLrp50dVI2++XlDTPDc
+vMMRYM/R3TSyoc/cPaNMlezy8m6puaww5Q8780eMQsoXVPVafhxpcMZU0+X+57iJ
+ohpGYRh6BtPyu85W8hI3drrJGn3gNutkQENcOhKnISOxtcjPHRPnaqFt6XUjRj/Y
+9n7uSVR4d7T8atayGWWuKtWp8L496XjDBJMo62QKbj4aL7vfjOnmeUnMrDlwCtmP
+LcSCgK1eAEHyX26e1A4Cyrag/e+2tl+5mV5GI7isAPLCOGA86QSKb66dIbOlSFia
+9BjSDXd+vzrsTVk/oH8M7ibFl87piI5ABJhOmnksEqACRuLlSbn/aiqaQV4+CqBv
+N+EVYAAncZbEOmappOGLr0xc9teS90fDxi+vAAMHEACu7/SksMQXJntj855l7ALa
+EWT//o9req+RG13v8mOTL3DZqJUNg+YgxpVpkc+8N0txzhJ/8XcKTqaxN7YHgE4N
+42Ot5roAPK/8KKhOTCmAVouxOD2W2hVXmeclcwya4CECpM3Q8B+bSNSd5AnH7oYD
+i9nWcphLahDXwhCndWr6p10OtAkNGeZylVJJQmi5rVHX4CRfoV8RVxwOnIk9ca4q
+36l10ER3xkVhhPO8zouFAk1B63HSXN2tWwOfZRxBaJC0O5ixhs4aylLuOpRw9uZl
+njKHs8KU2xLgUcOQL5rAyjLDr4h7zYkHgGOMe6zHLrym29zBzzjJMDryi4nvh8z0
+i3uS2ydxEkFwddd7fDVaXILEPHSmJYaMgxJeXIARx+ovvDvbnWvliV6SWyGMm+FB
+sEhZ+1mWZDnW8TJzifn3nteXLqd7o2t2kFUxboIcLlPtebhhOJgtZp8GraTjpL0O
+P7E+iEjcdVu/4ULtfECscz9w9VxzYyx5m7zf88ZMTOAzLzUXBN4uPxMEpMrZ5/PK
+m9WnR23XyKEObao2LPFk9feJXjnNNJfN7irB44AlfQlX2uPAuiQyCEm0c6DvdjRd
+vIxV0MEa+WSBaplFlj0ACfs2Hdh6skErvMAzXHBCRshVnXB7hxfHdSKgLebTL4Rn
+7KYWxKB6rwd9jQsv9SRyeohJBBgRAgAJBQJGCG/qAhsMAAoJEI8hXFokvPBUYfIA
+oPLz8+OED4Ec4Pjuuic3HX2Ff0myAKDd74+vKoAC78+CStjf1pSSmy4y4w==
+=mbQZ
+-----END PGP PUBLIC KEY BLOCK-----
+
+pub   4096R/C601BCA7 2012-04-17
+uid                  Lewis John McGibbney (CODE SIGNING KEY) <lewismc@apache.org>
+sig 3        C601BCA7 2012-04-17  Lewis John McGibbney (CODE SIGNING KEY) <lewismc@apache.org>
+sub   4096R/FCD9FF28 2012-04-17
+sig          C601BCA7 2012-04-17  Lewis John McGibbney (CODE SIGNING KEY) <lewismc@apache.org>
+
+-----BEGIN PGP PUBLIC KEY BLOCK-----
+Version: GnuPG v1.4.10 (GNU/Linux)
+
+mQINBE+NSUkBEAC3Qu1mT3x0swS4zXta2NnJtrepOqpsU292U+hzkbjdG8W+W2WA
+3oRdd5f/iKkkE1Z3q53qD++PazLQf+g+378Ce+CP4bwhZuz/CgSa8EO2rIXadVUG
+M+XBAiSlLWyQhwW8qbipGQvpT1PXp8mjwXlWzt+0+4F9ybepYxStUPaybIFfSn+f
+M8YzYLgfKSsHMgPeK6TGRJAqC+u7t+XMYWmfVS9TpoOyfZ3tsn3YmeH4JiqF49/0
+XzkqgM7FW52By64Nm6xCOfqXCaMmVV5JRuZFhLB4VmWlH/Mikv5Tu99gsAdGwFIb
+MhMWtWZ/azKarTkQiZjDka09Mxc6skXCBBbxz9lstE4X50d5PMqOgVBtFstmL64h
+Km2dSIdVEUyjM9y1HBRZO1+ooNs5xja1DnSAuytstrRnt5Vdnuk/RS8t2qfcm2jP
+NWrZNOix9U+pT7qUQ1wbK/ew+qWbNFlvp9i3XyZdfPpyEmYD4CsBvkVbiH+FULwS
+F4OJQlJoDJ1vHnSPMNSGtiNRTLSQ2+E6huqktyAY+rcTamCEkCdoZ5NTyMbEgqZ1
+P4fr+h+EpV0h/ACzjhE4sq6MK6KZFv3a3Erlk4oC93BVJpcYyZyQneKQSapbAv9u
+oYCTLHyCrBdXItnFEHhy1zN0DvbWoGtsxDvAVjY3D9YP32Yu3WvxeW25bQARAQAB
+tDxMZXdpcyBKb2huIE1jR2liYm5leSAoQ09ERSBTSUdOSU5HIEtFWSkgPGxld2lz
+bWNAYXBhY2hlLm9yZz6JAjgEEwECACIFAk+NSUkCGwMGCwkIBwMCBhUIAgkKCwQW
+AgMBAh4BAheAAAoJEPReeXDGAbynxnQP/1s1e1eDUAvZv1k+OVhG+nDhqtBtmFV6
+sx67atpzZCj6ckKXphkiWAFmYsAH7pujHgASuAIoMY7MLjaRuG2MiEdWINYH5LVB
+xmZ3M9f1+YBuTSs/0KKBfqVBYm5vbEC+vBkjez54DOJ7OfRQllra98FR5GxEoYhh
+bIQDtUtYrLjzd9kbUH5J+cTgSJ08ciIxanscvFRE7+X2sQTopor6f+o7iea7k6KM
+b5FJ9mi4Q3RQbkorncyyDp4O7rBsuaGeD2oORdSM1zT5ql3glq7cYUI8havHY696
+jWYLOc951l6fDofGi4ZirX0+Mlxj+d2BNY54rx9dl6pZOmahvD4pveq/vbzwOH9E
+vb1uTfRIYLaNW++1nXzPBZ5nzsemDb3K8yVYXnCDrqmzOZMJu5AinvUUusTrRhT/
+4oy2AO1YEIjgwHFzYvv7C7/wYSQC5AxvO0plvyH/kMK/vQk3H7I13isHdyZhEjrR
+e+ciNzPWh4R6W8zVbe29MljItmINWniJ/CnYi9/r7ZtkQUBUCmHQZcsCm2DflA83
+ueLozFY3NH2eQ4q9dY8QIJDOpsX1SrP8DUOpuai3PvEiE8stHxGpamFq2DgnS81x
+/e/kSbIBD6QGgP1S7Zrkdz4jriCCY4mv9mYMu9De/sObYcpGdg6rE49lz9NWeE8w
+Wtt1oexR6DhpuQINBE+NSUkBEADOm92hnYd9ZNSmaVSUegmo0Rx9CMIzRZzHXPXT
+SxxMnJScWDKeTWa7U1A0peiNIUKKlgFcnUY176o4wk8y2sNgyYkYO6wQlzmoyQIh
+Ft0fqE3LMKBJcW2JONWFVrFZpRPTFvRWnDOSur8IQq3rJkyiqfT5y0E7PAdd8aa3
+l7anp8gfKCf9iIYtgfNsKNphngkwOLNDVsED7G/VRfAezjDKyf0M9HSL0fjQ5YDe
+L5MMmgduvYKBtWISM5tqJAunkMpGeWJ6/khJZT+bLK8iLM2073W5uSlNs6oO2AM8
+lDvfmnsFC4178mbU9nJNi+KAXzwZXH4xcqywRKZhuWI5BVPGi50HJ/RIZtDyrkrK
+W7NACtmniuFzSy9PxrM2iappUsfY8b7uZBzGoo1BzT7F7VM7sSte+X+zs8TZ0dam
+6TbuGMuv5rPQGAwu2JWUNOeBzXvfkg3gzk4qZrBdHtUrQjx33c1NBZddLcoSqzgC
+ph2cz4NG4Fs/Mi8SXoKBwJGVeWE+ZCBma8vFP/zctb/XroIaFSE5rAwHydwCB4gu
+VB3rNuLCoiiB50lPzAPFjjFxOuZeTZfl4bp1XRE1KKYi+n974At4HDd5g0Az8w37
+5/9G+pARCzjytvIHJTYQDsG0hfnj2Vfb5WWYF6LMib0ZGf739Yp7L602/yE9QAKm
+bifPCQARAQABiQIfBBgBAgAJBQJPjUlJAhsMAAoJEPReeXDGAbynzc4P/AomVPfY
+bY61TE+QSKAJl8/dyyw+LSddTPFTleVBFHlq1tnQmLWxoNq5t1CRXUJOv3q6haPE
+PLKR5pXXtNzAGVP74Jipa5r8FQjBG0j+XriiHmr861xyno0uPG23c0LSRqHrcLi6
+tgN2Q2ihu1Tjaql+ukzPI6u2v97FD0qhJWKvFFo64p7HTNUXHJLQ9N/m1Pien7Nm
+KFLRI0Pu0CW95I1w2gAAlS++lIxT3/ANfw6SpK9+lNBaan1g0xM5/P54MIQvZgCQ
+gdIcWdAOmXjTyMryconkeNRWpkYjXG4hZj9crP48j3lZPlUYol4pdkQ1CtSq1emv
+VDGoUrn5bRWoybOFfx3joOLpUqJA5PDjeN7YMpJNWc3O/lz+S+sW9WZY7vwbK+Mn
+E/l4Bz2k9fQDsxm2rPzM2aS/qaBo9v7vj+NE85B2/NE9cXo0WoC8u5o+KEQY6urV
+ANW/A0k94wmfoBMbmzNZ5Y5zJ9vceW9d4FE2FXaynRke2awYHBZE2Ty3MSxCQAvp
+MREQKzxB1XcR+Frj0nMKMmdEmM55OmIgAqAct1OuGDbOATJMcmVuwHqTZIdynzqh
+NPgXHx4ASqesjF/9GUrAQfOmXqHdOF6xOb7YYGssl1kgvOQRVJhkWtmTckyk+xu9
+U3Wt+q9F6O+RmemV6a6mrpog+Aq+BkIMWCJ8
+=xHbT
+-----END PGP PUBLIC KEY BLOCK-----
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/LICENSE.txt	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,5793 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+   
+APACHE NUTCH SUBCOMPONENTS
+   
+lib/commons-httpclient-3.0.1.jar
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+      
+lib/icu4j-4_0_1.jar
+
+ICU license - ICU 1.8.1 and later
+
+   COPYRIGHT AND PERMISSION NOTICE
+
+   Copyright (c) 1995-2006 International Business Machines Corporation and
+   others
+
+   All rights reserved.
+
+   Permission is hereby granted, free of charge, to any person obtaining a
+   copy of this software and associated documentation files (the "Software"),
+   to deal in the Software without restriction, including without limitation
+   the rights to use, copy, modify, merge, publish, distribute, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, provided that the above copyright notice(s) and this
+   permission notice appear in all copies of the Software and that both the
+   above copyright notice(s) and this permission notice appear in supporting
+   documentation.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY
+   RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
+   NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
+   DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+   PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+   ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+   THIS SOFTWARE.
+
+   Except as contained in this notice, the name of a copyright holder shall
+   not be used in advertising or otherwise to promote the sale, use or other
+   dealings in this Software without prior written authorization of the
+   copyright holder.
+
+     ----------------------------------------------------------------------
+
+   All trademarks and registered trademarks mentioned herein are the property
+   of their respective owners.
+
+lib/commons-collections-3.2.1.jar
+
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+
+lib/commons-logging-1.0.4.jar
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+
+lib/lucene-misc-2.4.0.jar
+src/plugin/summary-lucene/lib/lucene-highlighter-2.4.0.jar
+lib/lucene-core-2.4.0.jar
+src/plugin/lib-lucene-analyzers/lib/lucene-analyzers-2.4.0.jar
+
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+
+
+Some code in src/java/org/apache/lucene/util/UnicodeUtil.java was
+derived from unicode conversion examples available at
+http://www.unicode.org/Public/PROGRAMS/CVTUTF.  Here is the copyright
+from those sources:
+
+/*
+ * Copyright 2001-2004 Unicode, Inc.
+ * 
+ * Disclaimer
+ * 
+ * This source code is provided as is by Unicode, Inc. No claims are
+ * made as to fitness for any particular purpose. No warranties of any
+ * kind are expressed or implied. The recipient agrees to determine
+ * applicability of information provided. If this file has been
+ * purchased on magnetic or optical media from Unicode, Inc., the
+ * sole remedy for any claim will be exchange of defective media
+ * within 90 days of receipt.
+ * 
+ * Limitations on Rights to Redistribute This Code
+ * 
+ * Unicode, Inc. hereby grants the right to freely use the information
+ * supplied in this file in the creation of products supporting the
+ * Unicode Standard, and to make copies of this file in any form
+ * for internal or external distribution as long as this notice
+ * remains attached.
+ */
+
+
+Some code in src/java/org/apache/lucene/util/ArrayUtil.java was
+derived from Python 2.4.2 sources available at
+http://www.python.org. Full license is here:
+
+  http://www.python.org/download/releases/2.4.2/license/
+
+lib/jakarta-oro-2.0.8.jar
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2000-2002 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro" 
+ *    must not be used to endorse or promote products derived from this
+ *    software without prior written permission. For written
+ *    permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache" 
+ *    or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their 
+ *    name, without prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+lib/jetty-ext/commons-el.jar
+
+/*
+ * $Header: /home/cvs/jakarta-commons/el/LICENSE.txt,v 1.1.1.1 2003/02/04 00:22:24 luehe Exp $
+ * $Revision: 1.1.1.1 $
+ * $Date: 2003/02/04 00:22:24 $
+ *
+ * ====================================================================
+ *
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 1999-2002 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution, if
+ *    any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "The Jakarta Project", "Commons", and "Apache Software
+ *    Foundation" must not be used to endorse or promote products derived
+ *    from this software without prior written permission. For written
+ *    permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache"
+ *    nor may "Apache" appear in their names without prior written
+ *    permission of the Apache Group.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ *
+ */
+
+lib/jetty-ext/ant.jar
+
+/*
+ *                                 Apache License
+ *                           Version 2.0, January 2004
+ *                        http://www.apache.org/licenses/
+ *
+ *   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+ *
+ *   1. Definitions.
+ *
+ *      "License" shall mean the terms and conditions for use, reproduction,
+ *      and distribution as defined by Sections 1 through 9 of this document.
+ *
+ *      "Licensor" shall mean the copyright owner or entity authorized by
+ *      the copyright owner that is granting the License.
+ *
+ *      "Legal Entity" shall mean the union of the acting entity and all
+ *      other entities that control, are controlled by, or are under common
+ *      control with that entity. For the purposes of this definition,
+ *      "control" means (i) the power, direct or indirect, to cause the
+ *      direction or management of such entity, whether by contract or
+ *      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ *      outstanding shares, or (iii) beneficial ownership of such entity.
+ *
+ *      "You" (or "Your") shall mean an individual or Legal Entity
+ *      exercising permissions granted by this License.
+ *
+ *      "Source" form shall mean the preferred form for making modifications,
+ *      including but not limited to software source code, documentation
+ *      source, and configuration files.
+ *
+ *      "Object" form shall mean any form resulting from mechanical
+ *      transformation or translation of a Source form, including but
+ *      not limited to compiled object code, generated documentation,
+ *      and conversions to other media types.
+ *
+ *      "Work" shall mean the work of authorship, whether in Source or
+ *      Object form, made available under the License, as indicated by a
+ *      copyright notice that is included in or attached to the work
+ *      (an example is provided in the Appendix below).
+ *
+ *      "Derivative Works" shall mean any work, whether in Source or Object
+ *      form, that is based on (or derived from) the Work and for which the
+ *      editorial revisions, annotations, elaborations, or other modifications
+ *      represent, as a whole, an original work of authorship. For the purposes
+ *      of this License, Derivative Works shall not include works that remain
+ *      separable from, or merely link (or bind by name) to the interfaces of,
+ *      the Work and Derivative Works thereof.
+ *
+ *      "Contribution" shall mean any work of authorship, including
+ *      the original version of the Work and any modifications or additions
+ *      to that Work or Derivative Works thereof, that is intentionally
+ *      submitted to Licensor for inclusion in the Work by the copyright owner
+ *      or by an individual or Legal Entity authorized to submit on behalf of
+ *      the copyright owner. For the purposes of this definition, "submitted"
+ *      means any form of electronic, verbal, or written communication sent
+ *      to the Licensor or its representatives, including but not limited to
+ *      communication on electronic mailing lists, source code control systems,
+ *      and issue tracking systems that are managed by, or on behalf of, the
+ *      Licensor for the purpose of discussing and improving the Work, but
+ *      excluding communication that is conspicuously marked or otherwise
+ *      designated in writing by the copyright owner as "Not a Contribution."
+ *
+ *      "Contributor" shall mean Licensor and any individual or Legal Entity
+ *      on behalf of whom a Contribution has been received by Licensor and
+ *      subsequently incorporated within the Work.
+ *
+ *   2. Grant of Copyright License. Subject to the terms and conditions of
+ *      this License, each Contributor hereby grants to You a perpetual,
+ *      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ *      copyright license to reproduce, prepare Derivative Works of,
+ *      publicly display, publicly perform, sublicense, and distribute the
+ *      Work and such Derivative Works in Source or Object form.
+ *
+ *   3. Grant of Patent License. Subject to the terms and conditions of
+ *      this License, each Contributor hereby grants to You a perpetual,
+ *      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ *      (except as stated in this section) patent license to make, have made,
+ *      use, offer to sell, sell, import, and otherwise transfer the Work,
+ *      where such license applies only to those patent claims licensable
+ *      by such Contributor that are necessarily infringed by their
+ *      Contribution(s) alone or by combination of their Contribution(s)
+ *      with the Work to which such Contribution(s) was submitted. If You
+ *      institute patent litigation against any entity (including a
+ *      cross-claim or counterclaim in a lawsuit) alleging that the Work
+ *      or a Contribution incorporated within the Work constitutes direct
+ *      or contributory patent infringement, then any patent licenses
+ *      granted to You under this License for that Work shall terminate
+ *      as of the date such litigation is filed.
+ *
+ *   4. Redistribution. You may reproduce and distribute copies of the
+ *      Work or Derivative Works thereof in any medium, with or without
+ *      modifications, and in Source or Object form, provided that You
+ *      meet the following conditions:
+ *
+ *      (a) You must give any other recipients of the Work or
+ *          Derivative Works a copy of this License; and
+ *
+ *      (b) You must cause any modified files to carry prominent notices
+ *          stating that You changed the files; and
+ *
+ *      (c) You must retain, in the Source form of any Derivative Works
+ *          that You distribute, all copyright, patent, trademark, and
+ *          attribution notices from the Source form of the Work,
+ *          excluding those notices that do not pertain to any part of
+ *          the Derivative Works; and
+ *
+ *      (d) If the Work includes a "NOTICE" text file as part of its
+ *          distribution, then any Derivative Works that You distribute must
+ *          include a readable copy of the attribution notices contained
+ *          within such NOTICE file, excluding those notices that do not
+ *          pertain to any part of the Derivative Works, in at least one
+ *          of the following places: within a NOTICE text file distributed
+ *          as part of the Derivative Works; within the Source form or
+ *          documentation, if provided along with the Derivative Works; or,
+ *          within a display generated by the Derivative Works, if and
+ *          wherever such third-party notices normally appear. The contents
+ *          of the NOTICE file are for informational purposes only and
+ *          do not modify the License. You may add Your own attribution
+ *          notices within Derivative Works that You distribute, alongside
+ *          or as an addendum to the NOTICE text from the Work, provided
+ *          that such additional attribution notices cannot be construed
+ *          as modifying the License.
+ *
+ *      You may add Your own copyright statement to Your modifications and
+ *      may provide additional or different license terms and conditions
+ *      for use, reproduction, or distribution of Your modifications, or
+ *      for any such Derivative Works as a whole, provided Your use,
+ *      reproduction, and distribution of the Work otherwise complies with
+ *      the conditions stated in this License.
+ *
+ *   5. Submission of Contributions. Unless You explicitly state otherwise,
+ *      any Contribution intentionally submitted for inclusion in the Work
+ *      by You to the Licensor shall be under the terms and conditions of
+ *      this License, without any additional terms or conditions.
+ *      Notwithstanding the above, nothing herein shall supersede or modify
+ *      the terms of any separate license agreement you may have executed
+ *      with Licensor regarding such Contributions.
+ *
+ *   6. Trademarks. This License does not grant permission to use the trade
+ *      names, trademarks, service marks, or product names of the Licensor,
+ *      except as required for reasonable and customary use in describing the
+ *      origin of the Work and reproducing the content of the NOTICE file.
+ *
+ *   7. Disclaimer of Warranty. Unless required by applicable law or
+ *      agreed to in writing, Licensor provides the Work (and each
+ *      Contributor provides its Contributions) on an "AS IS" BASIS,
+ *      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ *      implied, including, without limitation, any warranties or conditions
+ *      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ *      PARTICULAR PURPOSE. You are solely responsible for determining the
+ *      appropriateness of using or redistributing the Work and assume any
+ *      risks associated with Your exercise of permissions under this License.
+ *
+ *   8. Limitation of Liability. In no event and under no legal theory,
+ *      whether in tort (including negligence), contract, or otherwise,
+ *      unless required by applicable law (such as deliberate and grossly
+ *      negligent acts) or agreed to in writing, shall any Contributor be
+ *      liable to You for damages, including any direct, indirect, special,
+ *      incidental, or consequential damages of any character arising as a
+ *      result of this License or out of the use or inability to use the
+ *      Work (including but not limited to damages for loss of goodwill,
+ *      work stoppage, computer failure or malfunction, or any and all
+ *      other commercial damages or losses), even if such Contributor
+ *      has been advised of the possibility of such damages.
+ *
+ *   9. Accepting Warranty or Additional Liability. While redistributing
+ *      the Work or Derivative Works thereof, You may choose to offer,
+ *      and charge a fee for, acceptance of support, warranty, indemnity,
+ *      or other liability obligations and/or rights consistent with this
+ *      License. However, in accepting such obligations, You may act only
+ *      on Your own behalf and on Your sole responsibility, not on behalf
+ *      of any other Contributor, and only if You agree to indemnify,
+ *      defend, and hold each Contributor harmless for any liability
+ *      incurred by, or claims asserted against, such Contributor by reason
+ *      of your accepting any such warranty or additional liability.
+ *
+ *   END OF TERMS AND CONDITIONS
+ *
+ *   APPENDIX: How to apply the Apache License to your work.
+ *
+ *      To apply the Apache License to your work, attach the following
+ *      boilerplate notice, with the fields enclosed by brackets "[]"
+ *      replaced with your own identifying information. (Don't include
+ *      the brackets!)  The text should be enclosed in the appropriate
+ *      comment syntax for the file format. We also recommend that a
+ *      file or class name and description of purpose be included on the
+ *      same "printed page" as the copyright notice for easier
+ *      identification within third-party archives.
+ *
+ *   Copyright [yyyy] [name of copyright owner]
+ *
+ *   Licensed under the Apache License, Version 2.0 (the "License");
+ *   you may not use this file except in compliance with the License.
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software
+ *   distributed under the License is distributed on an "AS IS" BASIS,
+ *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *   See the License for the specific language governing permissions and
+ *   limitations under the License.
+ */
+
+
+lib/jetty-ext/jsp-api.jar
+ASF
+
+lib/jetty-ext/jasper-runtime.jar
+ASF
+
+lib/jetty-ext/jasper-compiler.jar
+ASF
+
+lib/hadoop-0.19.1-core.jar
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+
+APACHE HADOOP SUBCOMPONENTS:
+
+The Apache Hadoop project contains subcomponents with separate copyright
+notices and license terms. Your use of the source code for these
+subcomponents is subject to the terms and conditions of the following
+licenses. 
+
+For the org.apache.hadoop.util.bloom.* classes:
+
+/**
+ *
+ * Copyright (c) 2005, European Commission project OneLab under contract
+ * 034819 (http://www.one-lab.org)
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or 
+ * without modification, are permitted provided that the following 
+ * conditions are met:
+ *  - Redistributions of source code must retain the above copyright 
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright 
+ *    notice, this list of conditions and the following disclaimer in 
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the name of the University Catholique de Louvain - UCL
+ *    nor the names of its contributors may be used to endorse or 
+ *    promote products derived from this software without specific prior 
+ *    written permission.
+ *    
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+lib/apache-solr-common-1.3.0.jar
+lib/apache-solr-solrj-1.3.0.jar
+
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+
+==========================================================================
+Portions of Jetty 6 are bundled in the Solr example server.
+Jetty 6 includes a binary javax.servlet package licensed under the
+Common Development and Distribution License.
+--------------------------------------------------------------------------
+COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0
+
+1. Definitions.
+
+1.1. Contributor means each individual or entity that creates or contributes to
+the creation of Modifications.
+
+1.2. Contributor Version means the combination of the Original Software, prior
+Modifications used by a Contributor (if any), and the Modifications made by
+that particular Contributor.
+
+1.3. Covered Software means (a) the Original Software, or (b) Modifications, or
+(c) the combination of files containing Original Software with files containing
+Modifications, in each case including portions thereof.
+
+1.4. Executable means the Covered Software in any form other than Source Code.
+
+1.5. Initial Developer means the individual or entity that first makes Original
+Software available under this License.
+
+1.6. Larger Work means a work which combines Covered Software or portions
+thereof with code not governed by the terms of this License.
+
+1.7. License means this document.
+
+1.8. Licensable means having the right to grant, to the maximum extent
+possible, whether at the time of the initial grant or subsequently acquired,
+any and all of the rights conveyed herein.
+
+1.9. Modifications means the Source Code and Executable form of any of the
+following:
+
+A. Any file that results from an addition to, deletion from or modification of
+the contents of a file containing Original Software or previous Modifications;
+
+B. Any new file that contains any part of the Original Software or previous
+Modification; or
+
+C. Any new file that is contributed or otherwise made available under the terms
+of this License.
+
+1.10. Original Software means the Source Code and Executable form of computer
+software code that is originally released under this License.
+
+1.11. Patent Claims means any patent claim(s), now owned or hereafter acquired,
+including without limitation, method, process, and apparatus claims, in any
+patent Licensable by grantor.
+
+1.12. Source Code means (a) the common form of computer software code in which
+modifications are made and (b) associated documentation included in or with
+such code.
+
+1.13. You (or Your) means an individual or a legal entity exercising rights
+under, and complying with all of the terms of, this License. For legal
+entities, You includes any entity which controls, is controlled by, or is under
+common control with You. For purposes of this definition, control means (a) the
+power, direct or indirect, to cause the direction or management of such entity,
+whether by contract or otherwise, or (b) ownership of more than fifty percent
+(50%) of the outstanding shares or beneficial ownership of such entity.
+
+2. License Grants.
+
+2.1. The Initial Developer Grant.  Conditioned upon Your compliance with
+Section 3.1 below and subject to third party intellectual property claims, the
+Initial Developer hereby grants You a world-wide, royalty-free, non-exclusive
+license: (a) under intellectual property rights (other than patent or
+trademark) Licensable by Initial Developer, to use, reproduce, modify, display,
+perform, sublicense and distribute the Original Software (or portions thereof),
+with or without Modifications, and/or as part of a Larger Work; and (b) under
+Patent Claims infringed by the making, using or selling of Original Software,
+to make, have made, use, practice, sell, and offer for sale, and/or otherwise
+dispose of the Original Software (or portions thereof).  (c) The licenses
+granted in Sections 2.1(a) and (b) are effective on the date Initial Developer
+first distributes or otherwise makes the Original Software available to a third
+party under the terms of this License.  (d) Notwithstanding Section 2.1(b)
+above, no patent license is granted: (1) for code that You delete from the
+Original Software, or (2) for infringements caused by: (i) the modification of
+the Original Software, or (ii) the combination of the Original Software with
+other software or devices.
+
+2.2. Contributor Grant.  Conditioned upon Your compliance with Section 3.1
+below and subject to third party intellectual property claims, each Contributor
+hereby grants You a world-wide, royalty-free, non-exclusive license: (a) under
+intellectual property rights (other than patent or trademark) Licensable by
+Contributor to use, reproduce, modify, display, perform, sublicense and
+distribute the Modifications created by such Contributor (or portions thereof),
+either on an unmodified basis, with other Modifications, as Covered Software
+and/or as part of a Larger Work; and (b) under Patent Claims infringed by the
+making, using, or selling of Modifications made by that Contributor either
+alone and/or in combination with its Contributor Version (or portions of such
+combination), to make, use, sell, offer for sale, have made, and/or otherwise
+dispose of: (1) Modifications made by that Contributor (or portions thereof);
+and (2) the combination of Modifications made by that Contributor with its
+Contributor Version (or portions of such combination).  (c) The licenses
+granted in Sections 2.2(a) and 2.2(b) are effective on the date Contributor
+first distributes or otherwise makes the Modifications available to a third
+party.  (d) Notwithstanding Section 2.2(b) above, no patent license is granted:
+(1) for any code that Contributor has deleted from the Contributor Version;
+(2) for infringements caused by: (i) third party modifications of Contributor
+Version, or (ii) the combination of Modifications made by that Contributor with
+other software (except as part of the Contributor Version) or other devices; or
+(3) under Patent Claims infringed by Covered Software in the absence of
+Modifications made by that Contributor.
+
+3. Distribution Obligations.
+
+3.1. Availability of Source Code.
+
+Any Covered Software that You distribute or otherwise make available in
+Executable form must also be made available in Source Code form and that Source
+Code form must be distributed only under the terms of this License. You must
+include a copy of this License with every copy of the Source Code form of the
+Covered Software You distribute or otherwise make available. You must inform
+recipients of any such Covered Software in Executable form as to how they can
+obtain such Covered Software in Source Code form in a reasonable manner on or
+through a medium customarily used for software exchange.
+
+3.2. Modifications.
+
+The Modifications that You create or to which You contribute are governed by
+the terms of this License. You represent that You believe Your Modifications
+are Your original creation(s) and/or You have sufficient rights to grant the
+rights conveyed by this License.
+
+3.3. Required Notices.  You must include a notice in each of Your Modifications
+that identifies You as the Contributor of the Modification. You may not remove
+or alter any copyright, patent or trademark notices contained within the
+Covered Software, or any notices of licensing or any descriptive text giving
+attribution to any Contributor or the Initial Developer.
+
+3.4. Application of Additional Terms.  You may not offer or impose any terms on
+any Covered Software in Source Code form that alters or restricts the
+applicable version of this License or the recipients' rights hereunder. You may
+choose to offer, and to charge a fee for, warranty, support, indemnity or
+liability obligations to one or more recipients of Covered Software. However,
+you may do so only on Your own behalf, and not on behalf of the Initial
+Developer or any Contributor. You must make it absolutely clear that any such
+warranty, support, indemnity or liability obligation is offered by You alone,
+and You hereby agree to indemnify the Initial Developer and every Contributor
+for any liability incurred by the Initial Developer or such Contributor as a
+result of warranty, support, indemnity or liability terms You offer.
+
+3.5. Distribution of Executable Versions.  You may distribute the Executable
+form of the Covered Software under the terms of this License or under the terms
+of a license of Your choice, which may contain terms different from this
+License, provided that You are in compliance with the terms of this License and
+that the license for the Executable form does not attempt to limit or alter the
+recipient's rights in the Source Code form from the rights set forth in this
+License. If You distribute the Covered Software in Executable form under a
+different license, You must make it absolutely clear that any terms which
+differ from this License are offered by You alone, not by the Initial Developer
+or Contributor. You hereby agree to indemnify the Initial Developer and every
+Contributor for any liability incurred by the Initial Developer or such
+Contributor as a result of any such terms You offer.
+
+3.6. Larger Works.  You may create a Larger Work by combining Covered Software
+with other code not governed by the terms of this License and distribute the
+Larger Work as a single product. In such a case, You must make sure the
+requirements of this License are fulfilled for the Covered Software.
+
+4. Versions of the License.
+
+4.1. New Versions.  Sun Microsystems, Inc. is the initial license steward and
+may publish revised and/or new versions of this License from time to time. Each
+version will be given a distinguishing version number. Except as provided in
+Section 4.3, no one other than the license steward has the right to modify this
+License.
+
+4.2. Effect of New Versions.
+
+You may always continue to use, distribute or otherwise make the Covered
+Software available under the terms of the version of the License under which
+You originally received the Covered Software. If the Initial Developer includes
+a notice in the Original Software prohibiting it from being distributed or
+otherwise made available under any subsequent version of the License, You must
+distribute and make the Covered Software available under the terms of the
+version of the License under which You originally received the Covered
+Software. Otherwise, You may also choose to use, distribute or otherwise make
+the Covered Software available under the terms of any subsequent version of the
+License published by the license steward.
+
+4.3. Modified Versions.
+
+When You are an Initial Developer and You want to create a new license for Your
+Original Software, You may create and use a modified version of this License if
+You: (a) rename the license and remove any references to the name of the
+license steward (except to note that the license differs from this License);
+and (b) otherwise make it clear that the license contains terms which differ
+from this License.
+
+5. DISCLAIMER OF WARRANTY.
+
+COVERED SOFTWARE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS" BASIS, WITHOUT
+WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, WITHOUT
+LIMITATION, WARRANTIES THAT THE COVERED SOFTWARE IS FREE OF DEFECTS,
+MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING. THE ENTIRE RISK
+AS TO THE QUALITY AND PERFORMANCE OF THE COVERED SOFTWARE IS WITH YOU. SHOULD
+ANY COVERED SOFTWARE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE INITIAL
+DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY SERVICING,
+REPAIR OR CORRECTION. THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART
+OF THIS LICENSE. NO USE OF ANY COVERED SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT
+UNDER THIS DISCLAIMER.
+
+6. TERMINATION.
+
+6.1. This License and the rights granted hereunder will terminate automatically
+if You fail to comply with terms herein and fail to cure such breach within 30
+days of becoming aware of the breach. Provisions which, by their nature, must
+remain in effect beyond the termination of this License shall survive.
+
+6.2. If You assert a patent infringement claim (excluding declaratory judgment
+actions) against Initial Developer or a Contributor (the Initial Developer or
+Contributor against whom You assert such claim is referred to as Participant)
+alleging that the Participant Software (meaning the Contributor Version where
+the Participant is a Contributor or the Original Software where the Participant
+is the Initial Developer) directly or indirectly infringes any patent, then any
+and all rights granted directly or indirectly to You by such Participant, the
+Initial Developer (if the Initial Developer is not the Participant) and all
+Contributors under Sections 2.1 and/or 2.2 of this License shall, upon 60 days
+notice from Participant terminate prospectively and automatically at the
+expiration of such 60 day notice period, unless if within such 60 day period
+You withdraw Your claim with respect to the Participant Software against such
+Participant either unilaterally or pursuant to a written agreement with
+Participant.
+
+6.3. In the event of termination under Sections 6.1 or 6.2 above, all end user
+licenses that have been validly granted by You or any distributor hereunder
+prior to termination (excluding licenses granted to You by any distributor)
+shall survive termination.
+
+7. LIMITATION OF LIABILITY.
+
+UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT (INCLUDING
+NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE INITIAL DEVELOPER, ANY
+OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF COVERED SOFTWARE, OR ANY SUPPLIER OF
+ANY OF SUCH PARTIES, BE LIABLE TO ANY PERSON FOR ANY INDIRECT, SPECIAL,
+INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT
+LIMITATION, DAMAGES FOR LOST PROFITS, LOSS OF GOODWILL, WORK STOPPAGE, COMPUTER
+FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER COMMERCIAL DAMAGES OR LOSSES, EVEN
+IF SUCH PARTY SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS
+LIMITATION OF LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL
+INJURY RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT APPLICABLE LAW
+PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OR
+LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THIS EXCLUSION AND
+LIMITATION MAY NOT APPLY TO YOU.
+
+8. U.S. GOVERNMENT END USERS.
+
+The Covered Software is a commercial item, as that term is defined in
+48 C.F.R. 2.101 (Oct. 1995), consisting of commercial computer software (as
+that term is defined at 48 C.F.R. § 252.227-7014(a)(1)) and commercial computer
+software documentation as such terms are used in 48 C.F.R. 12.212 (Sept. 1995).
+Consistent with 48 C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4
+(June 1995), all U.S. Government End Users acquire Covered Software with only
+those rights set forth herein. This U.S. Government Rights clause is in lieu
+of, and supersedes, any other FAR, DFAR, or other clause or provision that
+addresses Government rights in computer software under this License.
+
+9. MISCELLANEOUS.
+
+This License represents the complete agreement concerning subject matter
+hereof. If any provision of this License is held to be unenforceable, such
+provision shall be reformed only to the extent necessary to make it
+enforceable. This License shall be governed by the law of the jurisdiction
+specified in a notice contained within the Original Software (except to the
+extent applicable law, if any, provides otherwise), excluding such
+jurisdiction's conflict-of-law provisions. Any litigation relating to this
+License shall be subject to the jurisdiction of the courts located in the
+jurisdiction and venue specified in a notice contained within the Original
+Software, with the losing party responsible for costs, including, without
+limitation, court costs and reasonable attorneys' fees and expenses. The
+application of the United Nations Convention on Contracts for the International
+Sale of Goods is expressly excluded. Any law or regulation which provides that
+the language of a contract shall be construed against the drafter shall not
+apply to this License. You agree that You alone are responsible for compliance
+with the United States export administration regulations (and the export
+control laws and regulation of any other countries) when You use, distribute or
+otherwise make available any Covered Software.
+
+10. RESPONSIBILITY FOR CLAIMS.
+
+As between Initial Developer and the Contributors, each party is responsible
+for claims and damages arising, directly or indirectly, out of its utilization
+of rights under this License and You agree to work with Initial Developer and
+Contributors to distribute such responsibility on an equitable basis. Nothing
+herein is intended or shall be deemed to constitute any admission of liability.
+
+NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
+(CDDL) The GlassFish code released under the CDDL shall be governed by the laws
+of the State of California (excluding conflict-of-law provisions). Any
+litigation relating to this License shall be subject to the jurisdiction of the
+Federal Courts of the Northern District of California and the state courts of
+the State of California, with venue lying in Santa Clara County, California. 
+
+
+==========================================================================
+The following license applies to parts of the lucene-snowball jar
+that are generated from the snowball sources at http://snowball.tartarus.org/
+--------------------------------------------------------------------------
+The BSD License
+
+Copyright (c) 2001, Dr Martin Porter, Copyright (c) 2002, Richard Boulton
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimer in the documentation
+      and/or other materials provided with the distribution.
+
+    * Neither the name of the <ORGANIZATION> nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+==========================================================================
+The following license applies to easymock.jar
+--------------------------------------------------------------------------
+EasyMock 2 License (MIT License)
+Copyright (c) 2001-2007 OFFIS, Tammo Freese.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of 
+this software and associated documentation files (the "Software"), to deal in 
+the Software without restriction, including without limitation the rights to 
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 
+of the Software, and to permit persons to whom the Software is furnished to do 
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all 
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 
+SOFTWARE. 
+
+==========================================================================
+The following license applies to the JQuery JavaScript library
+--------------------------------------------------------------------------
+Copyright (c) 2008 John Resig, http://jquery.com/
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+==========================================================================
+The following license applies to stax-utils.jar
+--------------------------------------------------------------------------
+Copyright (c) 2004, Christian Niles, unit12.net
+Copyright (c) 2004, Sun Microsystems, Inc.
+Copyright (c) 2006, John Kristian 
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above
+      copyright notice, this list of conditions and the following
+      disclaimer in the documentation and/or other materials provided
+      with the distribution.
+
+    * Neither the name of the listed copyright holders nor the names
+      of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written
+      permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+==========================================================================
+The following license applies to JUnit
+--------------------------------------------------------------------------
+Common Public License - v 1.0
+
+THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS COMMON PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.
+
+1. DEFINITIONS
+
+"Contribution" means:
+
+      a) in the case of the initial Contributor, the initial code and documentation distributed under this Agreement, and
+      b) in the case of each subsequent Contributor:
+
+      i) changes to the Program, and
+
+      ii) additions to the Program;
+
+      where such changes and/or additions to the Program originate from and are distributed by that particular Contributor. A Contribution 'originates' from a Contributor if it was added to the Program by such Contributor itself or anyone acting on such Contributor's behalf. Contributions do not include additions to the Program which: (i) are separate modules of software distributed in conjunction with the Program under their own license agreement, and (ii) are not derivative works of the Program. 
+
+"Contributor" means any person or entity that distributes the Program.
+
+"Licensed Patents " mean patent claims licensable by a Contributor which are necessarily infringed by the use or sale of its Contribution alone or when combined with the Program.
+
+"Program" means the Contributions distributed in accordance with this Agreement.
+
+"Recipient" means anyone who receives the Program under this Agreement, including all Contributors.
+
+2. GRANT OF RIGHTS
+
+      a) Subject to the terms of this Agreement, each Contributor hereby grants Recipient a non-exclusive, worldwide, royalty-free copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, distribute and sublicense the Contribution of such Contributor, if any, and such derivative works, in source code and object code form.
+
+      b) Subject to the terms of this Agreement, each Contributor hereby grants Recipient a non-exclusive, worldwide, royalty-free patent license under Licensed Patents to make, use, sell, offer to sell, import and otherwise transfer the Contribution of such Contributor, if any, in source code and object code form. This patent license shall apply to the combination of the Contribution and the Program if, at the time the Contribution is added by the Contributor, such addition of the Contribution causes such combination to be covered by the Licensed Patents. The patent license shall not apply to any other combinations which include the Contribution. No hardware per se is licensed hereunder. 
+
+      c) Recipient understands that although each Contributor grants the licenses to its Contributions set forth herein, no assurances are provided by any Contributor that the Program does not infringe the patent or other intellectual property rights of any other entity. Each Contributor disclaims any liability to Recipient for claims brought by any other entity based on infringement of intellectual property rights or otherwise. As a condition to exercising the rights and licenses granted hereunder, each Recipient hereby assumes sole responsibility to secure any other intellectual property rights needed, if any. For example, if a third party patent license is required to allow Recipient to distribute the Program, it is Recipient's responsibility to acquire that license before distributing the Program.
+
+      d) Each Contributor represents that to its knowledge it has sufficient copyright rights in its Contribution, if any, to grant the copyright license set forth in this Agreement. 
+
+3. REQUIREMENTS
+
+A Contributor may choose to distribute the Program in object code form under its own license agreement, provided that:
+
+      a) it complies with the terms and conditions of this Agreement; and
+
+      b) its license agreement:
+
+      i) effectively disclaims on behalf of all Contributors all warranties and conditions, express and implied, including warranties or conditions of title and non-infringement, and implied warranties or conditions of merchantability and fitness for a particular purpose; 
+
+      ii) effectively excludes on behalf of all Contributors all liability for damages, including direct, indirect, special, incidental and consequential damages, such as lost profits; 
+
+      iii) states that any provisions which differ from this Agreement are offered by that Contributor alone and not by any other party; and
+
+      iv) states that source code for the Program is available from such Contributor, and informs licensees how to obtain it in a reasonable manner on or through a medium customarily used for software exchange. 
+
+When the Program is made available in source code form:
+
+      a) it must be made available under this Agreement; and 
+
+      b) a copy of this Agreement must be included with each copy of the Program. 
+
+Contributors may not remove or alter any copyright notices contained within the Program.
+
+Each Contributor must identify itself as the originator of its Contribution, if any, in a manner that reasonably allows subsequent Recipients to identify the originator of the Contribution.
+
+4. COMMERCIAL DISTRIBUTION
+
+Commercial distributors of software may accept certain responsibilities with respect to end users, business partners and the like. While this license is intended to facilitate the commercial use of the Program, the Contributor who includes the Program in a commercial product offering should do so in a manner which does not create potential liability for other Contributors. Therefore, if a Contributor includes the Program in a commercial product offering, such Contributor ("Commercial Contributor") hereby agrees to defend and indemnify every other Contributor ("Indemnified Contributor") against any losses, damages and costs (collectively "Losses") arising from claims, lawsuits and other legal actions brought by a third party against the Indemnified Contributor to the extent caused by the acts or omissions of such Commercial Contributor in connection with its distribution of the Program in a commercial product offering. The obligations in this section do not apply to any claims or Losses relating to any actual or alleged intellectual property infringement. In order to qualify, an Indemnified Contributor must: a) promptly notify the Commercial Contributor in writing of such claim, and b) allow the Commercial Contributor to control, and cooperate with the Commercial Contributor in, the defense and any related settlement negotiations. The Indemnified Contributor may participate in any such claim at its own expense.
+
+For example, a Contributor might include the Program in a commercial product offering, Product X. That Contributor is then a Commercial Contributor. If that Commercial Contributor then makes performance claims, or offers warranties related to Product X, those performance claims and warranties are such Commercial Contributor's responsibility alone. Under this section, the Commercial Contributor would have to defend claims against the other Contributors related to those performance claims and warranties, and if a court requires any other Contributor to pay any damages as a result, the Commercial Contributor must pay those damages.
+
+5. NO WARRANTY
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the appropriateness of using and distributing the Program and assumes all risks associated with its exercise of rights under this Agreement, including but not limited to the risks and costs of program errors, compliance with applicable laws, damage to or loss of data, programs or equipment, and unavailability or interruption of operations.
+
+6. DISCLAIMER OF LIABILITY
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+7. GENERAL
+
+If any provision of this Agreement is invalid or unenforceable under applicable law, it shall not affect the validity or enforceability of the remainder of the terms of this Agreement, and without further action by the parties hereto, such provision shall be reformed to the minimum extent necessary to make such provision valid and enforceable.
+
+If Recipient institutes patent litigation against a Contributor with respect to a patent applicable to software (including a cross-claim or counterclaim in a lawsuit), then any patent licenses granted by that Contributor to such Recipient under this Agreement shall terminate as of the date such litigation is filed. In addition, if Recipient institutes patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Program itself (excluding combinations of the Program with other software or hardware) infringes such Recipient's patent(s), then such Recipient's rights granted under Section 2(b) shall terminate as of the date such litigation is filed.
+
+All Recipient's rights under this Agreement shall terminate if it fails to comply with any of the material terms or conditions of this Agreement and does not cure such failure in a reasonable period of time after becoming aware of such noncompliance. If all Recipient's rights under this Agreement terminate, Recipient agrees to cease use and distribution of the Program as soon as reasonably practicable. However, Recipient's obligations under this Agreement and any licenses granted by Recipient relating to the Program shall continue and survive.
+
+Everyone is permitted to copy and distribute copies of this Agreement, but in order to avoid inconsistency the Agreement is copyrighted and may only be modified in the following manner. The Agreement Steward reserves the right to publish new versions (including revisions) of this Agreement from time to time. No one other than the Agreement Steward has the right to modify this Agreement. IBM is the initial Agreement Steward. IBM may assign the responsibility to serve as the Agreement Steward to a suitable separate entity. Each new version of the Agreement will be given a distinguishing version number. The Program (including Contributions) may always be distributed subject to the version of the Agreement under which it was received. In addition, after a new version of the Agreement is published, Contributor may elect to distribute the Program (including its Contributions) under the new version. Except as expressly stated in Sections 2(a) and 2(b) above, Recipient receives no rights or licenses to the intellectual property of any Contributor under this Agreement, whether expressly, by implication, estoppel or otherwise. All rights in the Program not expressly granted under this Agreement are reserved.
+
+This Agreement is governed by the laws of the State of New York and the intellectual property laws of the United States of America. No party to this Agreement will bring a legal action under this Agreement more than one year after the cause of action arose. Each party waives its rights to a jury trial in any resulting litigation.
+
+
+lib/xerces-2_6_2-apis.jar
+
+xml-commons/java/external/LICENSE.sax.txt $Id: LICENSE.sax.txt,v 1.1 2002/01/31 23:26:48 curcuru Exp $
+
+
+This license came from: http://www.megginson.com/SAX/copying.html
+  However please note future versions of SAX may be covered 
+  under http://saxproject.org/?selected=pd
+
+
+This page is now out of date -- see the new SAX site at 
+http://www.saxproject.org/ for more up-to-date
+releases and other information. Please change your bookmarks.
+
+
+SAX2 is Free!
+
+I hereby abandon any property rights to SAX 2.0 (the Simple API for
+XML), and release all of the SAX 2.0 source code, compiled code, and
+documentation contained in this distribution into the Public Domain.
+SAX comes with NO WARRANTY or guarantee of fitness for any
+purpose.
+
+David Megginson, david@megginson.com
+2000-05-05
+
+
+xml-commons/java/external/LICENSE.dom-software.txt $Id: LICENSE.dom-software.txt,v 1.1 2002/01/31 23:13:42 curcuru Exp $
+
+
+This license came from: http://www.w3.org/Consortium/Legal/copyright-software-19980720
+
+
+W3C® SOFTWARE NOTICE AND LICENSE
+Copyright © 1994-2001 World Wide Web Consortium,
+(Massachusetts Institute of Technology, Institut National de
+Recherche en Informatique et en Automatique, Keio University).
+All Rights Reserved.
+http://www.w3.org/Consortium/Legal/
+
+This W3C work (including software, documents, or other related
+items) is being provided by the copyright holders under the
+following license. By obtaining, using and/or copying this work,
+you (the licensee) agree that you have read, understood, and will
+comply with the following terms and conditions:
+Permission to use, copy, modify, and distribute this software
+and its documentation, with or without modification,  for any
+purpose and without fee or royalty is hereby granted, provided that
+you include the following on ALL copies of the software and
+documentation or portions thereof, including modifications, that
+you make:
+
+The full text of this NOTICE in a location viewable to users of
+the redistributed or derivative work.
+
+Any pre-existing intellectual property disclaimers, notices, or
+terms and conditions. If none exist, a short notice of the
+following form (hypertext is preferred, text is permitted) should
+be used within the body of any redistributed or derivative code:
+"Copyright © [$date-of-software] World Wide Web Consortium, (Massachusetts Institute of
+Technology, Institut National de
+Recherche en Informatique et en Automatique, Keio University). All Rights Reserved.
+http://www.w3.org/Consortium/Legal/"
+
+Notice of any changes or modifications to the W3C files,
+including the date changes were made. (We recommend you provide 
+URIs to the location from which the code is derived.)
+
+THIS SOFTWARE AND DOCUMENTATION IS PROVIDED "AS IS," AND
+COPYRIGHT HOLDERS MAKE NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO, WARRANTIES OF
+MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE
+USE OF THE SOFTWARE OR DOCUMENTATION WILL NOT INFRINGE ANY THIRD
+PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS.
+COPYRIGHT HOLDERS WILL NOT BE LIABLE FOR ANY DIRECT, INDIRECT,
+SPECIAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF ANY USE OF THE
+SOFTWARE OR DOCUMENTATION.
+
+The name and trademarks of copyright holders may NOT be used in
+advertising or publicity pertaining to the software without
+specific, written prior permission. Title to copyright in this
+software and any associated documentation will at all times remain
+with copyright holders.
+____________________________________
+This formulation of W3C's notice and license became active on
+August 14 1998 so as to improve compatibility with GPL. This
+version ensures that W3C software licensing terms are no more
+restrictive than GPL and consequently W3C software may be
+distributed in GPL packages. See the older formulation for the
+policy prior to this date. Please see our Copyright FAQ for common 
+questions about using materials from
+our site, including specific terms and conditions for packages like
+libwww, Amaya, and Jigsaw. 
+Other questions about this notice can be
+directed to site-policy@w3.org.
+
+webmaster
+(last updated $Date: 2002/01/31 23:13:42 $)
+
+lib/commons-cli-2.0-SNAPSHOT.jar
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+
+lib/commons-codec-1.3.jar
+
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+lib/xerces-2_6_2.jar
+ASF
+
+lib/jetty-5.1.4.jar
+
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+lib/commons-lang-2.1.jar
+
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+lib/commons-beanutils-1.8.0.jar
+
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+
+lib/commons-logging-api-1.0.4.jar
+
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+lib/servlet-api.jar
+ASF
+
+lib/jets3t-0.6.1.jar
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+
+lib/tika-0.1-incubating.jar
+
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+lib/junit-3.8.1.jar
+
+Common Public License Version 1.0
+
+THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS COMMON PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.
+
+1. DEFINITIONS
+
+"Contribution" means:
+
+    a) in the case of the initial Contributor, the initial code and documentation distributed under this Agreement, and
+
+    b) in the case of each subsequent Contributor:
+
+    i) changes to the Program, and
+
+    ii) additions to the Program;
+
+    where such changes and/or additions to the Program originate from and are distributed by that particular Contributor. A Contribution 'originates' from a Contributor if it was added to the Program by such Contributor itself or anyone acting on such Contributor's behalf. Contributions do not include additions to the Program which: (i) are separate modules of software distributed in conjunction with the Program under their own license agreement, and (ii) are not derivative works of the Program.
+
+"Contributor" means any person or entity that distributes the Program.
+
+"Licensed Patents " mean patent claims licensable by a Contributor which are necessarily infringed by the use or sale of its Contribution alone or when combined with the Program.
+
+"Program" means the Contributions distributed in accordance with this Agreement.
+
+"Recipient" means anyone who receives the Program under this Agreement, including all Contributors.
+
+2. GRANT OF RIGHTS
+
+    a) Subject to the terms of this Agreement, each Contributor hereby grants Recipient a non-exclusive, worldwide, royalty-free copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, distribute and sublicense the Contribution of such Contributor, if any, and such derivative works, in source code and object code form.
+
+    b) Subject to the terms of this Agreement, each Contributor hereby grants Recipient a non-exclusive, worldwide, royalty-free patent license under Licensed Patents to make, use, sell, offer to sell, import and otherwise transfer the Contribution of such Contributor, if any, in source code and object code form. This patent license shall apply to the combination of the Contribution and the Program if, at the time the Contribution is added by the Contributor, such addition of the Contribution causes such combination to be covered by the Licensed Patents. The patent license shall not apply to any other combinations which include the Contribution. No hardware per se is licensed hereunder.
+
+    c) Recipient understands that although each Contributor grants the licenses to its Contributions set forth herein, no assurances are provided by any Contributor that the Program does not infringe the patent or other intellectual property rights of any other entity. Each Contributor disclaims any liability to Recipient for claims brought by any other entity based on infringement of intellectual property rights or otherwise. As a condition to exercising the rights and licenses granted hereunder, each Recipient hereby assumes sole responsibility to secure any other intellectual property rights needed, if any. For example, if a third party patent license is required to allow Recipient to distribute the Program, it is Recipient's responsibility to acquire that license before distributing the Program.
+
+    d) Each Contributor represents that to its knowledge it has sufficient copyright rights in its Contribution, if any, to grant the copyright license set forth in this Agreement.
+
+3. REQUIREMENTS
+
+A Contributor may choose to distribute the Program in object code form under its own license agreement, provided that:
+
+    a) it complies with the terms and conditions of this Agreement; and
+
+    b) its license agreement:
+
+    i) effectively disclaims on behalf of all Contributors all warranties and conditions, express and implied, including warranties or conditions of title and non-infringement, and implied warranties or conditions of merchantability and fitness for a particular purpose;
+
+    ii) effectively excludes on behalf of all Contributors all liability for damages, including direct, indirect, special, incidental and consequential damages, such as lost profits;
+
+    iii) states that any provisions which differ from this Agreement are offered by that Contributor alone and not by any other party; and
+
+    iv) states that source code for the Program is available from such Contributor, and informs licensees how to obtain it in a reasonable manner on or through a medium customarily used for software exchange. 
+
+When the Program is made available in source code form:
+
+    a) it must be made available under this Agreement; and
+
+    b) a copy of this Agreement must be included with each copy of the Program. 
+
+Contributors may not remove or alter any copyright notices contained within the Program.
+
+Each Contributor must identify itself as the originator of its Contribution, if any, in a manner that reasonably allows subsequent Recipients to identify the originator of the Contribution.
+
+4. COMMERCIAL DISTRIBUTION
+
+Commercial distributors of software may accept certain responsibilities with respect to end users, business partners and the like. While this license is intended to facilitate the commercial use of the Program, the Contributor who includes the Program in a commercial product offering should do so in a manner which does not create potential liability for other Contributors. Therefore, if a Contributor includes the Program in a commercial product offering, such Contributor ("Commercial Contributor") hereby agrees to defend and indemnify every other Contributor ("Indemnified Contributor") against any losses, damages and costs (collectively "Losses") arising from claims, lawsuits and other legal actions brought by a third party against the Indemnified Contributor to the extent caused by the acts or omissions of such Commercial Contributor in connection with its distribution of the Program in a commercial product offering. The obligations in this section do not apply to any claims or Losses relating to any actual or alleged intellectual property infringement. In order to qualify, an Indemnified Contributor must: a) promptly notify the Commercial Contributor in writing of such claim, and b) allow the Commercial Contributor to control, and cooperate with the Commercial Contributor in, the defense and any related settlement negotiations. The Indemnified Contributor may participate in any such claim at its own expense.
+
+For example, a Contributor might include the Program in a commercial product offering, Product X. That Contributor is then a Commercial Contributor. If that Commercial Contributor then makes performance claims, or offers warranties related to Product X, those performance claims and warranties are such Commercial Contributor's responsibility alone. Under this section, the Commercial Contributor would have to defend claims against the other Contributors related to those performance claims and warranties, and if a court requires any other Contributor to pay any damages as a result, the Commercial Contributor must pay those damages.
+
+5. NO WARRANTY
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the appropriateness of using and distributing the Program and assumes all risks associated with its exercise of rights under this Agreement, including but not limited to the risks and costs of program errors, compliance with applicable laws, damage to or loss of data, programs or equipment, and unavailability or interruption of operations.
+
+6. DISCLAIMER OF LIABILITY
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+7. GENERAL
+
+If any provision of this Agreement is invalid or unenforceable under applicable law, it shall not affect the validity or enforceability of the remainder of the terms of this Agreement, and without further action by the parties hereto, such provision shall be reformed to the minimum extent necessary to make such provision valid and enforceable.
+
+If Recipient institutes patent litigation against a Contributor with respect to a patent applicable to software (including a cross-claim or counterclaim in a lawsuit), then any patent licenses granted by that Contributor to such Recipient under this Agreement shall terminate as of the date such litigation is filed. In addition, if Recipient institutes patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Program itself (excluding combinations of the Program with other software or hardware) infringes such Recipient's patent(s), then such Recipient's rights granted under Section 2(b) shall terminate as of the date such litigation is filed.
+
+All Recipient's rights under this Agreement shall terminate if it fails to comply with any of the material terms or conditions of this Agreement and does not cure such failure in a reasonable period of time after becoming aware of such noncompliance. If all Recipient's rights under this Agreement terminate, Recipient agrees to cease use and distribution of the Program as soon as reasonably practicable. However, Recipient's obligations under this Agreement and any licenses granted by Recipient relating to the Program shall continue and survive.
+
+Everyone is permitted to copy and distribute copies of this Agreement, but in order to avoid inconsistency the Agreement is copyrighted and may only be modified in the following manner. The Agreement Steward reserves the right to publish new versions (including revisions) of this Agreement from time to time. No one other than the Agreement Steward has the right to modify this Agreement. IBM is the initial Agreement Steward. IBM may assign the responsibility to serve as the Agreement Steward to a suitable separate entity. Each new version of the Agreement will be given a distinguishing version number. The Program (including Contributions) may always be distributed subject to the version of the Agreement under which it was received. In addition, after a new version of the Agreement is published, Contributor may elect to distribute the Program (including its Contributions) under the new version. Except as expressly stated in Sections 2(a) and 2(b) above, Recipient receives no rights or licenses to the intellectual property of any Contributor under this Agreement, whether expressly, by implication, estoppel or otherwise. All rights in the Program not expressly granted under this Agreement are reserved.
+
+This Agreement is governed by the laws of the State of New York and the intellectual property laws of the United States of America. No party to this Agreement will bring a legal action under this Agreement more than one year after the cause of action arose. Each party waives its rights to a jury trial in any resulting litigation.
+
+7. GENERAL
+
+If any provision of this Agreement is invalid or unenforceable under applicable law, it shall not affect the validity or enforceability of the remainder of the terms of this Agreement, and without further action by the parties hereto, such provision shall be reformed to the minimum extent necessary to make such provision valid and enforceable.
+
+If Recipient institutes patent litigation against a Contributor with respect to a patent applicable to software (including a cross-claim or counterclaim in a lawsuit), then any patent licenses granted by that Contributor to such Recipient under this Agreement shall terminate as of the date such litigation is filed. In addition, if Recipient institutes patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Program itself (excluding combinations of the Program with other software or hardware) infringes such Recipient's patent(s), then such Recipient's rights granted under Section 2(b) shall terminate as of the date such litigation is filed.
+
+All Recipient's rights under this Agreement shall terminate if it fails to comply with any of the material terms or conditions of this Agreement and does not cure such failure in a reasonable period of time after becoming aware of such noncompliance. If all Recipient's rights under this Agreement terminate, Recipient agrees to cease use and distribution of the Program as soon as reasonably practicable. However, Recipient's obligations under this Agreement and any licenses granted by Recipient relating to the Program shall continue and survive.
+
+Everyone is permitted to copy and distribute copies of this Agreement, but in order to avoid inconsistency the Agreement is copyrighted and may only be modified in the following manner. The Agreement Steward reserves the right to publish new versions (including revisions) of this Agreement from time to time. No one other than the Agreement Steward has the right to modify this Agreement. IBM is the initial Agreement Steward. IBM may assign the responsibility to serve as the Agreement Steward to a suitable separate entity. Each new version of the Agreement will be given a distinguishing version number. The Program (including Contributions) may always be distributed subject to the version of the Agreement under which it was received. In addition, after a new version of the Agreement is published, Contributor may elect to distribute the Program (including its Contributions) under the new version. Except as expressly stated in Sections 2(a) and 2(b) above, Recipient receives no rights or licenses to the intellectual property of any Contributor under this Agreement, whether expressly, by implication, estoppel or otherwise. All rights in the Program not expressly granted under this Agreement are reserved.
+
+This Agreement is governed by the laws of the State of New York and the intellectual property laws of the United States of America. No party to this Agreement will bring a legal action under this Agreement more than one year after the cause of action arose. Each party waives its rights to a jury trial in any resulting litigation.
+
+lib/taglibs-i18n.jar
+ASF
+
+lib/log4j-1.2.15.jar
+ASF
+
+src/plugin/feed/lib/rome-0.9.jar
+Copyright 2004 Sun Microsystems, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+src/plugin/lib-jakarta-poi/lib/poi-scratchpad-3.5-beta4-20081128.jar
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+
+
+
+Office Open XML (OOXML) xsds:
+-----------------------------
+
+These were downloaded as part of the Office Open XML ECMA Specification 
+from <http://www.ecma-international.org/publications/standards/Ecma-376.htm>
+
+These are included within the Apache POI distribution, and are available
+under compatible licensing terms.
+
+Copyright - ECMA International, "made available without restriction"
+    http://www.ecma-international.org/memento/Ecmabylaws.htm - section 9.4
+Patent License - Microsoft Open Specification Promise (OSP) 
+    http://www.microsoft.com/interop/osp/
+
+
+src/plugin/lib-jakarta-poi/lib/poi-3.5-beta4-20081128.jar
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+
+
+
+Office Open XML (OOXML) xsds:
+-----------------------------
+
+These were downloaded as part of the Office Open XML ECMA Specification 
+from <http://www.ecma-international.org/publications/standards/Ecma-376.htm>
+
+These are included within the Apache POI distribution, and are available
+under compatible licensing terms.
+
+Copyright - ECMA International, "made available without restriction"
+    http://www.ecma-international.org/memento/Ecmabylaws.htm - section 9.4
+Patent License - Microsoft Open Specification Promise (OSP) 
+    http://www.microsoft.com/interop/osp/
+
+src/plugin/urlfilter-automaton/lib/automaton.jar
+
+dk.brics.automaton
+------------------
+
+Copyright (C) 2001-2004 Anders Moeller
+
+This source code in this package may be used under the terms of the
+BSD license.  Please read the file 'COPYING' for details.
+
+This package contains a full DFA/NFA implementation with Unicode
+alphabet and support for all standard regular expression operations.
+
+For more information, go to the package home page at
+http://www.brics.dk/~amoeller/automaton/
+
+
+Anders Moeller
+amoeller@brics.dk
+
+src/plugin/lib-nekohtml/lib/nekohtml-0.9.4.jar
+
+The CyberNeko Software License, Version 1.0
+
+ 
+(C) Copyright 2002,2003, Andy Clark.  All rights reserved.
+ 
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer. 
+
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in
+   the documentation and/or other materials provided with the
+   distribution.
+
+3. The end-user documentation included with the redistribution,
+   if any, must include the following acknowledgment:  
+     "This product includes software developed by Andy Clark."
+   Alternately, this acknowledgment may appear in the software itself,
+   if and wherever such third-party acknowledgments normally appear.
+
+4. The names "CyberNeko" and "NekoHTML" must not be used to endorse
+   or promote products derived from this software without prior 
+   written permission. For written permission, please contact 
+   andy@cyberneko.net.
+
+5. Products derived from this software may not be called "CyberNeko",
+   nor may "CyberNeko" appear in their name, without prior written
+   permission of the author.
+
+THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS
+BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, 
+OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT 
+OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 
+OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 
+EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+====================================================================
+
+This license is based on the Apache Software License, version 1.1.
+
+
+src/plugin/clustering-carrot2/lib/violinstrings-1.0.2.jar
+
+Copyright (c) Michael Schmeling 1998, 2000 - All Rights Reserved
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, provided that the above copyright notice(s) and this
+permission notice appear in all copies of the Software and that both the
+above copyright notice(s) and this permission notice appear in supporting
+documentation.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
+BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
+OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder shall
+not be used in advertising or otherwise to promote the sale, use or other
+dealings in this Software without prior written authorization of the
+copyright holder.
+
+src/plugin/clustering-carrot2/lib/carrot2-util-common.jar
+src/plugin/clustering-carrot2/lib/carrot2-filter-lingo.jar
+src/plugin/clustering-carrot2/lib/carrot2-snowball-stemmers.jar
+src/plugin/clustering-carrot2/lib/carrot2-util-tokenizer.jar
+src/plugin/clustering-carrot2/lib/carrot2-local-core.jar
+
+
+Carrot2 Project
+
+Copyright (C) 2002-2006, Dawid Weiss, Stanisław Osiński.
+Portions (C) Contributors listed in "carrot2.CONTRIBUTORS" file.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+- Redistributions of  source code must  retain the above  copyright notice, this
+  list of conditions and the following disclaimer. 
+
+- Redistributions in binary form must reproduce the above copyright notice, this
+  list of conditions and the following  disclaimer in  the documentation  and/or
+  other materials provided with the distribution. 
+
+- Neither the name  of the Poznan University  of Technology, Poznan, Poland  nor
+  the names  of  its contributors may  be used  to endorse  or promote  products 
+  derived from this software without specific prior written permission.
+
+- We request that  you include in the  end-user documentation provided with  the
+  redistribution and/or in the software itself  an acknowledgement equivalent to
+  the following: "This  product  includes  software  developed  by  the  Carrot2
+  Project."
+  
+- No algorithms or technical solutions in the project may be patented or claimed
+  proprietary.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"  AND
+ANY EXPRESS OR  IMPLIED WARRANTIES, INCLUDING,  BUT NOT LIMITED  TO, THE IMPLIED
+WARRANTIES  OF  MERCHANTABILITY  AND  FITNESS  FOR  A  PARTICULAR  PURPOSE   ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE  FOR
+ANY DIRECT, INDIRECT, INCIDENTAL,  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL  DAMAGES
+(INCLUDING, BUT  NOT LIMITED  TO, PROCUREMENT  OF SUBSTITUTE  GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS;  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND  ON
+ANY  THEORY  OF  LIABILITY,  WHETHER  IN  CONTRACT,  STRICT  LIABILITY,  OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY  OUT OF THE USE  OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+src/plugin/clustering-carrot2/lib/commons-collections-3.2.jar
+ASF
+
+src/plugin/clustering-carrot2/lib/Jama-1.0.2.jar
+
+Copyright Notice This software is a cooperative product of The MathWorks and the 
+National Institute of Standards and Technology (NIST) which has been released to 
+the public domain. Neither The MathWorks nor NIST assumes any responsibility 
+whatsoever for its use by other parties, and makes no guarantees, expressed or 
+implied, about its quality, reliability, or any other characteristic. 
+
+src/plugin/clustering-carrot2/lib/commons-pool-1.3.jar
+ASF
+
+src/plugin/protocol-ftp/lib/commons-net-1.2.0-dev.jar
+ASF
+
+src/plugin/ontology/lib/jena-2.1.jar
+
+/*
+ * (c) Copyright 2000, 2001, 2002, 2003, 2004 Hewlett-Packard Development Company, LP
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+This product includes software developed by the
+Apache Software Foundation (http://www.apache.org/).
+
+src/plugin/parse-swf/lib/javaswf.jar
+
+
+  Copyright (c) 2001-2005, David N. Main, All rights reserved.
+  
+  Redistribution and use in source and binary forms, with or
+  without modification, are permitted provided that the 
+  following conditions are met:
+ 
+  1. Redistributions of source code must retain the above 
+  copyright notice, this list of conditions and the following 
+  disclaimer. 
+  
+  2. Redistributions in binary form must reproduce the above 
+  copyright notice, this list of conditions and the following 
+  disclaimer in the documentation and/or other materials 
+  provided with the distribution.
+  
+  3. The name of the author may not be used to endorse or 
+  promote products derived from this software without specific 
+  prior written permission. 
+  
+  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY 
+  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
+  THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 
+  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+  AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
+  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
+  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
+  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 
+  OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 
+  EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+src/plugin/response-json/lib/json-lib-2.2.2-jdk15.jar
+
+                                Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+src/plugin/response-json/lib/ezmorph-1.0.6.jar
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+src/plugin/parse-html/lib/tagsoup-1.2.jar
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+src/plugin/parse-pdf/lib/PDFBox-0.7.4-dev.jar
+
+Copyright (c) 2003-2005, www.pdfbox.org
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice,
+   this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+3. Neither the name of pdfbox; nor the names of its
+   contributors may be used to endorse or promote products derived from this
+   software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+src/plugin/parse-pdf/lib/JempBox-0.2.0.jar
+
+Copyright (c) 2006-2007, www.jempbox.org
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice,
+   this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+3. Neither the name of pdfbox; nor the names of its
+   contributors may be used to endorse or promote products derived from this
+   software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+src/plugin/parse-pdf/lib/FontBox-0.2.0-dev.jar
+
+Copyright (c) 2003-2005, www.fontbox.org
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice,
+   this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+3. Neither the name of fontbox; nor the names of its
+   contributors may be used to endorse or promote products derived from this
+   software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+src/plugin/parse-pdf/lib/bcprov-jdk14-132.jar
+
+Copyright (c) 2000 - 2008 The Legion Of The Bouncy Castle (http://www.bouncycastle.org)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+
+
+src/plugin/lib-xml/lib/jdom.jar
+
+/*-- 
+
+ $Id: LICENSE.txt,v 1.11 2004/02/06 09:32:57 jhunter Exp $
+
+ Copyright (C) 2000-2004 Jason Hunter & Brett McLaughlin.
+ All rights reserved.
+ 
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ 
+ 1. Redistributions of source code must retain the above copyright
+    notice, this list of conditions, and the following disclaimer.
+ 
+ 2. Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions, and the disclaimer that follows 
+    these conditions in the documentation and/or other materials 
+    provided with the distribution.
+
+ 3. The name "JDOM" must not be used to endorse or promote products
+    derived from this software without prior written permission.  For
+    written permission, please contact <request_AT_jdom_DOT_org>.
+ 
+ 4. Products derived from this software may not be called "JDOM", nor
+    may "JDOM" appear in their name, without prior written permission
+    from the JDOM Project Management <request_AT_jdom_DOT_org>.
+ 
+ In addition, we request (but do not require) that you include in the 
+ end-user documentation provided with the redistribution and/or in the 
+ software itself an acknowledgement equivalent to the following:
+     "This product includes software developed by the
+      JDOM Project (http://www.jdom.org/)."
+ Alternatively, the acknowledgment may be graphical using the logos 
+ available at http://www.jdom.org/images/logos.
+
+ THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED.  IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ SUCH DAMAGE.
+
+ This software consists of voluntary contributions made by many 
+ individuals on behalf of the JDOM Project and was originally 
+ created by Jason Hunter <jhunter_AT_jdom_DOT_org> and
+ Brett McLaughlin <brett_AT_jdom_DOT_org>.  For more information
+ on the JDOM Project, please see <http://www.jdom.org/>. 
+
+ */
+
+src/plugin/lib-xml/lib/jaxen-jdom.jar
+src/plugin/lib-xml/lib/jaxen-core.jar
+
+/*
+ $Id: LICENSE.txt 1128 2006-02-05 21:49:04Z elharo $
+
+ Copyright 2003-2006 The Werken Company. All Rights Reserved.
+ 
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+  * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+
+  * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in the
+    documentation and/or other materials provided with the distribution.
+
+  * Neither the name of the Jaxen Project nor the names of its
+    contributors may be used to endorse or promote products derived 
+    from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ */
+
+
+src/plugin/lib-xml/lib/saxpath.jar
+
+/*-- 
+
+ $Id: LICENSE,v 1.1 2002/04/26 17:43:56 jstrachan Exp $
+
+ Copyright (C) 2000-2002 werken digital.
+ All rights reserved.
+ 
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ 
+ 1. Redistributions of source code must retain the above copyright
+    notice, this list of conditions, and the following disclaimer.
+ 
+ 2. Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions, and the disclaimer that follows 
+    these conditions in the documentation and/or other materials 
+    provided with the distribution.
+
+ 3. The name "SAXPath" must not be used to endorse or promote products
+    derived from this software without prior written permission.  For
+    written permission, please contact license@saxpath.org.
+ 
+ 4. Products derived from this software may not be called "SAXPath", nor
+    may "SAXPath" appear in their name, without prior written permission
+    from the SAXPath Project Management (pm@saxpath.org).
+ 
+ In addition, we request (but do not require) that you include in the 
+ end-user documentation provided with the redistribution and/or in the 
+ software itself an acknowledgement equivalent to the following:
+     "This product includes software developed by the
+      SAXPath Project (http://www.saxpath.org/)."
+ Alternatively, the acknowledgment may be graphical using the logos 
+ available at http://www.saxpath.org/
+
+ THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED.  IN NO EVENT SHALL THE SAXPath AUTHORS OR THE PROJECT
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ SUCH DAMAGE.
+
+ This software consists of voluntary contributions made by many 
+ individuals on behalf of the SAXPath Project and was originally 
+ created by bob mcwhirter <bob@werken.com> and 
+ James Strachan <jstrachan@apache.org>.  For more information on the 
+ SAXPath Project, please see <http://www.saxpath.org/>.
+ 
+ */
+
+src/plugin/lib-xml/lib/xercesImpl.jar
+
+ASF
+
+
+ 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/NOTICE.txt	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,13 @@
+Apache Nutch
+Copyright 2012 The Apache Software Foundation
+
+This product includes software developed by The Apache Software
+Foundation (http://www.apache.org/).
+
+This product includes software developed by the following copyright owners:
+
+Nutch includes JavaSWF:
+Copyright (c) 2001-2005, David N. Main, All rights reserved.
+
+Nutch includes Automaton:
+This package is Copyright © 2001-2008 Anders Møller. All rights reserved.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.txt	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,36 @@
+Apache Nutch README
+
+For the latest information about Nutch, please visit our website at:
+
+   http://nutch.apache.org
+
+and our wiki, at:
+
+   http://wiki.apache.org/nutch/
+
+To get started using Nutch, read the tutorial:
+
+   http://wiki.apache.org/nutch/NutchTutorial
+   
+Export Control
+
+This distribution includes cryptographic software.  The country in which you 
+currently reside may have restrictions on the import, possession, use, and/or 
+re-export to another country, of encryption software.  BEFORE using any encryption 
+software, please check your country's laws, regulations and policies concerning the
+import, possession, or use, and re-export of encryption software, to see if this is 
+permitted.  See <http://www.wassenaar.org/> for more information. 
+
+The U.S. Government Department of Commerce, Bureau of Industry and Security (BIS), has 
+classified this software as Export Commodity Control Number (ECCN) 5D002.C.1, which 
+includes information security software using or performing cryptographic functions with 
+asymmetric algorithms.  The form and manner of this Apache Software Foundation 
+distribution makes it eligible for export under the License Exception ENC Technology 
+Software Unrestricted (TSU) exception (see the BIS Export Administration Regulations, 
+Section 740.13) for both object code and source code.
+
+The following provides more details on the included cryptographic software:
+
+Apache Nutch uses the PDFBox API in its parse-tika plugin for extracting textual content 
+and metadata from encrypted PDF files. See http://pdfbox.apache.org for more 
+details on PDFBox.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/build.xml	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,845 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project name="${name}" default="runtime" xmlns:ivy="antlib:org.apache.ivy.ant" xmlns:artifact="antlib:org.apache.maven.artifact.ant">
+
+  <!-- Load all the default properties, and any the user wants     -->
+  <!-- to contribute (without having to type -D or edit this file) -->
+  <property file="${user.home}/build.properties" />
+  <property file="${basedir}/build.properties" />
+  <property file="${basedir}/default.properties" />
+  <property name="test.junit.output.format" value="plain"/>
+  <property name="release.dir" value="${build.dir}/release"/>
+  
+  <!-- Maven coordinates, deployment repository URL/id and release artifact paths -->
+  <property name="groupId" value="org.apache.nutch" />
+  <property name="artifactId" value="nutch" />
+  <property name="maven-repository-url" value="https://repository.apache.org/service/local/staging/deploy/maven2" />
+  <property name="maven-repository-id" value="apache.releases.https" />
+  <property name="maven-jar" value="${release.dir}/${artifactId}-${version}.jar" />
+  <property name="maven-javadoc-jar" value="${release.dir}/${artifactId}-${version}-javadoc.jar" />
+  <property name="maven-sources-jar" value="${release.dir}/${artifactId}-${version}-sources.jar" />
+
+ 
+  <!-- the normal classpath: ${build.classes} plus every jar directly in ${build.lib.dir} -->
+  <path id="classpath">
+    <pathelement location="${build.classes}"/>
+    <fileset dir="${build.lib.dir}">
+      <include name="*.jar" />
+    </fileset>
+  </path>
+
+  <!-- the unit test classpath; plugins.classpath.dir is the directory containing ${build.plugins} -->
+  <dirname property="plugins.classpath.dir" file="${build.plugins}"/>
+  <path id="test.classpath">
+    <pathelement location="${test.build.classes}" />
+    <pathelement location="${conf.dir}"/>
+    <pathelement location="${test.src.dir}"/>
+    <pathelement location="${plugins.classpath.dir}"/>
+    <path refid="classpath"/>
+    <pathelement location="${build.dir}/${final.name}.job" />
+    <fileset dir="${build.lib.dir}">
+      <include name="*.jar" />
+    </fileset>
+  </path>
+
+  <!-- ====================================================== -->
+  <!-- Create build dirs; copy *.template files to real names -->
+  <!-- ====================================================== -->
+  <target name="init" depends="ivy-init" description="--> stuff required by all targets">
+    <mkdir dir="${build.dir}"/>
+    <mkdir dir="${build.classes}"/>
+    <mkdir dir="${release.dir}"/>
+
+    <mkdir dir="${test.build.dir}"/>
+    <mkdir dir="${test.build.classes}"/>
+
+    <touch datetime="01/25/1971 2:00 pm">
+      <fileset dir="${conf.dir}" includes="**/*.template"/>
+    </touch>
+
+    <copy todir="${conf.dir}" verbose="true">
+      <fileset dir="${conf.dir}" includes="**/*.template"/>
+      <mapper type="glob" from="*.template" to="*"/>
+    </copy>
+  </target>
+
+  <!-- ====================================================== -->
+  <!-- Compile the Java files (core classes and plugins)      -->
+  <!-- ====================================================== -->
+  <target name="compile" depends="compile-core, compile-plugins" description="--> compile all Java files"/>
+
+  <target name="compile-core" depends="init, resolve-default" description="--> compile core Java files only">
+    <javac 
+     encoding="${build.encoding}" 
+     srcdir="${src.dir}"
+     includes="org/apache/nutch/**/*.java"
+     destdir="${build.classes}"
+     debug="${javac.debug}"
+     optimize="${javac.optimize}"
+     target="${javac.version}"
+     source="${javac.version}"
+     deprecation="${javac.deprecation}">
+      <compilerarg value="-Xlint"/>
+      <classpath refid="classpath"/>
+    </javac>    
+  </target>
+
+  <target name="compile-plugins" depends="init, resolve-default" description="--> compile plugins only">
+    <ant dir="src/plugin" target="deploy" inheritAll="false"/>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Make nutch.jar                                                     -->
+  <!-- ================================================================== -->
+  <!-- Bundles compiled core classes + nutch-default.xml/nutch-site.xml   -->
+  <!-- ================================================================== -->
+  <target name="jar" depends="compile-core" description="--> make nutch.jar">
+    <copy file="${conf.dir}/nutch-default.xml"
+          todir="${build.classes}"/>
+    <copy file="${conf.dir}/nutch-site.xml"
+          todir="${build.classes}"/>
+    <jar jarfile="${build.dir}/${final.name}.jar"
+         basedir="${build.classes}">
+      <manifest>
+      </manifest>
+    </jar>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Make Maven Central Release                                         -->
+  <!-- ================================================================== -->
+  <!-- Builds the main, javadoc and sources jars for a Maven release      -->
+  <!-- ================================================================== -->  
+  <target name="release" depends="compile-core" description="--> generate the release distribution">
+    <copy file="${conf.dir}/nutch-default.xml"
+          todir="${build.classes}"/>
+    <copy file="${conf.dir}/nutch-site.xml"
+          todir="${build.classes}"/>
+          
+	<!-- build the main artifact from ${build.classes} (config files copied in above) -->
+	<jar jarfile="${maven-jar}" basedir="${build.classes}" />
+
+	<!-- build the javadoc artifact; NOTE(review): the lib-http and urlfilter-prefix packagesets are each listed twice below -->
+    <javadoc
+      destdir="${release.dir}/javadoc"
+      overview="${src.dir}/overview.html"
+      author="true"
+      version="true"
+      use="true"
+      windowtitle="${name} ${version} API"
+      doctitle="${name} ${version} API"
+      bottom="Copyright &amp;copy; ${year} The Apache Software Foundation"
+      >
+        <arg value="${javadoc.proxy.host}"/>
+        <arg value="${javadoc.proxy.port}"/>
+
+      <packageset dir="${src.dir}"/>
+      <packageset dir="${plugins.dir}/creativecommons/src/java"/>
+      <packageset dir="${plugins.dir}/feed/src/java"/>
+      <packageset dir="${plugins.dir}/headings/src/java"/>
+      <packageset dir="${plugins.dir}/index-anchor/src/java"/>
+      <packageset dir="${plugins.dir}/index-basic/src/java"/>
+      <packageset dir="${plugins.dir}/index-metadata/src/java"/>
+      <packageset dir="${plugins.dir}/index-more/src/java"/>
+      <packageset dir="${plugins.dir}/index-static/src/java"/>
+      <packageset dir="${plugins.dir}/language-identifier/src/java"/>
+      <packageset dir="${plugins.dir}/lib-http/src/java"/>
+      <packageset dir="${plugins.dir}/lib-regex-filter/src/java"/>
+      <packageset dir="${plugins.dir}/microformats-reltag/src/java"/>
+      <packageset dir="${plugins.dir}/parse-ext/src/java"/>
+      <packageset dir="${plugins.dir}/parse-html/src/java"/>
+      <packageset dir="${plugins.dir}/parse-js/src/java"/>
+      <packageset dir="${plugins.dir}/parse-metatags/src/java"/>
+      <packageset dir="${plugins.dir}/parse-swf/src/java"/>
+      <packageset dir="${plugins.dir}/parse-tika/src/java"/>
+      <packageset dir="${plugins.dir}/parse-zip/src/java"/>
+      <packageset dir="${plugins.dir}/lib-http/src/java"/>
+      <packageset dir="${plugins.dir}/protocol-file/src/java"/>
+      <packageset dir="${plugins.dir}/protocol-ftp/src/java"/>
+      <packageset dir="${plugins.dir}/protocol-http/src/java"/>
+      <packageset dir="${plugins.dir}/protocol-httpclient/src/java"/>
+      <packageset dir="${plugins.dir}/scoring-link/src/java"/>
+      <packageset dir="${plugins.dir}/scoring-opic/src/java"/>
+      <packageset dir="${plugins.dir}/subcollection/src/java"/>
+      <packageset dir="${plugins.dir}/tld/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-automaton/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-domain/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-domainblacklist/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-prefix/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-regex/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-prefix/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-suffix/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-validator/src/java"/>
+      <packageset dir="${plugins.dir}/urlmeta/src/java"/>
+      <packageset dir="${plugins.dir}/urlnormalizer-basic/src/java"/>
+      <packageset dir="${plugins.dir}/urlnormalizer-pass/src/java"/>
+      <packageset dir="${plugins.dir}/urlnormalizer-regex/src/java"/>
+      
+      <link href="${javadoc.link.java}"/>
+      <link href="${javadoc.link.lucene}"/>
+      <link href="${javadoc.link.hadoop}"/>
+      
+      <classpath refid="classpath"/>
+    	<classpath>
+    		<fileset dir="${plugins.dir}" >
+    			<include name="**/*.jar"/>
+    		</fileset>
+    	</classpath>
+    	
+      <group title="Core" packages="org.apache.nutch.*"/>
+      <group title="Plugins API" packages="${plugins.api}"/>
+      <group title="Protocol Plugins" packages="${plugins.protocol}"/>
+      <group title="URL Filter Plugins" packages="${plugins.urlfilter}"/>
+      <group title="Scoring Plugins" packages="${plugins.scoring}"/>
+      <group title="Parse Plugins" packages="${plugins.parse}"/>
+      <group title="Indexing Filter Plugins" packages="${plugins.index}"/>
+      <group title="Misc. Plugins" packages="${plugins.misc}"/>
+    </javadoc>	
+	<jar jarfile="${maven-javadoc-jar}">
+		<fileset dir="${release.dir}/javadoc" />
+	</jar>
+
+	<!-- build the sources artifact from everything under ${src.dir} -->
+	<jar jarfile="${maven-sources-jar}">
+		<fileset dir="${src.dir}" />
+	</jar>
+  </target>
+  
+  <!-- ================================================================== -->
+  <!-- Deploy to Apache Nexus                                             -->
+  <!-- ================================================================== -->
+  <!-- Signs and uploads the main, sources and javadoc jars via Maven     -->
+  <!-- ================================================================== -->  
+  <target name="deploy" depends="release" description="--> deploy to Apache Nexus">
+	
+	<!-- generate pom.xml in ${basedir} from the Ivy file, using the ivy/mvn.template template -->
+	<ivy:makepom ivyfile="${ivy.file}" pomfile="${basedir}/pom.xml" templatefile="ivy/mvn.template">
+	   <mapping conf="default" scope="compile"/>
+	   <mapping conf="runtime" scope="runtime"/>
+	</ivy:makepom>
+
+	<!-- sign and deploy the main artifact -->
+	<artifact:mvn>
+		<arg value="org.apache.maven.plugins:maven-gpg-plugin:1.1:sign-and-deploy-file" />
+		<arg value="-Durl=${maven-repository-url}" />
+		<arg value="-DrepositoryId=${maven-repository-id}" />
+		<arg value="-DpomFile=pom.xml" />
+		<arg value="-Dfile=${maven-jar}" />
+                       <arg value="-Papache-release" />
+	</artifact:mvn>
+
+	<!-- sign and deploy the sources artifact -->
+	<artifact:mvn>
+		<arg value="org.apache.maven.plugins:maven-gpg-plugin:1.1:sign-and-deploy-file" />
+		<arg value="-Durl=${maven-repository-url}" />
+		<arg value="-DrepositoryId=${maven-repository-id}" />
+		<arg value="-DpomFile=pom.xml" />
+		<arg value="-Dfile=${maven-sources-jar}" />
+		<arg value="-Dclassifier=sources" />
+                       <arg value="-Papache-release" />
+	</artifact:mvn>
+
+	<!-- sign and deploy the javadoc artifact -->
+	<artifact:mvn>
+		<arg value="org.apache.maven.plugins:maven-gpg-plugin:1.1:sign-and-deploy-file" />
+		<arg value="-Durl=${maven-repository-url}" />
+		<arg value="-DrepositoryId=${maven-repository-id}" />
+		<arg value="-DpomFile=pom.xml" />
+		<arg value="-Dfile=${maven-javadoc-jar}" />
+		<arg value="-Dclassifier=javadoc" />
+                       <arg value="-Papache-release" />
+	</artifact:mvn>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Make job jar                                                       -->
+  <!-- ================================================================== -->
+  <!-- Packs classes, conf, lib jars and plugins into ${final.name}.job   -->
+  <!-- ================================================================== -->
+  <target name="job" depends="compile" description="--> make nutch.job jar">
+    <jar jarfile="${build.dir}/${final.name}.job">
+      <!-- If build.classes contains the nutch config files because the jar
+           target has already run, exclude them.  The conf directory has
+           them.
+      -->
+      <zipfileset dir="${build.classes}"
+                  excludes="nutch-default.xml,nutch-site.xml"/>
+      <zipfileset dir="${conf.dir}" excludes="*.template,hadoop*.*"/>
+      <zipfileset dir="${build.lib.dir}" prefix="lib"
+                  includes="**/*.jar" excludes="hadoop-*.jar"/>
+      <zipfileset dir="${build.plugins}" prefix="classes/plugins"/>
+    </jar>
+  </target>
+
+  <target name="runtime" depends="jar, job" description="--> default target for running Nutch">
+    <mkdir dir="${runtime.dir}"/>
+    <mkdir dir="${runtime.local}"/>
+    <mkdir dir="${runtime.deploy}"/>
+    <!-- deploy area: the job file plus the src/bin scripts, made executable -->
+    <copy file="${build.dir}/${final.name}.job"
+          todir="${runtime.deploy}"/>
+    <copy todir="${runtime.deploy}/bin">
+      <fileset dir="src/bin"/>
+    </copy>
+    <chmod perm="ugo+x" type="file">
+        <fileset dir="${runtime.deploy}/bin"/>
+    </chmod>
+    <!-- local area: nutch jar, native libs, conf (minus templates), executable scripts, build libs, plugins, test output -->
+    <copy file="${build.dir}/${final.name}.jar"
+          todir="${runtime.local}/lib"/>
+    <copy todir="${runtime.local}/lib/native">
+      <fileset dir="lib/native"/>
+    </copy>
+    <copy todir="${runtime.local}/conf">
+      <fileset dir="conf" excludes="*.template"/>
+    </copy>
+    <copy todir="${runtime.local}/bin">
+      <fileset dir="src/bin"/>
+    </copy>
+    <chmod perm="ugo+x" type="file">
+        <fileset dir="${runtime.local}/bin"/>
+    </chmod>
+    <copy todir="${runtime.local}/lib">
+      <fileset dir="${build.dir}/lib"/>
+    </copy>
+    <copy todir="${runtime.local}/plugins">
+      <fileset dir="${build.dir}/plugins"/>
+    </copy>
+    <copy todir="${runtime.local}/test">
+      <fileset dir="${build.dir}/test"/>
+    </copy>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Compile core test code (org/apache/nutch/** under ${test.src.dir}) --> 
+  <!-- ================================================================== -->
+  <target name="compile-core-test" depends="compile-core, resolve-test" description="--> compile test code">
+    <javac 
+     encoding="${build.encoding}" 
+     srcdir="${test.src.dir}"
+     includes="org/apache/nutch/**/*.java"
+     destdir="${test.build.classes}"
+     debug="${javac.debug}"
+     optimize="${javac.optimize}"
+     target="${javac.version}"
+     source="${javac.version}"
+     deprecation="${javac.deprecation}">
+      <compilerarg value="-Xlint"/>
+      <classpath refid="test.classpath"/>
+    </javac>    
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Run Nutch proxy                                                    --> 
+  <!-- ================================================================== -->
+
+  <target name="proxy" depends="job, compile-core-test" description="--> run nutch proxy">
+    <java classname="org.apache.nutch.tools.proxy.TestbedProxy" fork="true">
+      <classpath refid="test.classpath"/>
+      <arg value="-fake"/>
+<!--
+      <arg value="-delay"/>
+      <arg value="-200"/>
+-->
+      <jvmarg line="-Djavax.xml.parsers.DocumentBuilderFactory=com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl"/>
+    </java>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Run Nutch benchmarking analysis                                    --> 
+  <!-- ================================================================== -->
+
+  <target name="benchmark" description="--> run nutch benchmarking analysis">
+    <java classname="org.apache.nutch.tools.Benchmark" fork="true">
+      <classpath refid="test.classpath"/>
+      <jvmarg line="-Xmx512m -Djavax.xml.parsers.DocumentBuilderFactory=com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl"/>
+      <arg value="-maxPerHost"/>
+      <arg value="10"/>
+      <arg value="-seeds"/>
+      <arg value="1"/>
+      <arg value="-depth"/>
+      <arg value="5"/>
+    </java>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Run unit tests                                                     --> 
+  <!-- ================================================================== -->
+  <target name="test" depends="test-core, test-plugins" description="--> run JUnit tests"/>
+
+  <target name="test-core" depends="job, compile-core-test" description="--> run core JUnit tests only">
+
+    <delete dir="${test.build.data}"/>
+    <mkdir dir="${test.build.data}"/>
+    <!-- 
+     copy resources needed in junit tests
+    -->
+    <copy todir="${test.build.data}">
+      <fileset dir="src/testresources" includes="**/*"/>
+    </copy>
+    
+    <copy file="${test.src.dir}/nutch-site.xml"
+          todir="${test.build.classes}"/>
+
+    <copy file="${test.src.dir}/log4j.properties"
+          todir="${test.build.classes}"/>
+
+    <copy file="${test.src.dir}/crawl-tests.xml"
+        todir="${test.build.classes}"/>
+        
+    <copy file="${test.src.dir}/domain-urlfilter.txt"
+        todir="${test.build.classes}"/>
+
+    <copy file="${test.src.dir}/filter-all.txt"
+        todir="${test.build.classes}"/>
+
+    <junit printsummary="yes" haltonfailure="no" fork="yes" dir="${basedir}"
+      errorProperty="tests.failed" failureProperty="tests.failed" maxmemory="1000m">
+      <sysproperty key="test.build.data" value="${test.build.data}"/>
+      <sysproperty key="test.src.dir" value="${test.src.dir}"/>
+      <sysproperty key="javax.xml.parsers.DocumentBuilderFactory" value="com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl"/> 
+      <classpath refid="test.classpath"/>
+      <formatter type="${test.junit.output.format}" />
+      <batchtest todir="${test.build.dir}" unless="testcase">
+        <fileset dir="${test.src.dir}"
+                 includes="**/Test*.java" excludes="**/${test.exclude}.java" />
+      </batchtest>
+      <batchtest todir="${test.build.dir}" if="testcase">
+        <fileset dir="${test.src.dir}" includes="**/${testcase}.java"/>
+      </batchtest>
+    </junit>
+
+    <fail if="tests.failed">Tests failed!</fail>
+
+  </target>   
+
+  <target name="test-plugins" depends="compile" description="--> run plugin JUnit tests only">
+    <ant dir="src/plugin" target="test" inheritAll="false"/>
+  </target>
+
+  <target name="nightly" depends="test, tar-src, zip-src" description="--> run the nightly target build">
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Ivy targets                                                        -->
+  <!-- ================================================================== -->
+
+  <!-- target: resolve  ================================================= -->
+  <target name="resolve-default" depends="clean-lib" description="--> resolve and retrieve dependencies with ivy">
+    <ivy:resolve file="${ivy.file}" conf="default" log="download-only"/>
+    <ivy:retrieve pattern="${build.lib.dir}/[artifact]-[revision].[ext]" symlink="false" log="quiet"/>
+    <antcall target="copy-libs"/>
+  </target>
+
+  <target name="resolve-test" depends="clean-lib" description="--> resolve and retrieve dependencies with ivy">
+    <ivy:resolve file="${ivy.file}" conf="test" log="download-only"/>
+    <ivy:retrieve pattern="${build.lib.dir}/[artifact]-[revision].[ext]" symlink="false" log="quiet"/>
+    <antcall target="copy-libs"/>
+  </target>
+
+  <target name="copy-libs" description="--> copy the libs in lib, which are not ivy enabled">
+    <!-- copy the libs in lib, which are not ivy enabled-->
+    <copy todir="${build.lib.dir}/" failonerror="false">
+      <fileset dir="${lib.dir}" includes="**/*.jar"/>
+    </copy>
+  </target>
+
+  <!-- target: publish-local  =========================================== -->
+  <target name="publish-local" depends="jar" description="--> publish this project in the local ivy repository">
+    <ivy:publish artifactspattern="${build.dir}/[artifact]-${version}.[ext]" 
+                    resolver="local"
+                    pubrevision="${version}"
+          pubdate="${now}"
+                    status="integration"
+              forcedeliver="true"
+              overwrite="true"
+        />
+    <echo message="project ${ant.project.name} published locally with version ${version}" />
+  </target>
+
+  <!-- target: report  ================================================== -->
+  <target name="report" depends="resolve-test" description="--> generates a report of dependencies">
+    <ivy:report todir="${build.dir}"/>
+  </target>
+
+  <!-- target: ivy-init  ================================================ -->
+  <target name="ivy-init" depends="ivy-probe-antlib, ivy-init-antlib" description="--> initialise Ivy settings">
+    <ivy:settings file="${ivy.dir}/ivysettings.xml" />
+  </target>
+
+  <!-- target: ivy-probe-antlib  ======================================== -->
+  <target name="ivy-probe-antlib" description="--> probe the antlib library">
+    <condition property="ivy.found">
+      <typefound uri="antlib:org.apache.ivy.ant" name="cleancache" />
+    </condition>
+  </target>
+
+  <!-- target: ivy-download  ============================================ -->
+  <target name="ivy-download" description="--> download ivy">
+    <available file="${ivy.jar}" property="ivy.jar.found"/>
+    <antcall target="ivy-download-unchecked"/>
+  </target>
+
+  <!-- target: ivy-download-unchecked  ================================== -->
+  <target name="ivy-download-unchecked" unless="ivy.jar.found" description="--> fetch any ivy file">
+    <get src="${ivy.repo.url}" dest="${ivy.jar}" usetimestamp="true" />
+  </target>
+
+  <!-- target: ivy-init-antlib  ========================================= -->
+  <target name="ivy-init-antlib" depends="ivy-download" unless="ivy.found" description="--> attempt to use Ivy with Antlib">
+    <typedef uri="antlib:org.apache.ivy.ant" onerror="fail" loaderRef="ivyLoader">
+      <classpath>
+        <pathelement location="${ivy.jar}" />
+      </classpath>
+    </typedef>
+    <fail>
+      <condition>
+        <not>
+          <typefound uri="antlib:org.apache.ivy.ant" name="cleancache" />
+        </not>
+      </condition>
+      You need Apache Ivy 2.0 or later from http://ant.apache.org/
+      It could not be loaded from ${ivy.repo.url}
+    </fail>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Documentation                                                      -->
+  <!-- ================================================================== -->
+  <target name="javadoc" depends="compile" description="--> generate Javadoc">
+    <mkdir dir="${build.javadoc}"/>
+    <javadoc
+      overview="${src.dir}/overview.html"
+      destdir="${build.javadoc}"
+      author="true"
+      version="true"
+      use="true"
+      windowtitle="${name} ${version} API"
+      doctitle="${name} ${version} API"
+      bottom="Copyright &amp;copy; ${year} The Apache Software Foundation"
+      >
+        <arg value="${javadoc.proxy.host}"/>
+        <arg value="${javadoc.proxy.port}"/>
+
+      <packageset dir="${src.dir}"/>
+      <packageset dir="${plugins.dir}/creativecommons/src/java"/>
+      <packageset dir="${plugins.dir}/feed/src/java"/>
+      <packageset dir="${plugins.dir}/headings/src/java"/>
+      <packageset dir="${plugins.dir}/index-anchor/src/java"/>
+      <packageset dir="${plugins.dir}/index-basic/src/java"/>
+      <packageset dir="${plugins.dir}/index-metadata/src/java"/>
+      <packageset dir="${plugins.dir}/index-more/src/java"/>
+      <packageset dir="${plugins.dir}/index-static/src/java"/>
+      <packageset dir="${plugins.dir}/language-identifier/src/java"/>
+      <packageset dir="${plugins.dir}/lib-http/src/java"/>
+      <packageset dir="${plugins.dir}/lib-regex-filter/src/java"/>
+      <packageset dir="${plugins.dir}/microformats-reltag/src/java"/>
+      <packageset dir="${plugins.dir}/parse-ext/src/java"/>
+      <packageset dir="${plugins.dir}/parse-html/src/java"/>
+      <packageset dir="${plugins.dir}/parse-js/src/java"/>
+      <packageset dir="${plugins.dir}/parse-metatags/src/java"/>
+      <packageset dir="${plugins.dir}/parse-swf/src/java"/>
+      <packageset dir="${plugins.dir}/parse-tika/src/java"/>
+      <packageset dir="${plugins.dir}/parse-zip/src/java"/>
+      <packageset dir="${plugins.dir}/protocol-file/src/java"/>
+      <packageset dir="${plugins.dir}/protocol-ftp/src/java"/>
+      <packageset dir="${plugins.dir}/protocol-http/src/java"/>
+      <packageset dir="${plugins.dir}/protocol-httpclient/src/java"/>
+      <packageset dir="${plugins.dir}/scoring-link/src/java"/>
+      <packageset dir="${plugins.dir}/scoring-opic/src/java"/>
+      <packageset dir="${plugins.dir}/subcollection/src/java"/>
+      <packageset dir="${plugins.dir}/tld/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-automaton/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-domain/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-domainblacklist/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-prefix/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-regex/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-suffix/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-validator/src/java"/>
+      <packageset dir="${plugins.dir}/urlmeta/src/java"/>
+      <packageset dir="${plugins.dir}/urlnormalizer-basic/src/java"/>
+      <packageset dir="${plugins.dir}/urlnormalizer-pass/src/java"/>
+      <packageset dir="${plugins.dir}/urlnormalizer-regex/src/java"/>
+      
+      <link href="${javadoc.link.java}"/>
+      <link href="${javadoc.link.lucene}"/>
+      <link href="${javadoc.link.hadoop}"/>
+      
+      <classpath refid="classpath"/>
+    	<classpath>
+    		<fileset dir="${plugins.dir}" >
+    			<include name="**/*.jar"/>
+    		</fileset>
+    	</classpath>
+    	
+      <group title="Core" packages="org.apache.nutch.*"/>
+      <group title="Plugins API" packages="${plugins.api}"/>
+      <group title="Protocol Plugins" packages="${plugins.protocol}"/>
+      <group title="URL Filter Plugins" packages="${plugins.urlfilter}"/>
+      <group title="Scoring Plugins" packages="${plugins.scoring}"/>
+      <group title="Parse Plugins" packages="${plugins.parse}"/>
+      <group title="Indexing Filter Plugins" packages="${plugins.index}"/>
+      <group title="Misc. Plugins" packages="${plugins.misc}"/>
+    </javadoc>
+    <!-- Copy the plugin.dtd file to the plugin doc-files dir -->
+    <copy file="${plugins.dir}/plugin.dtd"
+          todir="${build.javadoc}/org/apache/nutch/plugin/doc-files"/>
+  </target>	
+	
+  <target name="default-doc" description="--> generate default Nutch documentation">
+    <style basedir="${conf.dir}" destdir="${docs.dir}"
+           includes="nutch-default.xml" style="conf/nutch-conf.xsl"/>
+  </target>
+
+    <!-- ================================================================== -->
+  <!-- D I S T R I B U T I O N                                            -->
+  <!-- ================================================================== -->
+  <!--                                                                    -->
+  <!-- ================================================================== -->
+  <target name="package-src" depends="runtime, javadoc" description="--> generate source distribution package">
+    <mkdir dir="${dist.dir}"/>
+    <mkdir dir="${src.dist.version.dir}"/>
+    <mkdir dir="${src.dist.version.dir}/lib"/>
+    <mkdir dir="${src.dist.version.dir}/docs"/>
+    <mkdir dir="${src.dist.version.dir}/docs/api"/>
+    <mkdir dir="${src.dist.version.dir}/ivy"/>
+
+    <copy todir="${src.dist.version.dir}/lib" includeEmptyDirs="false">
+      <fileset dir="lib"/>
+    </copy>
+   
+    <copy todir="${src.dist.version.dir}/conf">
+      <fileset dir="${conf.dir}" excludes="**/*.template"/>
+    </copy>
+
+    <copy todir="${src.dist.version.dir}/docs/api">
+      <fileset dir="${build.javadoc}"/>
+    </copy>
+
+    <copy todir="${src.dist.version.dir}">
+      <fileset dir=".">
+        <include name="*.txt" />
+        <!--<include name="KEYS" />-->
+      </fileset>
+    </copy>
+
+    <copy todir="${src.dist.version.dir}/src" includeEmptyDirs="true">
+      <fileset dir="src"/>
+    </copy>
+
+    <copy todir="${src.dist.version.dir}/ivy" includeEmptyDirs="true">
+      <fileset dir="ivy"/>
+    </copy>
+
+    <copy todir="${src.dist.version.dir}/" file="build.xml"/>
+    <copy todir="${src.dist.version.dir}/" file="default.properties"/>
+
+  </target>
+
+ <target name="package-bin" depends="runtime, javadoc" description="--> generate binary distribution package">
+    <mkdir dir="${dist.dir}"/>
+    <mkdir dir="${bin.dist.version.dir}"/>
+    <mkdir dir="${bin.dist.version.dir}/lib"/>
+    <mkdir dir="${bin.dist.version.dir}/bin"/>
+    <mkdir dir="${bin.dist.version.dir}/conf"/>
+    <mkdir dir="${bin.dist.version.dir}/docs"/>
+    <mkdir dir="${bin.dist.version.dir}/docs/api"/>
+    <mkdir dir="${bin.dist.version.dir}/plugins"/>
+    <!-- Copy from ${runtime.local}, the area the "runtime" target populates -->
+    <copy todir="${bin.dist.version.dir}/lib" includeEmptyDirs="false">
+      <fileset dir="${runtime.local}/lib"/>
+    </copy>
+   
+    <copy todir="${bin.dist.version.dir}/bin">
+      <fileset dir="${runtime.local}/bin"/>
+    </copy>
+
+    <chmod perm="ugo+x" type="file">
+        <fileset dir="${bin.dist.version.dir}/bin"/>
+    </chmod>
+
+    <copy todir="${bin.dist.version.dir}/conf">
+      <fileset dir="${runtime.local}/conf" excludes="**/*.template"/>
+    </copy>
+
+    <copy todir="${bin.dist.version.dir}/docs/api">
+      <fileset dir="${build.javadoc}"/>
+    </copy>
+
+    <copy todir="${bin.dist.version.dir}">
+      <fileset dir=".">
+        <include name="*.txt" />
+      </fileset>
+    </copy>
+
+    <copy todir="${bin.dist.version.dir}/plugins" includeEmptyDirs="true">
+      <fileset dir="${runtime.local}/plugins"/>
+    </copy>
+
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Make src release tarball                                               -->
+  <!-- ================================================================== -->
+  <target name="tar-src" depends="package-src" description="--> generate src.tar.gz distribution package">
+    <tar compression="gzip" longfile="gnu"
+      destfile="${src.dist.version.dir}.tar.gz">
+      <tarfileset dir="${src.dist.version.dir}" mode="664" prefix="${final.name}">
+        <exclude name="src/bin/*" />
+        <include name="**" />
+      </tarfileset>
+      <tarfileset dir="${src.dist.version.dir}" mode="755" prefix="${final.name}">
+        <include name="src/bin/*" />
+      </tarfileset>
+    </tar>
+  </target>
+  
+  <!-- ================================================================== -->
+  <!-- Make bin release tarball                                               -->
+  <!-- ================================================================== -->
+  <target name="tar-bin" depends="package-bin" description="--> generate bin.tar.gz distribution package">
+    <tar compression="gzip" longfile="gnu"
+      destfile="${bin.dist.version.dir}.tar.gz">
+      <tarfileset dir="${bin.dist.version.dir}" mode="664" prefix="${final.name}">
+        <exclude name="bin/*" />
+        <include name="**" />
+      </tarfileset>
+      <tarfileset dir="${bin.dist.version.dir}" mode="755" prefix="${final.name}">
+        <include name="bin/*" />
+      </tarfileset>
+    </tar>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Make src release zip                                               -->
+  <!-- ================================================================== -->
+  <target name="zip-src" depends="package-src" description="--> generate src.zip distribution package">
+   <zip compress="true" casesensitive="yes" 
+     destfile="${src.dist.version.dir}.zip">
+   <zipfileset dir="${src.dist.version.dir}" filemode="664" prefix="${final.name}">
+       <exclude name="src/bin/*" />
+       <include name="**" />
+   </zipfileset>
+   <zipfileset dir="${src.dist.version.dir}" filemode="755" prefix="${final.name}">
+       <include name="src/bin/*" />
+   </zipfileset>
+   </zip>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Make bin release zip                                               -->
+  <!-- ================================================================== -->
+  <target name="zip-bin" depends="package-bin" description="--> generate bin.zip distribution package">
+   <zip compress="true" casesensitive="yes" 
+     destfile="${bin.dist.version.dir}.zip">
+   <zipfileset dir="${bin.dist.version.dir}" filemode="664" prefix="${final.name}">
+       <exclude name="bin/*" />
+       <include name="**" />
+   </zipfileset>
+   <zipfileset dir="${bin.dist.version.dir}" filemode="755" prefix="${final.name}">
+       <include name="bin/*" />
+   </zipfileset>
+   </zip>
+  </target>
+	
+  <!-- ================================================================== -->
+  <!-- Clean.  Delete the build files, and their directories              -->
+  <!-- ================================================================== -->
+
+  <!-- target: clean  =================================================== -->
+  <target name="clean" depends="clean-build, clean-lib, clean-dist, clean-runtime" description="--> clean the project" />
+
+  <!-- target: clean-local  ============================================= -->
+  <target name="clean-local" depends="" 
+                description="--> cleans the local repository for the current module">
+    <delete dir="${ivy.local.default.root}/${ivy.organisation}/${ivy.module}"/>
+  </target>
+
+  <!-- target: clean-lib  =============================================== -->
+  <target name="clean-lib" description="--> clean the project libraries directory (dependencies)">
+    <delete includeemptydirs="true" dir="${build.lib.dir}"/>
+  </target>
+
+  <!-- target: clean-build  ============================================= -->
+  <target name="clean-build" description="--> clean the project built files">
+    <delete includeemptydirs="true" dir="${build.dir}"/>
+  </target>
+
+  <!-- target: clean-dist   ============================================= -->
+  <target name="clean-dist" description="--> clean the project dist files">
+    <delete includeemptydirs="true" dir="${dist.dir}"/>
+  </target>
+
+  <!-- target: clean-cache  ============================================= -->
+  <target name="clean-cache" depends=""
+                        description="--> delete ivy cache">
+    <ivy:cleancache />
+  </target>
+
+  <target name="clean-runtime" description="--> clean the project runtime area">
+    <delete includeemptydirs="true" dir="${runtime.dir}"/>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- RAT targets                                                        -->
+  <!-- ================================================================== -->
+  <target name="rat-sources-typedef" description="--> run RAT antlib task">
+    <typedef resource="org/apache/rat/anttasks/antlib.xml" >
+      <classpath>
+        <fileset dir="." includes="rat*.jar"/>
+      </classpath>
+    </typedef>
+  </target>
+
+  <target name="rat-sources" depends="rat-sources-typedef"
+          description="--> runs RAT tasks over src/java and the plugin sources">
+    <rat:report xmlns:rat="antlib:org.apache.rat.anttasks">
+      <fileset dir="src">
+        <include name="java/**/*"/>
+        <include name="plugin/**/src/**/*"/>
+      </fileset>
+    </rat:report>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- SONAR targets                                                      -->
+  <!-- ================================================================== -->
+  
+  <!-- Define the Sonar task if this hasn't been done in a common script -->
+  <taskdef uri="antlib:org.sonar.ant" resource="org/sonar/ant/antlib.xml">
+    <classpath path="${ant.library.dir}"/>
+    <classpath path="${mysql.library.dir}"/>
+  </taskdef>
+  
+  <!-- Add the target -->
+  <target name="sonar" description="--> run SONAR analysis">
+    <!-- Sets the sonar.* properties below, then invokes the Sonar Ant task. -->
+    <!-- list of mandatory source directories (required) -->
+    <property name="sonar.sources" value="${src.dir}"/>
+ 
+    <!-- list of properties (optional) -->
+    <property name="sonar.projectName" value="Nutch Trunk 1.4 Sonar Analysis" />
+    <!-- Ant properties are immutable: a second definition is ignored, so list every binaries dir in one value -->
+    <property name="sonar.binaries" value="${build.dir}/classes,${build.dir}/plugins" />
+    <property name="sonar.tests" value="${test.src.dir}" />
+    <!-- NOTE(review): Ant's built-in property is "basedir"; confirm that base.dir is defined elsewhere -->
+    <sonar:sonar workDir="${base.dir}" key="org.apache.nutch:trunk" 
+     version="1.4-SNAPSHOT" xmlns:sonar="antlib:org.sonar.ant"/>
+  </target>
+	
+</project>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conf/adaptive-mimetypes.txt	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This configuration file is used by the MimeAdaptiveFetchScheduler and
+# allows the user to set the INC and DEC rates for the AdaptiveFetchScheduler
+# by MIME-type. Values are separated by tab.
+
+# MIME-type	inc_rate	dec_rate
+text/html	0.2	0.2
+application/xhtml+xml	0.2	0.2
+application/pdf	0.1	0.4
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conf/automaton-urlfilter.txt	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,35 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# The default url filter.
+# Better for whole-internet crawling.
+
+# Each non-comment, non-blank line contains a regular expression
+# prefixed by '+' or '-'.  The first matching pattern in the file
+# determines whether a URL is included or ignored.  If no pattern
+# matches, the URL is ignored.
+
+# skip file: ftp: and mailto: urls
+-(file|ftp|mailto):.*
+
+# skip image and other suffixes we can't yet parse
+# for a more extensive coverage use the urlfilter-suffix plugin
+-.*\.(gif|GIF|jpg|JPG|png|PNG|ico|ICO|css|CSS|sit|SIT|eps|EPS|wmf|WMF|zip|ZIP|ppt|PPT|mpg|MPG|xls|XLS|gz|GZ|rpm|RPM|tgz|TGZ|mov|MOV|exe|EXE|jpeg|JPEG|bmp|BMP|js|JS)
+
+# skip URLs containing certain characters as probable queries, etc.
+-.*[?*!@=].*
+
+# accept anything else
++.*
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conf/automaton-urlfilter.txt.template	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,35 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# The default url filter.
+# Better for whole-internet crawling.
+
+# Each non-comment, non-blank line contains a regular expression
+# prefixed by '+' or '-'.  The first matching pattern in the file
+# determines whether a URL is included or ignored.  If no pattern
+# matches, the URL is ignored.
+
+# skip file: ftp: and mailto: urls
+-(file|ftp|mailto):.*
+
+# skip image and other suffixes we can't yet parse
+# for a more extensive coverage use the urlfilter-suffix plugin
+-.*\.(gif|GIF|jpg|JPG|png|PNG|ico|ICO|css|CSS|sit|SIT|eps|EPS|wmf|WMF|zip|ZIP|ppt|PPT|mpg|MPG|xls|XLS|gz|GZ|rpm|RPM|tgz|TGZ|mov|MOV|exe|EXE|jpeg|JPEG|bmp|BMP|js|JS)
+
+# skip URLs containing certain characters as probable queries, etc.
+-.*[?*!@=].*
+
+# accept anything else
++.*
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conf/configuration.xsl	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,40 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+<xsl:output method="html"/>
+<xsl:template match="configuration">
+<html>
+<body>
+<table border="1">
+<tr>
+ <td>name</td>
+ <td>value</td>
+ <td>description</td>
+</tr>
+<xsl:for-each select="property">
+<tr>
+  <td><a name="{name}"><xsl:value-of select="name"/></a></td>
+  <td><xsl:value-of select="value"/></td>
+  <td><xsl:value-of select="description"/></td>
+</tr>
+</xsl:for-each>
+</table>
+</body>
+</html>
+</xsl:template>
+</xsl:stylesheet>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conf/domain-suffixes.xml	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,4428 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+  
+  http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!--
+  Document   : domain-suffixes.xml
+  Author     : Enis Soztutar - enis.soz.nutch@gmail.com
+  Description: This document contains top level domains 
+  as described by the Internet Assigned Numbers
+  Authority (IANA), and second or third level domains that 
+  are known to be managed by domain registrars. People at 
+  Mozilla community call these public suffixes or effective 
+  tlds. There is no algorithmic way of knowing whether a suffix 
+  is a public domain suffix, or not. So this large file is used 
+  for this purpose. The entries in the file are used to find the
+  domain of a url, which may not be the same thing as the host of 
+  the url. For example for "http://lucene.apache.org/nutch" the 
+  hostname is lucene.apache.org, however the domain name for this
+  url would be apache.org. Domain names can be quite handy for 
+  statistical analysis, and fighting against spam.    
+  
+  The list of TLDs is constructed from IANA, and the 
+  list of "effective tlds" are constructed from Wikipedia, 
+  http://wiki.mozilla.org/TLD_List, and http://publicsuffix.org/
+  The list may not include all the suffixes, but some
+  effort has been spent to make it comprehensive. Please forward 
+  any improvements for this list to nutch-dev mailing list, or 
+  nutch JIRA. 
+  
+  Top level domains (tlds) are grouped
+  into three, namely infrastructure, generic and country 
+  code tlds. Infrastructure tlds are only used for 
+  technical reasons. Generic tlds represent the type 
+  of the organization that they represent. Those in 
+  current use and those waiting for approval are listed.
+  Most of the country code tlds correspond to the two 
+  letter ISO-3166-1 country codes. 
+  Each tld is listed with its domain (such as com), a 
+  status enumeration describing the status of the tld, 
+  and optionally a description for convenience.
+  cctlds are listed with an additional country name field.
+  
+  status and boost elements are optional, with default values IN_USE 
+  and 1.0 respectively. See domain-suffixes.xsd for the xml schema.
+  
+  
+  References : 
+  http://www.iana.org
+  http://www.iana.org/gtld/gtld.htm
+  http://www.iana.org/root-whois/index.html
+  http://en.wikipedia.org/wiki/Top-level_domain
+  http://en.wikipedia.org/wiki/List_of_Internet_top-level_domains
+  http://wiki.mozilla.org/TLD_List
+  http://publicsuffix.org/
+  https://bugzilla.mozilla.org/show_bug.cgi?id=331510
+  http://www.neuhaus.com/domaincheck/domain_list.htm
+-->
+
+<domains xmlns="http://lucene.apache.org/nutch"
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://lucene.apache.org/nutch domain-suffixes.xsd">
+
+  <tlds>
+    <!--  Infrastructure Top Level Domains -->
+    <itlds>
+      <tld domain="root">
+        <status>INFRASTRUCTURE</status>
+        <description>
+          (from http://en.wikipedia.org/wiki/.root)
+          vrsn-end-of-zone-marker-dummy-record.root is a domain name
+          listed in the DNS root zone as a diagnostic marker, whose
+          presence demonstrates the root zone was not truncated upon
+          loading by a root nameserver. It could be argued it represents
+          a top-level domain of .root, although technically no such
+          delegation exists.
+        </description>
+      </tld>
+
+      <tld domain="arpa">
+        <status>INFRASTRUCTURE</status>
+        <description>
+          (from http://en.wikipedia.org/wiki/.arpa) .arpa is an Internet
+          top-level domain (TLD) used exclusively for
+          Internet-infrastructure purposes. It does not function as a
+          normal TLD where websites are registered, but rather as a
+          meta-TLD used to look up addresses, and for other purposes.
+        </description>
+      </tld>
+    </itlds><!--  Generic Top Level Domains -->
+    <gtlds>
+      <!-- 
+        The following gTLDs are in actual use
+      -->
+
+      <tld domain="aero">
+        <status>SPONSORED</status>
+        <description>for the air transport industry</description>
+      </tld>
+
+      <tld domain="biz">
+        <status>UNSPONSORED</status>
+        <description>for business use</description>
+      </tld>
+
+      <tld domain="cat">
+        <status>SPONSORED</status>
+        <description>for Catalan language/culture</description>
+      </tld>
+
+      <tld domain="com">
+        <status>UNSPONSORED</status>
+        <description>
+          for commercial organizations, but unrestricted
+        </description>
+      </tld>
+
+      <tld domain="coop">
+        <status>SPONSORED</status>
+        <description>for cooperatives</description>
+      </tld>
+
+      <tld domain="edu">
+        <status>UNSPONSORED</status>
+        <boost>1.0</boost>
+        <description>
+          for post-secondary educational establishments
+        </description>
+      </tld>
+
+      <tld domain="gov">
+        <status>UNSPONSORED</status>
+        <description>
+          for governments and their agencies in the United States
+        </description>
+      </tld>
+
+      <tld domain="info">
+        <status>UNSPONSORED</status>
+        <description>
+          for informational sites, but unrestricted
+        </description>
+      </tld>
+
+      <tld domain="int">
+        <status>UNSPONSORED</status>
+        <description>
+          for international organizations established by treaty
+        </description>
+      </tld>
+
+      <tld domain="jobs">
+        <status>SPONSORED</status>
+        <description>for employment-related sites</description>
+      </tld>
+
+      <tld domain="mil">
+        <status>UNSPONSORED</status>
+        <description>for the US military</description>
+      </tld>
+
+      <tld domain="mobi">
+        <status>SPONSORED</status>
+        <description>for sites catering to mobile devices</description>
+      </tld>
+
+      <tld domain="museum">
+        <status>SPONSORED</status>
+        <description>for museums</description>
+      </tld>
+
+      <tld domain="name">
+        <status>UNSPONSORED</status>
+        <description>for families and individuals</description>
+      </tld>
+
+      <tld domain="net">
+        <status>UNSPONSORED</status>
+        <description>
+          originally for network infrastructures, now unrestricted
+        </description>
+      </tld>
+
+      <tld domain="org">
+        <status>UNSPONSORED</status>
+        <description>
+          originally for organizations not clearly falling within the
+          other gTLDs, now unrestricted
+        </description>
+      </tld>
+
+      <tld domain="pro">
+        <status>SPONSORED</status>
+        <description>for certain professions</description>
+      </tld>
+
+      <tld domain="travel">
+        <status>SPONSORED</status>
+        <description>
+          for travel agents, airlines, hoteliers, tourism bureaus, etc.
+        </description>
+      </tld>
+
+      <!-- 
+        The following gTLDs are in the process of being approved, 
+        and may be added to the root nameservers in the near future 
+      -->
+
+      <tld domain="asia">
+        <status>STARTUP</status>
+        <description>for the Asian community</description>
+      </tld>
+
+      <tld domain="post">
+        <status>PROPOSED</status>
+        <description>for postal services</description>
+      </tld>
+
+      <tld domain="tel">
+        <status>STARTUP</status>
+        <description>
+          for services involving connections between the telephone
+          network and the Internet
+        </description>
+      </tld>
+
+      <tld domain="geo">
+        <status>PROPOSED</status>
+        <description>for geographically related sites</description>
+      </tld>
+
+      <tld domain="gal">
+        <status>PROPOSED</status>
+        <description>for Galicia, a country within Spain</description>
+      </tld>
+
+      <tld domain="cym">
+        <status>PROPOSED</status>
+        <description>for Wales, a country within the UK</description>
+      </tld>
+
+      <tld domain="sco">
+        <status>PROPOSED</status>
+        <description>for Scotland, a country within the UK</description>
+      </tld>
+
+      <tld domain="kid">
+        <status>PROPOSED</status>
+        <description>for websites designed for children</description>
+      </tld>
+
+      <tld domain="kids">
+        <status>PROPOSED</status>
+        <description>for websites designed for children</description>
+      </tld>
+
+      <tld domain="mail">
+        <status>PROPOSED</status>
+        <description>http://en.wikipedia.org/wiki/.mail</description>
+      </tld>
+
+      <tld domain="web">
+        <status>PROPOSED</status>
+        <description>For Web sites of all sorts</description>
+      </tld>
+
+      <tld domain="xxx">
+        <status>PROPOSED</status>
+        <description>For Adult entertainment sites</description>
+      </tld>
+
+      <!-- 
+        The following gTLDs are removed from the registry
+      -->
+      <tld domain="nato">
+        <status>DELETED</status>
+        <description>
+          for NATO sites and operations. Replaced by .int
+        </description>
+      </tld>
+
+      <!-- 
+        The following gTLDs are PSEUDO_DOMAINs
+      -->
+      <tld domain="bitnet">
+        <status>PSEUDO_DOMAIN</status>
+        <description>
+          identifying a hostname not connected directly to the Internet,
+          but a bitnet network
+        </description>
+      </tld>
+
+      <tld domain="csnet">
+        <status>PSEUDO_DOMAIN</status>
+        <description>
+          identifying a hostname not connected directly to the Internet,
+          but a csnet network
+        </description>
+      </tld>
+
+      <tld domain="uucp">
+        <status>PSEUDO_DOMAIN</status>
+        <description>
+          identifying a hostname not connected directly to the Internet,
+          but a uucp network
+        </description>
+      </tld>
+
+      <tld domain="local">
+        <status>PSEUDO_DOMAIN</status>
+        <description>
+          .local is a pseudo top-level domain used by Apple, Inc.'s
+          Bonjour protocol.
+        </description>
+      </tld>
+
+      <tld domain="internal">
+        <status>PSEUDO_DOMAIN</status>
+        <description>alias of .local</description>
+      </tld>
+
+      <tld domain="onion">
+        <status>PSEUDO_DOMAIN</status>
+        <description>
+          designates an anonymous or pseudonymous address reachable via
+          the Tor network.
+        </description>
+      </tld>
+    </gtlds><!--  Country Code Top Level Domains -->
+
+    <cctlds>
+      <tld domain="ac">
+        <country>Ascension Island</country>
+      </tld>
+
+      <tld domain="ad">
+        <country>Andorra</country>
+      </tld>
+
+      <tld domain="ae">
+        <country>United Arab Emirates</country>
+      </tld>
+
+      <tld domain="af">
+        <country>Afghanistan</country>
+      </tld>
+
+      <tld domain="ag">
+        <country>Antigua and Barbuda</country>
+      </tld>
+
+      <tld domain="ai">
+        <country>Anguilla</country>
+      </tld>
+
+      <tld domain="al">
+        <country>Albania</country>
+      </tld>
+
+      <tld domain="am">
+        <country>Armenia</country>
+      </tld>
+
+      <tld domain="an">
+        <country>Netherlands Antilles</country>
+      </tld>
+
+      <tld domain="ao">
+        <country>Angola</country>
+      </tld>
+
+      <tld domain="aq">
+        <country>Antarctica</country>
+      </tld>
+
+      <tld domain="ar">
+        <country>Argentina</country>
+      </tld>
+
+      <tld domain="as">
+        <country>American Samoa</country>
+      </tld>
+
+      <tld domain="at">
+        <country>Austria</country>
+      </tld>
+
+      <tld domain="au">
+        <country>Australia</country>
+      </tld>
+
+      <tld domain="aw">
+        <country>Aruba</country>
+      </tld>
+
+      <tld domain="ax">
+        <country>Aland Islands</country>
+      </tld>
+
+      <tld domain="az">
+        <country>Azerbaijan</country>
+      </tld>
+
+      <tld domain="ba">
+        <country>Bosnia and Herzegovina</country>
+      </tld>
+
+      <tld domain="bb">
+        <country>Barbados</country>
+      </tld>
+
+      <tld domain="bd">
+        <country>Bangladesh</country>
+      </tld>
+
+      <tld domain="be">
+        <country>Belgium</country>
+      </tld>
+
+      <tld domain="bf">
+        <country>Burkina Faso</country>
+      </tld>
+
+      <tld domain="bg">
+        <country>Bulgaria</country>
+      </tld>
+
+      <tld domain="bh">
+        <country>Bahrain</country>
+      </tld>
+
+      <tld domain="bi">
+        <country>Burundi</country>
+      </tld>
+
+      <tld domain="bj">
+        <country>Benin</country>
+      </tld>
+
+      <tld domain="bm">
+        <country>Bermuda</country>
+      </tld>
+
+      <tld domain="bn">
+        <country>Brunei</country>
+      </tld>
+
+      <tld domain="bo">
+        <country>Bolivia</country>
+      </tld>
+
+      <tld domain="br">
+        <country>Brazil</country>
+      </tld>
+
+      <tld domain="bs">
+        <country>Bahamas</country>
+      </tld>
+
+      <tld domain="bt">
+        <country>Bhutan</country>
+      </tld>
+
+      <tld domain="bu">
+        <country>Burma</country>
+        <status>NOT_IN_USE</status>
+        <description>
+          not in use since re-naming of country to Myanmar, see .mm
+        </description>
+      </tld>
+
+      <tld domain="bv">
+        <country>Bouvet Island</country>
+        <status>NOT_IN_USE</status>
+        <description>not in use; no registrations</description>
+      </tld>
+
+      <tld domain="bw">
+        <country>Botswana</country>
+      </tld>
+
+      <tld domain="by">
+        <country>Belarus</country>
+      </tld>
+
+      <tld domain="bz">
+        <country>Belize</country>
+      </tld>
+
+      <tld domain="ca">
+        <country>Canada</country>
+      </tld>
+
+      <tld domain="cc">
+        <country>Cocos Keeling Islands</country>
+      </tld>
+
+      <tld domain="cd">
+        <country>Democratic Republic of the Congo</country>
+        <description>formerly .zr - Zaire</description>
+      </tld>
+
+      <tld domain="cf">
+        <country>Central African Republic</country>
+      </tld>
+
+      <tld domain="cg">
+        <country>Republic of the Congo</country>
+      </tld>
+
+      <tld domain="ch">
+        <country>Switzerland</country>
+      </tld>
+
+      <tld domain="ci">
+        <country>Côte d'Ivoire</country>
+        <description>Ivory Coast</description>
+      </tld>
+
+      <tld domain="ck">
+        <country>Cook Islands</country>
+      </tld>
+
+      <tld domain="cl">
+        <country>Chile</country>
+      </tld>
+
+      <tld domain="cm">
+        <country>Cameroon</country>
+      </tld>
+
+      <tld domain="cn">
+        <country>People's Republic of China</country>
+      </tld>
+
+      <tld domain="co">
+        <country>Colombia</country>
+      </tld>
+
+      <tld domain="cr">
+        <country>Costa Rica</country>
+      </tld>
+
+      <tld domain="cs">
+        <country>Serbia and Montenegro</country>
+        <status>DELETED</status>
+        <description>
+          formerly .yu - Yugoslavia; on June 3, 2006,
+          Montenegro declared independence, thus dissolving the state
+          union (.cs code not assigned; no DNS; .cs code previously
+          used for Czechoslovakia)
+        </description>
+      </tld>
+
+      <tld domain="cu">
+        <country>Cuba</country>
+      </tld>
+
+      <tld domain="cv">
+        <country>Cape Verde</country>
+      </tld>
+
+      <tld domain="cx">
+        <country>Christmas Island</country>
+      </tld>
+
+      <tld domain="cy">
+        <country>Cyprus</country>
+      </tld>
+
+      <tld domain="cz">
+        <country>Czech Republic</country>
+      </tld>
+
+      <tld domain="dd">
+        <country>German Democratic Republic(East Germany)</country>
+        <status>DELETED</status>
+        <description>deleted in 1990</description>
+      </tld>
+
+      <tld domain="de">
+        <country>Germany</country>
+      </tld>
+
+      <tld domain="dj">
+        <country>Djibouti</country>
+      </tld>
+
+      <tld domain="dk">
+        <country>Denmark</country>
+      </tld>
+
+      <tld domain="dm">
+        <country>Dominica</country>
+      </tld>
+
+      <tld domain="do">
+        <country>Dominican Republic</country>
+      </tld>
+
+      <tld domain="dz">
+        <country>Algeria</country>
+      </tld>
+
+      <tld domain="ec">
+        <country>Ecuador</country>
+      </tld>
+
+      <tld domain="ee">
+        <country>Estonia</country>
+      </tld>
+
+      <tld domain="eg">
+        <country>Egypt</country>
+      </tld>
+
+      <tld domain="eh">
+        <country>Western Sahara</country>
+        <status>NOT_IN_USE</status>
+        <description>not assigned; no DNS</description>
+      </tld>
+
+      <tld domain="er">
+        <country>Eritrea</country>
+      </tld>
+
+      <tld domain="es">
+        <country>Spain</country>
+      </tld>
+
+      <tld domain="et">
+        <country>Ethiopia</country>
+      </tld>
+
+      <tld domain="eu">
+        <country>European Union</country>
+        <description>
+          code "exceptionally reserved" by ISO 3166-1
+        </description>
+      </tld>
+
+      <tld domain="fi">
+        <country>Finland</country>
+      </tld>
+
+      <tld domain="fj">
+        <country>Fiji</country>
+      </tld>
+
+      <tld domain="fk">
+        <country>Falkland Islands</country>
+      </tld>
+
+      <tld domain="fm">
+        <country>Federated States of Micronesia</country>
+      </tld>
+
+      <tld domain="fo">
+        <country>Faroe Islands</country>
+      </tld>
+
+      <tld domain="fr">
+        <country>France</country>
+      </tld>
+
+      <tld domain="ga">
+        <country>Gabon</country>
+      </tld>
+
+      <tld domain="gb">
+        <country>United Kingdom</country>
+        <description>
+          Reserved domain by IANA; deprecated – see .uk
+        </description>
+      </tld>
+
+      <tld domain="gd">
+        <country>Grenada</country>
+      </tld>
+
+      <tld domain="ge">
+        <country>Georgia</country>
+      </tld>
+
+      <tld domain="gf">
+        <country>French Guiana</country>
+      </tld>
+
+      <tld domain="gg">
+        <country>Guernsey</country>
+      </tld>
+
+      <tld domain="gh">
+        <country>Ghana</country>
+      </tld>
+
+      <tld domain="gi">
+        <country>Gibraltar</country>
+      </tld>
+
+      <tld domain="gl">
+        <country>Greenland</country>
+      </tld>
+
+      <tld domain="gm">
+        <country>Gambia</country>
+      </tld>
+
+      <tld domain="gn">
+        <country>Guinea</country>
+      </tld>
+
+      <tld domain="gp">
+        <country>Guadeloupe</country>
+      </tld>
+
+      <tld domain="gq">
+        <country>Equatorial Guinea</country>
+      </tld>
+
+      <tld domain="gr">
+        <country>Greece</country>
+      </tld>
+
+      <tld domain="gs">
+        <country>South Georgia and the South Sandwich Islands</country>
+      </tld>
+
+      <tld domain="gt">
+        <country>Guatemala</country>
+      </tld>
+
+      <tld domain="gu">
+        <country>Guam</country>
+      </tld>
+
+      <tld domain="gw">
+        <country>Guinea Bissau</country>
+      </tld>
+
+      <tld domain="gy">
+        <country>Guyana</country>
+      </tld>
+
+      <tld domain="hk">
+        <country>Hong Kong</country>
+      </tld>
+
+      <tld domain="hm">
+        <country>Heard Island and McDonald Islands</country>
+      </tld>
+
+      <tld domain="hn">
+        <country>Honduras</country>
+      </tld>
+
+      <tld domain="hr">
+        <country>Croatia</country>
+      </tld>
+
+      <tld domain="ht">
+        <country>Haiti</country>
+      </tld>
+
+      <tld domain="hu">
+        <country>Hungary</country>
+      </tld>
+
+      <tld domain="id">
+        <country>Indonesia</country>
+      </tld>
+
+      <tld domain="ie">
+        <country>Ireland</country>
+      </tld>
+
+      <tld domain="il">
+        <country>Israel</country>
+      </tld>
+
+      <tld domain="im">
+        <country>Isle of Man</country>
+      </tld>
+
+      <tld domain="in">
+        <country>India</country>
+      </tld>
+
+      <tld domain="io">
+        <country>British Indian Ocean Territory</country>
+      </tld>
+
+      <tld domain="iq">
+        <country>Iraq</country>
+      </tld>
+
+      <tld domain="ir">
+        <country>Iran</country>
+      </tld>
+
+      <tld domain="is">
+        <country>Iceland</country>
+      </tld>
+
+      <tld domain="it">
+        <country>Italy</country>
+      </tld>
+
+      <tld domain="je">
+        <country>Jersey</country>
+      </tld>
+
+      <tld domain="jm">
+        <country>Jamaica</country>
+      </tld>
+
+      <tld domain="jo">
+        <country>Jordan</country>
+      </tld>
+
+      <tld domain="jp">
+        <country>Japan</country>
+      </tld>
+
+      <tld domain="ke">
+        <country>Kenya</country>
+      </tld>
+
+      <tld domain="kg">
+        <country>Kyrgyzstan</country>
+      </tld>
+
+      <tld domain="kh">
+        <country>Cambodia</country>
+      </tld>
+
+      <tld domain="ki">
+        <country>Kiribati</country>
+      </tld>
+
+      <tld domain="km">
+        <country>Comoros</country>
+      </tld>
+
+      <tld domain="kn">
+        <country>Saint Kitts and Nevis</country>
+      </tld>
+
+      <tld domain="kp">
+        <country>North Korea</country>
+        <status>NOT_IN_USE</status>
+
+        <description>not assigned; no DNS</description>
+      </tld>
+
+      <tld domain="kr">
+        <country>South Korea</country>
+      </tld>
+
+      <tld domain="kw">
+        <country>Kuwait</country>
+      </tld>
+
+      <tld domain="ky">
+        <country>Cayman Islands</country>
+      </tld>
+
+      <tld domain="kz">
+        <country>Kazakhstan</country>
+      </tld>
+
+      <tld domain="la">
+        <country>Laos</country>
+      </tld>
+
+      <tld domain="lb">
+        <country>Lebanon</country>
+      </tld>
+
+      <tld domain="lc">
+        <country>Saint Lucia</country>
+      </tld>
+
+      <tld domain="li">
+        <country>Liechtenstein</country>
+      </tld>
+
+      <tld domain="lk">
+        <country>Sri Lanka</country>
+      </tld>
+
+      <tld domain="lr">
+        <country>Liberia</country>
+      </tld>
+
+      <tld domain="ls">
+        <country>Lesotho</country>
+      </tld>
+
+      <tld domain="lt">
+        <country>Lithuania</country>
+      </tld>
+
+      <tld domain="lu">
+        <country>Luxembourg</country>
+      </tld>
+
+      <tld domain="lv">
+        <country>Latvia</country>
+      </tld>
+
+      <tld domain="ly">
+        <country>Libya</country>
+      </tld>
+
+      <tld domain="ma">
+        <country>Morocco</country>
+      </tld>
+
+      <tld domain="mc">
+        <country>Monaco</country>
+      </tld>
+
+      <tld domain="md">
+        <country>Moldova</country>
+      </tld>
+
+      <tld domain="me">
+        <country>Montenegro</country>
+      </tld>
+
+      <tld domain="mg">
+        <country>Madagascar</country>
+      </tld>
+
+      <tld domain="mh">
+        <country>Marshall Islands</country>
+      </tld>
+
+      <tld domain="mk">
+        <country>Republic of Macedonia</country>
+      </tld>
+
+      <tld domain="ml">
+        <country>Mali</country>
+      </tld>
+
+      <tld domain="mm">
+        <country>Myanmar</country>
+        <description>formerly .bu - Burma</description>
+      </tld>
+
+      <tld domain="mn">
+        <country>Mongolia</country>
+      </tld>
+
+      <tld domain="mo">
+        <country>Macau</country>
+      </tld>
+
+      <tld domain="mp">
+        <country>Northern Mariana Islands</country>
+      </tld>
+
+      <tld domain="mq">
+        <country>Martinique</country>
+      </tld>
+
+      <tld domain="mr">
+        <country>Mauritania</country>
+      </tld>
+
+      <tld domain="ms">
+        <country>Montserrat</country>
+      </tld>
+
+      <tld domain="mt">
+        <country>Malta</country>
+      </tld>
+
+      <tld domain="mu">
+        <country>Mauritius</country>
+      </tld>
+
+      <tld domain="mv">
+        <country>Maldives</country>
+      </tld>
+
+      <tld domain="mw">
+        <country>Malawi</country>
+      </tld>
+
+      <tld domain="mx">
+        <country>Mexico</country>
+      </tld>
+
+      <tld domain="my">
+        <country>Malaysia</country>
+      </tld>
+
+      <tld domain="mz">
+        <country>Mozambique</country>
+      </tld>
+
+      <tld domain="na">
+        <country>Namibia</country>
+      </tld>
+
+      <tld domain="nc">
+        <country>New Caledonia</country>
+      </tld>
+
+      <tld domain="ne">
+        <country>Niger</country>
+      </tld>
+
+      <tld domain="nf">
+        <country>Norfolk Island</country>
+      </tld>
+
+      <tld domain="ng">
+        <country>Nigeria</country>
+      </tld>
+
+      <tld domain="ni">
+        <country>Nicaragua</country>
+      </tld>
+
+      <tld domain="nl">
+        <country>Netherlands</country>
+      </tld>
+
+      <tld domain="no">
+        <country>Norway</country>
+      </tld>
+
+      <tld domain="np">
+        <country>Nepal</country>
+      </tld>
+
+      <tld domain="nr">
+        <country>Nauru</country>
+      </tld>
+
+      <tld domain="nu">
+        <country>Niue</country>
+      </tld>
+
+      <tld domain="nz">
+        <country>New Zealand</country>
+      </tld>
+
+      <tld domain="om">
+        <country>Oman</country>
+      </tld>
+
+      <tld domain="pa">
+        <country>Panama</country>
+      </tld>
+
+      <tld domain="pe">
+        <country>Peru</country>
+      </tld>
+
+      <tld domain="pf">
+        <country>French Polynesia</country>
+      </tld>
+
+      <tld domain="pg">
+        <country>Papua New Guinea</country>
+      </tld>
+
+      <tld domain="ph">
+        <country>Philippines</country>
+      </tld>
+
+      <tld domain="pk">
+        <country>Pakistan</country>
+      </tld>
+
+      <tld domain="pl">
+        <country>Poland</country>
+      </tld>
+
+      <tld domain="pm">
+        <country>Saint Pierre and Miquelon</country>
+      </tld>
+
+      <tld domain="pn">
+        <country>Pitcairn Islands</country>
+      </tld>
+
+      <tld domain="pr">
+        <country>Puerto Rico</country>
+      </tld>
+
+      <tld domain="ps">
+        <country>Palestinian territories</country>
+      </tld>
+
+      <tld domain="pt">
+        <country>Portugal</country>
+      </tld>
+
+      <tld domain="pw">
+        <country>Palau</country>
+      </tld>
+
+      <tld domain="py">
+        <country>Paraguay</country>
+      </tld>
+
+      <tld domain="qa">
+        <country>Qatar</country>
+      </tld>
+
+      <tld domain="re">
+        <country>Réunion</country>
+      </tld>
+
+      <tld domain="ro">
+        <country>Romania</country>
+      </tld>
+
+      <tld domain="rs">
+        <country>Serbia</country>
+      </tld>
+
+      <tld domain="ru">
+        <country>Russia</country>
+      </tld>
+
+      <tld domain="rw">
+        <country>Rwanda</country>
+      </tld>
+
+      <tld domain="sa">
+        <country>Saudi Arabia</country>
+      </tld>
+
+      <tld domain="sb">
+        <country>Solomon Islands</country>
+      </tld>
+
+      <tld domain="sc">
+        <country>Seychelles</country>
+      </tld>
+
+      <tld domain="sd">
+        <country>Sudan</country>
+      </tld>
+
+      <tld domain="se">
+        <country>Sweden</country>
+      </tld>
+
+      <tld domain="sg">
+        <country>Singapore</country>
+      </tld>
+
+      <tld domain="sh">
+        <country>Saint Helena</country>
+      </tld>
+
+      <tld domain="si">
+        <country>Slovenia</country>
+      </tld>
+
+      <tld domain="sj">
+        <country>Svalbard and Jan Mayen Islands</country>
+        <status>NOT_IN_USE</status>
+        <description>not in use; no registrations</description>
+      </tld>
+
+      <tld domain="sk">
+        <country>Slovakia</country>
+      </tld>
+
+      <tld domain="sl">
+        <country>Sierra Leone</country>
+      </tld>
+
+      <tld domain="sm">
+        <country>San Marino</country>
+      </tld>
+
+      <tld domain="sn">
+        <country>Senegal</country>
+      </tld>
+
+      <tld domain="so">
+        <country>Somalia</country>
+      </tld>
+
+      <tld domain="sr">
+        <country>Suriname</country>
+      </tld>
+
+      <tld domain="st">
+        <country>São Tomé and Príncipe</country>
+      </tld>
+
+      <tld domain="su">
+        <country>Soviet Union</country>
+        <status>DELETED</status>
+        <description>
+          deprecated; being phased out; code "transitionally reserved"
+          by ISO 3166-1
+        </description>
+      </tld>
+
+      <tld domain="sv">
+        <country>El Salvador</country>
+      </tld>
+
+      <tld domain="sy">
+        <country>Syria</country>
+      </tld>
+
+      <tld domain="sz">
+        <country>Swaziland</country>
+      </tld>
+
+      <tld domain="tc">
+        <country>Turks and Caicos Islands</country>
+      </tld>
+
+      <tld domain="td">
+        <country>Chad</country>
+      </tld>
+
+      <tld domain="tf">
+        <country>French Southern Territories</country>
+      </tld>
+
+      <tld domain="tg">
+        <country>Togo</country>
+      </tld>
+
+      <tld domain="th">
+        <country>Thailand</country>
+      </tld>
+
+      <tld domain="tj">
+        <country>Tajikistan</country>
+      </tld>
+
+      <tld domain="tk">
+        <country>Tokelau</country>
+      </tld>
+
+      <tld domain="tl">
+        <country>East Timor</country>
+        <description>formerly .tp</description>
+      </tld>
+
+      <tld domain="tm">
+        <country>Turkmenistan</country>
+      </tld>
+
+      <tld domain="tn">
+        <country>Tunisia</country>
+      </tld>
+
+      <tld domain="to">
+        <country>Tonga</country>
+      </tld>
+
+      <tld domain="tp">
+        <country>East Timor</country>
+        <status>DELETED</status>
+        <description>
+          deprecated - use .tl; code "transitionally reserved" by ISO
+          3166-1
+        </description>
+      </tld>
+
+      <tld domain="tr">
+        <country>Turkey</country>
+      </tld>
+
+      <tld domain="tt">
+        <country>Trinidad and Tobago</country>
+      </tld>
+
+      <tld domain="tv">
+        <country>Tuvalu</country>
+      </tld>
+
+      <tld domain="tw">
+        <country>Republic of China</country>
+        <description>Taiwan</description>
+      </tld>
+
+      <tld domain="tz">
+        <country>Tanzania</country>
+      </tld>
+
+      <tld domain="ua">
+        <country>Ukraine</country>
+      </tld>
+
+      <tld domain="ug">
+        <country>Uganda</country>
+      </tld>
+
+      <tld domain="uk">
+        <country>United Kingdom</country>
+        <description>
+          code "exceptionally reserved" by ISO 3166-1 (see also .gb)
+        </description>
+      </tld>
+
+      <tld domain="um">
+        <country>United States Minor Outlying Islands</country>
+        <status>DELETED</status>
+        <description>see http://en.wikipedia.org/wiki/.um</description>
+      </tld>
+
+      <tld domain="us">
+        <country>United States</country>
+      </tld>
+
+      <tld domain="uy">
+        <country>Uruguay</country>
+      </tld>
+
+      <tld domain="uz">
+        <country>Uzbekistan</country>
+      </tld>
+
+      <tld domain="va">
+        <country>Vatican City</country>
+      </tld>
+
+      <tld domain="vc">
+        <country>Saint Vincent and the Grenadines</country>
+      </tld>
+
+      <tld domain="ve">
+        <country>Venezuela</country>
+      </tld>
+
+      <tld domain="vg">
+        <country>British Virgin Islands</country>
+      </tld>
+
+      <tld domain="vi">
+        <country>United States Virgin Islands</country>
+      </tld>
+
+      <tld domain="vn">
+        <country>Vietnam</country>
+      </tld>
+
+      <tld domain="vu">
+        <country>Vanuatu</country>
+      </tld>
+
+      <tld domain="wf">
+        <country>Wallis and Futuna</country>
+      </tld>
+
+      <tld domain="ws">
+        <country>Samoa</country>
+        <description>formerly Western Samoa</description>
+      </tld>
+
+      <tld domain="ye">
+        <country>Yemen</country>
+      </tld>
+
+      <tld domain="yt">
+        <country>Mayotte</country>
+      </tld>
+
+      <tld domain="yu">
+        <country>Yugoslavia</country>
+        <description>
+          subsequently renamed Serbia and Montenegro (code officially
+          replaced by .cs (see above) but still used; code
+          "transitionally reserved" by ISO 3166-1)
+        </description>
+      </tld>
+
+      <tld domain="za">
+        <country>South Africa</country>
+      </tld>
+
+      <tld domain="zm">
+        <country>Zambia</country>
+      </tld>
+
+      <tld domain="zr">
+        <country>Zaire</country>
+        <status>DELETED</status>
+        <description>replaced by .cd</description>
+      </tld>
+
+      <tld domain="zw">
+        <country>Zimbabwe</country>
+      </tld>
+    </cctlds>
+  </tlds>
+
+  <!--  Second Level Domains -->
+  <suffixes>
+
+    <!-- .us Second Level Domains -->
+    <suffix domain="as.us" />
+    <suffix domain="gu.us" />
+    <suffix domain="pr.us" />
+    <suffix domain="vi.us" />
+
+    <!-- .uk Second Level Domains -->
+    <suffix domain="ac.uk" />
+    <suffix domain="co.uk" />
+    <suffix domain="gov.uk" />
+    <suffix domain="ltd.uk" />
+    <suffix domain="me.uk" />
+    <suffix domain="mod.uk" />
+    <suffix domain="net.uk" />
+    <suffix domain="nic.uk" />
+    <suffix domain="nhs.uk" />
+    <suffix domain="org.uk" />
+    <suffix domain="plc.uk" />
+    <suffix domain="police.uk" />
+    <suffix domain="govt.uk">
+      <status>DELETED</status>
+    </suffix>
+    <suffix domain="orgn.uk">
+      <status>DELETED</status>
+    </suffix>
+    <suffix domain="mil.uk">
+      <status>DELETED</status>
+    </suffix>
+    <suffix domain="lea.uk">
+      <status>DELETED</status>
+    </suffix>
+
+
+    <!-- .tr Second Level Domains -->
+    <suffix domain="com.tr" />
+    <suffix domain="gen.tr" />
+    <suffix domain="org.tr"/>
+    <suffix domain="biz.tr" />
+    <suffix domain="info.tr" />
+    <suffix domain="av.tr" />
+    <suffix domain="dr.tr" />
+    <suffix domain="pol.tr" />
+    <suffix domain="bel.tr" />
+    <suffix domain="mil.tr" />
+    <suffix domain="mil.tr" />
+    <suffix domain="bbs.tr" />
+    <suffix domain="k12.tr" />
+    <suffix domain="edu.tr"/>
+    <suffix domain="name.tr" />
+    <suffix domain="net.tr" />
+    <suffix domain="gov.tr" />
+    <suffix domain="gov.tr" />
+    <suffix domain="web.tr" />
+    <suffix domain="tel.tr" />
+    <suffix domain="tv.tr" />
+
+    <!-- .hk Second Level Domains -->
+    <suffix domain="ust.hk" />
+    <suffix domain="hku.hk" />
+    <suffix domain="cuhk.hk" />
+
+    <!-- .it Second Level Domains -->
+
+    <suffix domain="edu.it" />
+    <suffix domain="gov.it" />
+    <suffix domain="italy.it" />
+
+    <!-- .az Second Level Domains -->
+    <suffix domain="pro.az" />
+
+    <!-- .au Second Level Domains -->
+
+    <suffix domain="com.au" />
+    <suffix domain="net.au" />
+    <suffix domain="gov.au" />
+    <suffix domain="org.au" />
+    <suffix domain="edu.au" />
+    <suffix domain="csiro.au" />
+    <suffix domain="asn.au" />
+    <suffix domain="id.au" />
+    <!--  New second level domains -->
+    <suffix domain="act.au" />
+    <suffix domain="nsw.au" />
+    <suffix domain="nt.au" />
+    <suffix domain="qld.au" />
+    <suffix domain="sa.au" />
+    <suffix domain="tas.au" />
+    <suffix domain="vic.au" />
+    <suffix domain="wa.au" />
+
+    <!-- .int Second Level Domains -->
+    <suffix domain="eu.int" />
+
+    <!-- .br Second Level Domains -->
+    <suffix domain="adm.br" />
+    <suffix domain="adv.br" />
+    <suffix domain="agr.br" />
+    <suffix domain="am.br" />
+    <suffix domain="arq.br" />
+    <suffix domain="art.br" />
+    <suffix domain="ato.br" />
+    <suffix domain="bio.br" />
+    <suffix domain="blog.br" />
+    <suffix domain="bmd.br" />
+    <suffix domain="cim.br" />
+    <suffix domain="cng.br" />
+    <suffix domain="cnt.br" />
+    <suffix domain="com.br" />
+    <suffix domain="coop.br" />
+    <suffix domain="ecn.br" />
+    <suffix domain="edu.br" />
+    <suffix domain="eng.br" />
+    <suffix domain="esp.br" />
+    <suffix domain="etc.br" />
+    <suffix domain="eti.br" />
+    <suffix domain="far.br" />
+    <suffix domain="flog.br" />
+    <suffix domain="fm.br" />
+    <suffix domain="fnd.br" />
+    <suffix domain="fot.br" />
+    <suffix domain="fst.br" />
+    <suffix domain="g12.br" />
+    <suffix domain="ggf.br" />
+    <suffix domain="gov.br" />
+    <suffix domain="imb.br" />
+    <suffix domain="ind.br" />
+    <suffix domain="inf.br" />
+    <suffix domain="jor.br" />
+    <suffix domain="lel.br" />
+    <suffix domain="mat.br" />
+    <suffix domain="med.br" />
+    <suffix domain="mil.br" />
+    <suffix domain="mus.br" />
+    <suffix domain="net.br" />
+    <suffix domain="nom.br" />
+    <suffix domain="not.br" />
+    <suffix domain="ntr.br" />
+    <suffix domain="odo.br" />
+    <suffix domain="org.br" />
+    <suffix domain="ppg.br" />
+    <suffix domain="pro.br" />
+    <suffix domain="psc.br" />
+    <suffix domain="psi.br" />
+    <suffix domain="qsl.br" />
+    <suffix domain="rec.br" />
+    <suffix domain="slg.br" />
+    <suffix domain="srv.br" />
+    <suffix domain="tmp.br" />
+    <suffix domain="trd.br" />
+    <suffix domain="tur.br" />
+    <suffix domain="tv.br" />
+    <suffix domain="vet.br" />
+    <suffix domain="vlog.br" />
+    <suffix domain="wiki.br" />
+    <suffix domain="zlg.br" />
+
+    <!--
+      The elements below are generated from the public suffix list
+      published at http://publicsuffix.org/.
+      See org.apache.nutch.util.domain.MozillaPublicSuffixListParser.java
+    -->
+
+    <!--  ac : http://en.wikipedia.org/wiki/.ac-->
+    <suffix domain="com.ac" />
+    <suffix domain="edu.ac" />
+    <suffix domain="gov.ac" />
+    <suffix domain="net.ac" />
+    <suffix domain="mil.ac" />
+    <suffix domain="org.ac" />
+
+    <!--  ad : http://en.wikipedia.org/wiki/.ad-->
+    <suffix domain="nom.ad" />
+
+    <!--  ae : http://en.wikipedia.org/wiki/.ae-->
+    <suffix domain="net.ae" />
+    <suffix domain="gov.ae" />
+    <suffix domain="ac.ae" />
+    <suffix domain="sch.ae" />
+    <suffix domain="org.ae" />
+    <suffix domain="mil.ae" />
+    <suffix domain="pro.ae" />
+    <suffix domain="name.ae" />
+
+    <!--  aero : see http://www.information.aero/index.php?id=66-->
+    <suffix domain="accident-investigation.aero" />
+    <suffix domain="accident-prevention.aero" />
+    <suffix domain="aerobatic.aero" />
+    <suffix domain="aeroclub.aero" />
+    <suffix domain="aerodrome.aero" />
+    <suffix domain="agents.aero" />
+    <suffix domain="aircraft.aero" />
+    <suffix domain="airline.aero" />
+    <suffix domain="airport.aero" />
+    <suffix domain="air-surveillance.aero" />
+    <suffix domain="airtraffic.aero" />
+    <suffix domain="air-traffic-control.aero" />
+    <suffix domain="ambulance.aero" />
+    <suffix domain="amusement.aero" />
+    <suffix domain="association.aero" />
+    <suffix domain="author.aero" />
+    <suffix domain="ballooning.aero" />
+    <suffix domain="broker.aero" />
+    <suffix domain="caa.aero" />
+    <suffix domain="cargo.aero" />
+    <suffix domain="catering.aero" />
+    <suffix domain="certification.aero" />
+    <suffix domain="championship.aero" />
+    <suffix domain="charter.aero" />
+    <suffix domain="civilaviation.aero" />
+    <suffix domain="club.aero" />
+    <suffix domain="conference.aero" />
+    <suffix domain="consultant.aero" />
+    <suffix domain="consulting.aero" />
+    <suffix domain="control.aero" />
+    <suffix domain="council.aero" />
+    <suffix domain="crew.aero" />
+    <suffix domain="design.aero" />
+    <suffix domain="dgca.aero" />
+    <suffix domain="educator.aero" />
+    <suffix domain="emergency.aero" />
+    <suffix domain="engine.aero" />
+    <suffix domain="engineer.aero" />
+    <suffix domain="entertainment.aero" />
+    <suffix domain="equipment.aero" />
+    <suffix domain="exchange.aero" />
+    <suffix domain="express.aero" />
+    <suffix domain="federation.aero" />
+    <suffix domain="flight.aero" />
+    <suffix domain="freight.aero" />
+    <suffix domain="fuel.aero" />
+    <suffix domain="gliding.aero" />
+    <suffix domain="government.aero" />
+    <suffix domain="groundhandling.aero" />
+    <suffix domain="group.aero" />
+    <suffix domain="hanggliding.aero" />
+    <suffix domain="homebuilt.aero" />
+    <suffix domain="insurance.aero" />
+    <suffix domain="journal.aero" />
+    <suffix domain="journalist.aero" />
+    <suffix domain="leasing.aero" />
+    <suffix domain="logistics.aero" />
+    <suffix domain="magazine.aero" />
+    <suffix domain="maintenance.aero" />
+    <suffix domain="marketplace.aero" />
+    <suffix domain="media.aero" />
+    <suffix domain="microlight.aero" />
+    <suffix domain="modelling.aero" />
+    <suffix domain="navigation.aero" />
+    <suffix domain="parachuting.aero" />
+    <suffix domain="paragliding.aero" />
+    <suffix domain="passenger-association.aero" />
+    <suffix domain="pilot.aero" />
+    <suffix domain="press.aero" />
+    <suffix domain="production.aero" />
+    <suffix domain="recreation.aero" />
+    <suffix domain="repbody.aero" />
+    <suffix domain="res.aero" />
+    <suffix domain="research.aero" />
+    <suffix domain="rotorcraft.aero" />
+    <suffix domain="safety.aero" />
+    <suffix domain="scientist.aero" />
+    <suffix domain="services.aero" />
+    <suffix domain="show.aero" />
+    <suffix domain="skydiving.aero" />
+    <suffix domain="software.aero" />
+    <suffix domain="student.aero" />
+    <suffix domain="taxi.aero" />
+    <suffix domain="trader.aero" />
+    <suffix domain="trading.aero" />
+    <suffix domain="trainer.aero" />
+    <suffix domain="union.aero" />
+    <suffix domain="workinggroup.aero" />
+    <suffix domain="works.aero" />
+
+    <!--  af : http://www.nic.af/help.jsp-->
+    <suffix domain="gov.af" />
+    <suffix domain="com.af" />
+    <suffix domain="org.af" />
+    <suffix domain="net.af" />
+    <suffix domain="edu.af" />
+
+    <!--  ag : http://www.nic.ag/prices.htm-->
+    <suffix domain="com.ag" />
+    <suffix domain="org.ag" />
+    <suffix domain="net.ag" />
+    <suffix domain="co.ag" />
+    <suffix domain="nom.ag" />
+
+    <!--  ai : http://nic.com.ai/-->
+    <suffix domain="off.ai" />
+    <suffix domain="com.ai" />
+    <suffix domain="net.ai" />
+    <suffix domain="org.ai" />
+
+    <!--  al : http://www.inima.al/Domains.html-->
+    <suffix domain="gov.al" />
+    <suffix domain="edu.al" />
+    <suffix domain="org.al" />
+    <suffix domain="com.al" />
+    <suffix domain="net.al" />
+
+    <!--  am : http://en.wikipedia.org/wiki/.am-->
+
+    <!--  an : http://www.una.an/an_domreg/default.asp-->
+    <suffix domain="com.an" />
+    <suffix domain="net.an" />
+    <suffix domain="org.an" />
+    <suffix domain="edu.an" />
+
+    <!--  ao : http://en.wikipedia.org/wiki/.ao-->
+    <!--  list of 2nd level TLDs ?-->
+
+    <!--  aq : http://en.wikipedia.org/wiki/.aq-->
+
+    <!--  ar : http://en.wikipedia.org/wiki/.ar-->
+    <suffix domain="congresodelalengua3.ar" />
+    <suffix domain="educ.ar" />
+    <suffix domain="gobiernoelectronico.ar" />
+    <suffix domain="mecon.ar" />
+    <suffix domain="nacion.ar" />
+    <suffix domain="nic.ar" />
+    <suffix domain="promocion.ar" />
+    <suffix domain="retina.ar" />
+    <suffix domain="uba.ar" />
+
+    <suffix domain="com.ar" />
+    <suffix domain="edu.ar" />
+    <suffix domain="gob.ar" />
+    <suffix domain="gov.ar" />
+    <suffix domain="int.ar" />
+    <suffix domain="mil.ar" />
+    <suffix domain="net.ar" />
+    <suffix domain="org.ar" />
+    <suffix domain="tur.ar" />
+
+    <!--  arpa : http://en.wikipedia.org/wiki/.arpa-->
+    <suffix domain="e164.arpa" />
+    <suffix domain="in-addr.arpa" />
+    <suffix domain="ip6.arpa" />
+    <suffix domain="uri.arpa" />
+    <suffix domain="urn.arpa" />
+
+    <!--  as : http://en.wikipedia.org/wiki/.as-->
+
+    <!--  at : http://en.wikipedia.org/wiki/.at-->
+    <suffix domain="gv.at" />
+    <suffix domain="ac.at" />
+    <suffix domain="co.at" />
+    <suffix domain="or.at" />
+
+    <!--  au : http://en.wikipedia.org/wiki/.au-->
+    <!--  au geographical names (vic.au etc... are covered above)-->
+    <suffix domain="act.edu.au" />
+    <suffix domain="nsw.edu.au" />
+    <suffix domain="nt.edu.au" />
+    <suffix domain="qld.edu.au" />
+    <suffix domain="sa.edu.au" />
+    <suffix domain="tas.edu.au" />
+    <suffix domain="vic.edu.au" />
+    <suffix domain="wa.edu.au" />
+    <suffix domain="act.gov.au" />
+    <suffix domain="nsw.gov.au" />
+    <suffix domain="nt.gov.au" />
+    <suffix domain="qld.gov.au" />
+    <suffix domain="sa.gov.au" />
+    <suffix domain="tas.gov.au" />
+    <suffix domain="vic.gov.au" />
+    <suffix domain="wa.gov.au" />
+
+    <!--  aw : http://en.wikipedia.org/wiki/.aw-->
+    <suffix domain="com.aw" />
+
+    <!--  ax : http://en.wikipedia.org/wiki/.ax-->
+
+    <!--  az : http://en.wikipedia.org/wiki/.az-->
+    <suffix domain="com.az" />
+    <suffix domain="net.az" />
+    <suffix domain="int.az" />
+    <suffix domain="gov.az" />
+    <suffix domain="org.az" />
+    <suffix domain="edu.az" />
+    <suffix domain="info.az" />
+    <suffix domain="pp.az" />
+    <suffix domain="mil.az" />
+    <suffix domain="name.az" />
+    <suffix domain="biz.az" />
+
+    <!--  ba : http://en.wikipedia.org/wiki/.ba-->
+    <suffix domain="org.ba" />
+    <suffix domain="net.ba" />
+    <suffix domain="edu.ba" />
+    <suffix domain="gov.ba" />
+    <suffix domain="mil.ba" />
+    <suffix domain="unsa.ba" />
+    <suffix domain="unbi.ba" />
+    <suffix domain="co.ba" />
+    <suffix domain="com.ba" />
+    <suffix domain="rs.ba" />
+
+    <!--  bb : http://en.wikipedia.org/wiki/.bb-->
+    <suffix domain="com.bb" />
+    <suffix domain="edu.bb" />
+    <suffix domain="gov.bb" />
+    <suffix domain="net.bb" />
+    <suffix domain="org.bb" />
+
+    <!--  bd : http://en.wikipedia.org/wiki/.bd-->
+
+    <!--  be : http://en.wikipedia.org/wiki/.be-->
+    <suffix domain="ac.be" />
+
+    <!--  bf : http://en.wikipedia.org/wiki/.bf-->
+
+    <!--  bg : http://en.wikipedia.org/wiki/.bg-->
+
+    <!--  bh : http://en.wikipedia.org/wiki/.bh-->
+    <!--  list of 2nd level tlds ?-->
+
+    <!--  bi : http://en.wikipedia.org/wiki/.bi-->
+    <!--  list of 2nd level tlds ?-->
+
+    <!--  biz : http://en.wikipedia.org/wiki/.biz-->
+
+    <!--  bj : http://en.wikipedia.org/wiki/.bj-->
+    <!--  list of 2nd level tlds ?-->
+
+    <!--  bm : http://www.bermudanic.bm/dnr-text.txt-->
+    <suffix domain="com.bm" />
+    <suffix domain="edu.bm" />
+    <suffix domain="gov.bm" />
+    <suffix domain="net.bm" />
+    <suffix domain="org.bm" />
+
+    <!--  bn : http://en.wikipedia.org/wiki/.bn-->
+
+    <!--  bo : http://www.nic.bo/-->
+    <suffix domain="com.bo" />
+    <suffix domain="edu.bo" />
+    <suffix domain="gov.bo" />
+    <suffix domain="gob.bo" />
+    <suffix domain="int.bo" />
+    <suffix domain="org.bo" />
+    <suffix domain="net.bo" />
+    <suffix domain="mil.bo" />
+    <suffix domain="tv.bo" />
+
+    <!--  br : http://en.wikipedia.org/wiki/.br-->
+
+    <!--  bs : http://www.nic.bs/rules.html-->
+    <suffix domain="com.bs" />
+    <suffix domain="net.bs" />
+    <suffix domain="org.bs" />
+    <suffix domain="edu.bs" />
+    <suffix domain="gov.bs" />
+
+    <!--  bt : http://en.wikipedia.org/wiki/.bt-->
+
+    <!--  bw : http://en.wikipedia.org/wiki/.bw-->
+    <!--  list of 2nd level tlds ?-->
+
+    <!--  by : http://en.wikipedia.org/wiki/.by-->
+    <!--  list of 2nd level tlds ?-->
+
+    <!--  bz : http://en.wikipedia.org/wiki/.bz-->
+    <!--  list of 2nd level tlds ?-->
+
+    <!--  ca : http://en.wikipedia.org/wiki/.ca-->
+    <!--  ca geographical names-->
+    <suffix domain="ab.ca" />
+    <suffix domain="bc.ca" />
+    <suffix domain="mb.ca" />
+    <suffix domain="nb.ca" />
+    <suffix domain="nf.ca" />
+    <suffix domain="nl.ca" />
+    <suffix domain="ns.ca" />
+    <suffix domain="nt.ca" />
+    <suffix domain="nu.ca" />
+    <suffix domain="on.ca" />
+    <suffix domain="pe.ca" />
+    <suffix domain="qc.ca" />
+    <suffix domain="sk.ca" />
+    <suffix domain="yk.ca" />
+
+    <!--  cat : http://en.wikipedia.org/wiki/.cat-->
+
+    <!--  cc : http://en.wikipedia.org/wiki/.cc-->
+
+    <!--  cd : http://en.wikipedia.org/wiki/.cd-->
+
+    <!--  cf : http://en.wikipedia.org/wiki/.cf-->
+
+    <!--  cg : http://en.wikipedia.org/wiki/.cg-->
+
+    <!--  ch : http://en.wikipedia.org/wiki/.ch-->
+
+    <!--  ci : http://en.wikipedia.org/wiki/.ci-->
+    <!--  list of 2nd level tlds ?-->
+
+    <!--  ck : http://en.wikipedia.org/wiki/.ck-->
+
+    <!--  cl : http://en.wikipedia.org/wiki/.cl-->
+
+    <!--  cm : http://en.wikipedia.org/wiki/.cm-->
+
+    <!--  cn : http://en.wikipedia.org/wiki/.cn-->
+    <suffix domain="ac.cn" />
+    <suffix domain="com.cn" />
+    <suffix domain="edu.cn" />
+    <suffix domain="gov.cn" />
+    <suffix domain="net.cn" />
+    <suffix domain="org.cn" />
+    <!--  cn geographic names-->
+    <suffix domain="ah.cn" />
+    <suffix domain="bj.cn" />
+    <suffix domain="cq.cn" />
+    <suffix domain="fj.cn" />
+    <suffix domain="gd.cn" />
+    <suffix domain="gs.cn" />
+    <suffix domain="gz.cn" />
+    <suffix domain="gx.cn" />
+    <suffix domain="ha.cn" />
+    <suffix domain="hb.cn" />
+    <suffix domain="he.cn" />
+    <suffix domain="hi.cn" />
+    <suffix domain="hl.cn" />
+    <suffix domain="hn.cn" />
+    <suffix domain="jl.cn" />
+    <suffix domain="js.cn" />
+    <suffix domain="jx.cn" />
+    <suffix domain="ln.cn" />
+    <suffix domain="nm.cn" />
+    <suffix domain="nx.cn" />
+    <suffix domain="qh.cn" />
+    <suffix domain="sc.cn" />
+    <suffix domain="sd.cn" />
+    <suffix domain="sh.cn" />
+    <suffix domain="sn.cn" />
+    <suffix domain="sx.cn" />
+    <suffix domain="tj.cn" />
+    <suffix domain="xj.cn" />
+    <suffix domain="xz.cn" />
+    <suffix domain="yn.cn" />
+    <suffix domain="zj.cn" />
+
+    <!--  co : http://en.wikipedia.org/wiki/.co-->
+
+    <suffix domain="com.co" />
+    <suffix domain="org.co" />
+    <suffix domain="edu.co" />
+    <suffix domain="gov.co" />
+    <suffix domain="net.co" />
+    <suffix domain="mil.co" />
+    <suffix domain="nom.co" />
+
+    <!--  com : http://en.wikipedia.org/wiki/.com-->
+
+    <!--  coop : http://en.wikipedia.org/wiki/.coop-->
+
+    <!--  cr : http://en.wikipedia.org/wiki/.cr-->
+
+    <!--  cu : http://en.wikipedia.org/wiki/.cu-->
+    <suffix domain="com.cu" />
+    <suffix domain="edu.cu" />
+    <suffix domain="org.cu" />
+    <suffix domain="net.cu" />
+    <suffix domain="gov.cu" />
+    <suffix domain="inf.cu" />
+
+    <!--  cv : http://en.wikipedia.org/wiki/.cv-->
+
+    <!--  cx : http://en.wikipedia.org/wiki/.cx-->
+
+    <!--  cy : http://en.wikipedia.org/wiki/.cy-->
+
+    <!--  cz : http://en.wikipedia.org/wiki/.cz-->
+
+    <!--  de : http://en.wikipedia.org/wiki/.de-->
+
+    <!--  dj : http://en.wikipedia.org/wiki/.dj-->
+
+    <!--  dk : http://en.wikipedia.org/wiki/.dk-->
+
+    <!--  dm : http://en.wikipedia.org/wiki/.dm-->
+    <suffix domain="com.dm" />
+    <suffix domain="net.dm" />
+    <suffix domain="org.dm" />
+
+    <!--  do : http://en.wikipedia.org/wiki/.do-->
+
+    <!--  dz : http://en.wikipedia.org/wiki/.dz-->
+    <suffix domain="com.dz" />
+    <suffix domain="org.dz" />
+    <suffix domain="net.dz" />
+    <suffix domain="gov.dz" />
+    <suffix domain="edu.dz" />
+    <suffix domain="asso.dz" />
+    <suffix domain="pol.dz" />
+    <suffix domain="art.dz" />
+
+    <!--  ec : http://www.nic.ec/reg/paso1.asp-->
+    <suffix domain="com.ec" />
+    <suffix domain="info.ec" />
+    <suffix domain="net.ec" />
+    <suffix domain="fin.ec" />
+    <suffix domain="med.ec" />
+    <suffix domain="pro.ec" />
+    <suffix domain="org.ec" />
+    <suffix domain="edu.ec" />
+    <suffix domain="gov.ec" />
+    <suffix domain="mil.ec" />
+
+    <!--  edu : http://en.wikipedia.org/wiki/.edu-->
+
+    <!--  ee : http://www3.eenet.ee/ee/application.html-->
+    <suffix domain="com.ee" />
+    <suffix domain="org.ee" />
+    <suffix domain="fie.ee" />
+    <suffix domain="pri.ee" />
+
+    <!--  eg : http://en.wikipedia.org/wiki/.eg-->
+
+    <!--  er : http://en.wikipedia.org/wiki/.er-->
+
+    <!--  es : https://www.nic.es/site_ingles/ingles/dominios/index.html-->
+    <suffix domain="com.es" />
+    <suffix domain="nom.es" />
+    <suffix domain="org.es" />
+    <suffix domain="gob.es" />
+    <suffix domain="edu.es" />
+
+    <!--  et : http://en.wikipedia.org/wiki/.et-->
+
+    <!--  eu : http://en.wikipedia.org/wiki/.eu-->
+
+    <!--  fi : http://en.wikipedia.org/wiki/.fi-->
+
+    <!--  fj : http://en.wikipedia.org/wiki/.fj-->
+
+    <!--  fk : http://en.wikipedia.org/wiki/.fk-->
+
+    <!--  fm : http://en.wikipedia.org/wiki/.fm-->
+
+    <!--  fo : http://en.wikipedia.org/wiki/.fo-->
+
+    <!--  fr : http://www.afnic.fr/-->
+    <!--  domaines descriptifs : http://www.afnic.fr/obtenir/chartes/nommage-fr/annexe-descriptifs-->
+    <suffix domain="com.fr" />
+    <suffix domain="asso.fr" />
+    <suffix domain="nom.fr" />
+    <suffix domain="prd.fr" />
+    <suffix domain="presse.fr" />
+    <suffix domain="tm.fr" />
+    <!--  domaines sectoriels : http://www.afnic.fr/obtenir/chartes/nommage-fr/annexe-sectoriels-->
+    <suffix domain="aeroport.fr" />
+    <suffix domain="assedic.fr" />
+    <suffix domain="avocat.fr" />
+    <suffix domain="avoues.fr" />
+    <suffix domain="cci.fr" />
+    <suffix domain="chambagri.fr" />
+    <suffix domain="chirurgiens-dentistes.fr" />
+    <suffix domain="experts-comptables.fr" />
+    <suffix domain="geometre-expert.fr" />
+    <suffix domain="gouv.fr" />
+    <suffix domain="greta.fr" />
+    <suffix domain="huissier-justice.fr" />
+    <suffix domain="medecin.fr" />
+    <suffix domain="notaires.fr" />
+    <suffix domain="pharmacien.fr" />
+    <suffix domain="port.fr" />
+    <suffix domain="veterinaire.fr" />
+
+    <!--  ga : http://en.wikipedia.org/wiki/.ga-->
+
+    <!--  gd : http://en.wikipedia.org/wiki/.gd-->
+
+    <!--  ge : http://www.nic.net.ge/policy_en.pdf-->
+    <suffix domain="com.ge" />
+    <suffix domain="edu.ge" />
+    <suffix domain="gov.ge" />
+    <suffix domain="org.ge" />
+    <suffix domain="mil.ge" />
+    <suffix domain="net.ge" />
+    <suffix domain="pvt.ge" />
+
+    <!--  gf : http://en.wikipedia.org/wiki/.gf-->
+
+    <!--  gg : http://www.channelisles.net/tandc.shtml-->
+    <suffix domain="co.gg" />
+    <suffix domain="org.gg" />
+    <suffix domain="net.gg" />
+    <suffix domain="sch.gg" />
+    <suffix domain="gov.gg" />
+
+    <!--  gh : http://www.ghana.com/domain.htm-->
+
+    <!--  gi : http://www.nic.gi/rules.html-->
+    <suffix domain="com.gi" />
+    <suffix domain="ltd.gi" />
+    <suffix domain="gov.gi" />
+    <suffix domain="mod.gi" />
+    <suffix domain="edu.gi" />
+    <suffix domain="org.gi" />
+
+    <!--  gl : http://en.wikipedia.org/wiki/.gl-->
+
+    <!--  gm : http://www.nic.gm/htmlpages%5Cgm-policy.htm-->
+
+    <!--  gn : http://psg.com/dns/gn/gn.txt-->
+
+    <!--  gov : http://en.wikipedia.org/wiki/.gov-->
+
+    <!--  gp : http://www.nic.gp/index_en.php?url=charte_en.php-->
+    <suffix domain="com.gp" />
+    <suffix domain="net.gp" />
+    <suffix domain="edu.gp" />
+    <suffix domain="org.gp" />
+
+    <!--  gq : http://en.wikipedia.org/wiki/.gq-->
+
+    <!--  gr : https://grweb.ics.forth.gr/english/1617-B-2002.html-->
+    <suffix domain="com.gr" />
+    <suffix domain="edu.gr" />
+    <suffix domain="net.gr" />
+    <suffix domain="org.gr" />
+    <suffix domain="gov.gr" />
+
+    <!--  gs : http://en.wikipedia.org/wiki/.gs-->
+
+    <!--  gt : http://www.gt/politicas.html-->
+
+    <!--  gu : http://gadao.gov.gu/registration.txt-->
+
+    <!--  gw : http://en.wikipedia.org/wiki/.gw-->
+
+    <!--  gy : http://en.wikipedia.org/wiki/.gy-->
+
+    <!--  hk : http://en.wikipedia.org/wiki/.hk-->
+    <suffix domain="com.hk" />
+    <suffix domain="edu.hk" />
+    <suffix domain="gov.hk" />
+    <suffix domain="idv.hk" />
+    <suffix domain="net.hk" />
+    <suffix domain="org.hk" />
+
+    <!--  hm : http://en.wikipedia.org/wiki/.hm-->
+
+    <!--  hn : http://www.nic.hn/politicas/ps02,,05.html-->
+    <suffix domain="com.hn" />
+    <suffix domain="edu.hn" />
+    <suffix domain="org.hn" />
+    <suffix domain="net.hn" />
+    <suffix domain="mil.hn" />
+    <suffix domain="gob.hn" />
+
+    <!--  hr : http://www.dns.hr/documents/pdf/HRTLD-regulations.pdf-->
+    <suffix domain="iz.hr" />
+    <suffix domain="from.hr" />
+    <suffix domain="name.hr" />
+    <suffix domain="com.hr" />
+
+    <!--  ht : http://www.nic.ht/info/charte.cfm-->
+    <suffix domain="com.ht" />
+    <suffix domain="shop.ht" />
+    <suffix domain="firm.ht" />
+    <suffix domain="info.ht" />
+    <suffix domain="adult.ht" />
+    <suffix domain="net.ht" />
+    <suffix domain="pro.ht" />
+    <suffix domain="org.ht" />
+    <suffix domain="med.ht" />
+    <suffix domain="art.ht" />
+    <suffix domain="coop.ht" />
+    <suffix domain="pol.ht" />
+    <suffix domain="asso.ht" />
+    <suffix domain="edu.ht" />
+    <suffix domain="rel.ht" />
+    <suffix domain="gouv.ht" />
+    <suffix domain="perso.ht" />
+
+    <!--  hu : http://www.domain.hu/domain/English/sld.html-->
+    <suffix domain="co.hu" />
+    <suffix domain="info.hu" />
+    <suffix domain="org.hu" />
+    <suffix domain="priv.hu" />
+    <suffix domain="sport.hu" />
+    <suffix domain="tm.hu" />
+    <suffix domain="2000.hu" />
+    <suffix domain="agrar.hu" />
+    <suffix domain="bolt.hu" />
+    <suffix domain="casino.hu" />
+    <suffix domain="city.hu" />
+    <suffix domain="erotica.hu" />
+    <suffix domain="erotika.hu" />
+    <suffix domain="film.hu" />
+    <suffix domain="forum.hu" />
+    <suffix domain="games.hu" />
+    <suffix domain="hotel.hu" />
+    <suffix domain="ingatlan.hu" />
+    <suffix domain="jogasz.hu" />
+    <suffix domain="konyvelo.hu" />
+    <suffix domain="lakas.hu" />
+    <suffix domain="media.hu" />
+    <suffix domain="news.hu" />
+    <suffix domain="reklam.hu" />
+    <suffix domain="sex.hu" />
+    <suffix domain="shop.hu" />
+    <suffix domain="suli.hu" />
+    <suffix domain="szex.hu" />
+    <suffix domain="tozsde.hu" />
+    <suffix domain="utazas.hu" />
+    <suffix domain="video.hu" />
+
+    <!--  id : http://en.wikipedia.org/wiki/.id-->
+
+    <suffix domain="ac.id" />
+    <suffix domain="co.id" />
+    <suffix domain="net.id" />
+    <suffix domain="or.id" />
+    <suffix domain="web.id" />
+    <suffix domain="sch.id" />
+    <suffix domain="mil.id" />
+    <suffix domain="go.id" />
+
+    <!--  ie : http://en.wikipedia.org/wiki/.ie-->
+
+    <!--  il : http://en.wikipedia.org/wiki/.il-->
+   <suffix domain="ac.il" />
+   <suffix domain="co.il" />
+   <suffix domain="org.il" />
+   <suffix domain="net.il" />
+   <suffix domain="k12.il" />
+   <suffix domain="gov.il" />
+   <suffix domain="muni.il" />
+   <suffix domain="idf.il" />
+
+    <!--  im : https://www.nic.im/pdfs/imfaqs.pdf-->
+    <suffix domain="co.im" />
+    <suffix domain="ltd.co.im" />
+    <suffix domain="plc.co.im" />
+    <suffix domain="net.im" />
+    <suffix domain="gov.im" />
+    <suffix domain="org.im" />
+    <suffix domain="nic.im" />
+    <suffix domain="ac.im" />
+
+    <!--  in : http://en.wikipedia.org/wiki/.in-->
+    <suffix domain="co.in" />
+    <suffix domain="firm.in" />
+    <suffix domain="net.in" />
+    <suffix domain="org.in" />
+    <suffix domain="gen.in" />
+    <suffix domain="ind.in" />
+    <suffix domain="nic.in" />
+    <suffix domain="ac.in" />
+    <suffix domain="edu.in" />
+    <suffix domain="res.in" />
+    <suffix domain="gov.in" />
+    <suffix domain="mil.in" />
+
+    <!--  info : http://en.wikipedia.org/wiki/.info-->
+
+    <!--  int : http://en.wikipedia.org/wiki/.int-->
+
+    <!--  io : http://www.nic.io/rules.html-->
+    <!--  list of 2nd level tlds ?-->
+
+    <!--  iq : http://en.wikipedia.org/wiki/.iq-->
+    <!--  no registrar website found, but google shows .gov.iq and .edu.iq websites-->
+    <suffix domain="gov.iq" />
+    <suffix domain="edu.iq" />
+
+    <!--  ir : http://www.nic.ir/ascii/Appendix1.htm-->
+    <suffix domain="ac.ir" />
+    <suffix domain="co.ir" />
+    <suffix domain="gov.ir" />
+    <suffix domain="id.ir" />
+    <suffix domain="net.ir" />
+    <suffix domain="org.ir" />
+    <suffix domain="sch.ir" />
+
+    <!--  is : http://www.isnic.is/domain/rules.php-->
+    <suffix domain="net.is" />
+    <suffix domain="com.is" />
+    <suffix domain="edu.is" />
+    <suffix domain="gov.is" />
+    <suffix domain="org.is" />
+    <suffix domain="int.is" />
+
+    <!--  it : http://en.wikipedia.org/wiki/.it-->
+    <suffix domain="gov.it" />
+    <!--  geo-names found at http://www.nic.it/RA/en/domini/regole/nomi-riservati.pdf-->
+    <suffix domain="agrigento.it" />
+    <suffix domain="ag.it" />
+    <suffix domain="alessandria.it" />
+    <suffix domain="al.it" />
+    <suffix domain="ancona.it" />
+    <suffix domain="an.it" />
+    <suffix domain="aosta.it" />
+    <suffix domain="aoste.it" />
+    <suffix domain="ao.it" />
+    <suffix domain="arezzo.it" />
+    <suffix domain="ar.it" />
+    <suffix domain="ascoli-piceno.it" />
+    <suffix domain="ascolipiceno.it" />
+    <suffix domain="ap.it" />
+    <suffix domain="asti.it" />
+    <suffix domain="at.it" />
+    <suffix domain="avellino.it" />
+    <suffix domain="av.it" />
+    <suffix domain="bari.it" />
+    <suffix domain="ba.it" />
+    <suffix domain="barlettaandriatrani.it" />
+    <suffix domain="barletta-andria-trani.it" />
+    <suffix domain="belluno.it" />
+    <suffix domain="bl.it" />
+    <suffix domain="benevento.it" />
+    <suffix domain="bn.it" />
+    <suffix domain="bergamo.it" />
+    <suffix domain="bg.it" />
+    <suffix domain="biella.it" />
+    <suffix domain="bi.it" />
+    <suffix domain="bologna.it" />
+    <suffix domain="bo.it" />
+    <suffix domain="bolzano.it" />
+    <suffix domain="bozen.it" />
+    <suffix domain="balsan.it" />
+    <suffix domain="alto-adige.it" />
+    <suffix domain="altoadige.it" />
+    <suffix domain="suedtirol.it" />
+    <suffix domain="bz.it" />
+    <suffix domain="brescia.it" />
+    <suffix domain="bs.it" />
+    <suffix domain="brindisi.it" />
+    <suffix domain="br.it" />
+    <suffix domain="cagliari.it" />
+    <suffix domain="ca.it" />
+    <suffix domain="caltanissetta.it" />
+    <suffix domain="cl.it" />
+    <suffix domain="campobasso.it" />
+    <suffix domain="cb.it" />
+    <suffix domain="caserta.it" />
+    <suffix domain="ce.it" />
+    <suffix domain="catania.it" />
+    <suffix domain="ct.it" />
+    <suffix domain="catanzaro.it" />
+    <suffix domain="cz.it" />
+    <suffix domain="chieti.it" />
+    <suffix domain="ch.it" />
+    <suffix domain="como.it" />
+    <suffix domain="co.it" />
+    <suffix domain="cosenza.it" />
+    <suffix domain="cs.it" />
+    <suffix domain="cremona.it" />
+    <suffix domain="cr.it" />
+    <suffix domain="crotone.it" />
+    <suffix domain="kr.it" />
+    <suffix domain="cuneo.it" />
+    <suffix domain="cn.it" />
+    <suffix domain="enna.it" />
+    <suffix domain="en.it" />
+    <suffix domain="fermo.it" />
+    <suffix domain="ferrara.it" />
+    <suffix domain="fe.it" />
+    <suffix domain="firenze.it" />
+    <suffix domain="florence.it" />
+    <suffix domain="fi.it" />
+    <suffix domain="foggia.it" />
+    <suffix domain="fg.it" />
+    <suffix domain="forli-cesena.it" />
+    <suffix domain="forlicesena.it" />
+    <suffix domain="fc.it" />
+    <suffix domain="frosinone.it" />
+    <suffix domain="fr.it" />
+    <suffix domain="genova.it" />
+    <suffix domain="genoa.it" />
+    <suffix domain="ge.it" />
+    <suffix domain="gorizia.it" />
+    <suffix domain="go.it" />
+    <suffix domain="grosseto.it" />
+    <suffix domain="gr.it" />
+    <suffix domain="imperia.it" />
+    <suffix domain="im.it" />
+    <suffix domain="isernia.it" />
+    <suffix domain="is.it" />
+    <suffix domain="laquila.it" />
+    <suffix domain="aquila.it" />
+    <suffix domain="aq.it" />
+    <suffix domain="la-spezia.it" />
+    <suffix domain="laspezia.it" />
+    <suffix domain="sp.it" />
+    <suffix domain="latina.it" />
+    <suffix domain="lt.it" />
+    <suffix domain="lecce.it" />
+    <suffix domain="le.it" />
+    <suffix domain="lecco.it" />
+    <suffix domain="lc.it" />
+    <suffix domain="livorno.it" />
+    <suffix domain="li.it" />
+    <suffix domain="lodi.it" />
+    <suffix domain="lo.it" />
+    <suffix domain="lucca.it" />
+    <suffix domain="lu.it" />
+    <suffix domain="macerata.it" />
+    <suffix domain="mc.it" />
+    <suffix domain="mantova.it" />
+    <suffix domain="mn.it" />
+    <suffix domain="massa-carrara.it" />
+    <suffix domain="massacarrara.it" />
+    <suffix domain="ms.it" />
+    <suffix domain="matera.it" />
+    <suffix domain="mt.it" />
+    <suffix domain="messina.it" />
+    <suffix domain="me.it" />
+    <suffix domain="milano.it" />
+    <suffix domain="milan.it" />
+    <suffix domain="mi.it" />
+    <suffix domain="modena.it" />
+    <suffix domain="mo.it" />
+    <suffix domain="monza.it" />
+    <suffix domain="napoli.it" />
+    <suffix domain="naples.it" />
+    <suffix domain="na.it" />
+    <suffix domain="novara.it" />
+    <suffix domain="no.it" />
+    <suffix domain="nuoro.it" />
+    <suffix domain="nu.it" />
+    <suffix domain="oristano.it" />
+    <suffix domain="or.it" />
+    <suffix domain="padova.it" />
+    <suffix domain="padua.it" />
+    <suffix domain="pd.it" />
+    <suffix domain="palermo.it" />
+    <suffix domain="pa.it" />
+    <suffix domain="parma.it" />
+    <suffix domain="pr.it" />
+    <suffix domain="pavia.it" />
+    <suffix domain="pv.it" />
+    <suffix domain="perugia.it" />
+    <suffix domain="pg.it" />
+    <suffix domain="pescara.it" />
+    <suffix domain="pe.it" />
+    <suffix domain="pesaro-urbino.it" />
+    <suffix domain="pesarourbino.it" />
+    <suffix domain="pu.it" />
+    <suffix domain="piacenza.it" />
+    <suffix domain="pc.it" />
+    <suffix domain="pisa.it" />
+    <suffix domain="pi.it" />
+    <suffix domain="pistoia.it" />
+    <suffix domain="pt.it" />
+    <suffix domain="pordenone.it" />
+    <suffix domain="pn.it" />
+    <suffix domain="potenza.it" />
+    <suffix domain="pz.it" />
+    <suffix domain="prato.it" />
+    <suffix domain="po.it" />
+    <suffix domain="ragusa.it" />
+    <suffix domain="rg.it" />
+    <suffix domain="ravenna.it" />
+    <suffix domain="ra.it" />
+    <suffix domain="reggio-calabria.it" />
+    <suffix domain="reggiocalabria.it" />
+    <suffix domain="rc.it" />
+    <suffix domain="reggio-emilia.it" />
+    <suffix domain="reggioemilia.it" />
+    <suffix domain="re.it" />
+    <suffix domain="rieti.it" />
+    <suffix domain="ri.it" />
+    <suffix domain="rimini.it" />
+    <suffix domain="rn.it" />
+    <suffix domain="roma.it" />
+    <suffix domain="rome.it" />
+    <suffix domain="rm.it" />
+    <suffix domain="rovigo.it" />
+    <suffix domain="ro.it" />
+    <suffix domain="salerno.it" />
+    <suffix domain="sa.it" />
+    <suffix domain="sassari.it" />
+    <suffix domain="ss.it" />
+    <suffix domain="savona.it" />
+    <suffix domain="sv.it" />
+    <suffix domain="siena.it" />
+    <suffix domain="si.it" />
+    <suffix domain="siracusa.it" />
+    <suffix domain="sr.it" />
+    <suffix domain="sondrio.it" />
+    <suffix domain="so.it" />
+    <suffix domain="taranto.it" />
+    <suffix domain="ta.it" />
+    <suffix domain="teramo.it" />
+    <suffix domain="te.it" />
+    <suffix domain="terni.it" />
+    <suffix domain="tr.it" />
+    <suffix domain="torino.it" />
+    <suffix domain="turin.it" />
+    <suffix domain="to.it" />
+    <suffix domain="trapani.it" />
+    <suffix domain="tp.it" />
+    <suffix domain="trento.it" />
+    <suffix domain="trentino.it" />
+    <suffix domain="tn.it" />
+    <suffix domain="treviso.it" />
+    <suffix domain="tv.it" />
+    <suffix domain="trieste.it" />
+    <suffix domain="ts.it" />
+    <suffix domain="udine.it" />
+    <suffix domain="ud.it" />
+    <suffix domain="varese.it" />
+    <suffix domain="va.it" />
+    <suffix domain="venezia.it" />
+    <suffix domain="venice.it" />
+    <suffix domain="ve.it" />
+    <suffix domain="verbania.it" />
+    <suffix domain="vb.it" />
+    <suffix domain="vercelli.it" />
+    <suffix domain="vc.it" />
+    <suffix domain="verona.it" />
+    <suffix domain="vr.it" />
+    <suffix domain="vibo-valentia.it" />
+    <suffix domain="vibovalentia.it" />
+    <suffix domain="vv.it" />
+    <suffix domain="vicenza.it" />
+    <suffix domain="vi.it" />
+    <suffix domain="viterbo.it" />
+    <suffix domain="vt.it" />
+
+    <!--  je : http://www.channelisles.net/tandc.shtml-->
+    <suffix domain="co.je" />
+    <suffix domain="org.je" />
+    <suffix domain="net.je" />
+    <suffix domain="sch.je" />
+    <suffix domain="gov.je" />
+
+    <!--  jm : http://www.com.jm/register.html-->
+
+    <!--  jo : http://www.nis.gov.jo/dns/reg.html-->
+    <suffix domain="com.jo" />
+    <suffix domain="org.jo" />
+    <suffix domain="net.jo" />
+    <suffix domain="edu.jo" />
+    <suffix domain="gov.jo" />
+    <suffix domain="mil.jo" />
+    <suffix domain="myname.jo" />
+
+    <!--  jobs : http://en.wikipedia.org/wiki/.jobs-->
+
+    <!--  jp : http://en.wikipedia.org/wiki/.jp-->
+    <suffix domain="ac.jp" />
+    <suffix domain="ad.jp" />
+    <suffix domain="co.jp" />
+    <suffix domain="ed.jp" />
+    <suffix domain="go.jp" />
+    <suffix domain="gr.jp" />
+    <suffix domain="lg.jp" />
+    <suffix domain="ne.jp" />
+    <suffix domain="or.jp" />
+    <!--  jp geographical names-->
+    <!--  I can't find an official English explanation, but used https://bugzilla.mozilla.org/show_bug.cgi?id=252342#c31-->
+    <suffix domain="aichi.jp" />
+    <suffix domain="akita.jp" />
+    <suffix domain="aomori.jp" />
+    <suffix domain="chiba.jp" />
+    <suffix domain="ehime.jp" />
+    <suffix domain="fukui.jp" />
+    <suffix domain="fukuoka.jp" />
+    <suffix domain="fukushima.jp" />
+    <suffix domain="gifu.jp" />
+    <suffix domain="gunma.jp" />
+    <suffix domain="hiroshima.jp" />
+    <suffix domain="hokkaido.jp" />
+    <suffix domain="hyogo.jp" />
+    <suffix domain="ibaraki.jp" />
+    <suffix domain="ishikawa.jp" />
+    <suffix domain="iwate.jp" />
+    <suffix domain="kagawa.jp" />
+    <suffix domain="kagoshima.jp" />
+    <suffix domain="kanagawa.jp" />
+    <suffix domain="kawasaki.jp" />
+    <suffix domain="kitakyushu.jp" />
+    <suffix domain="kobe.jp" />
+    <suffix domain="kochi.jp" />
+    <suffix domain="kumamoto.jp" />
+    <suffix domain="kyoto.jp" />
+    <suffix domain="mie.jp" />
+    <suffix domain="miyagi.jp" />
+    <suffix domain="miyazaki.jp" />
+    <suffix domain="nagano.jp" />
+    <suffix domain="nagasaki.jp" />
+    <suffix domain="nagoya.jp" />
+    <suffix domain="nara.jp" />
+    <suffix domain="niigata.jp" />
+    <suffix domain="oita.jp" />
+    <suffix domain="okayama.jp" />
+    <suffix domain="okinawa.jp" />
+    <suffix domain="osaka.jp" />
+    <suffix domain="saga.jp" />
+    <suffix domain="saitama.jp" />
+    <suffix domain="sapporo.jp" />
+    <suffix domain="sendai.jp" />
+    <suffix domain="shiga.jp" />
+    <suffix domain="shimane.jp" />
+    <suffix domain="shizuoka.jp" />
+    <suffix domain="tochigi.jp" />
+    <suffix domain="tokushima.jp" />
+    <suffix domain="tokyo.jp" />
+    <suffix domain="tottori.jp" />
+    <suffix domain="toyama.jp" />
+    <suffix domain="wakayama.jp" />
+    <suffix domain="yamagata.jp" />
+    <suffix domain="yamaguchi.jp" />
+    <suffix domain="yamanashi.jp" />
+    <suffix domain="yokohama.jp" />
+    <suffix domain="metro.tokyo.jp" />
+    <suffix domain="pref.aichi.jp" />
+    <suffix domain="pref.akita.jp" />
+    <suffix domain="pref.aomori.jp" />
+    <suffix domain="pref.chiba.jp" />
+    <suffix domain="pref.ehime.jp" />
+    <suffix domain="pref.fukui.jp" />
+    <suffix domain="pref.fukuoka.jp" />
+    <suffix domain="pref.fukushima.jp" />
+    <suffix domain="pref.gifu.jp" />
+    <suffix domain="pref.gunma.jp" />
+    <suffix domain="pref.hiroshima.jp" />
+    <suffix domain="pref.hokkaido.jp" />
+    <suffix domain="pref.hyogo.jp" />
+    <suffix domain="pref.ibaraki.jp" />
+    <suffix domain="pref.ishikawa.jp" />
+    <suffix domain="pref.iwate.jp" />
+    <suffix domain="pref.kagawa.jp" />
+    <suffix domain="pref.kagoshima.jp" />
+    <suffix domain="pref.kanagawa.jp" />
+    <suffix domain="pref.kochi.jp" />
+    <suffix domain="pref.kumamoto.jp" />
+    <suffix domain="pref.kyoto.jp" />
+    <suffix domain="pref.mie.jp" />
+    <suffix domain="pref.miyagi.jp" />
+    <suffix domain="pref.miyazaki.jp" />
+    <suffix domain="pref.nagano.jp" />
+    <suffix domain="pref.nagasaki.jp" />
+    <suffix domain="pref.nara.jp" />
+    <suffix domain="pref.niigata.jp" />
+    <suffix domain="pref.oita.jp" />
+    <suffix domain="pref.okayama.jp" />
+    <suffix domain="pref.okinawa.jp" />
+    <suffix domain="pref.osaka.jp" />
+    <suffix domain="pref.saga.jp" />
+    <suffix domain="pref.saitama.jp" />
+    <suffix domain="pref.shiga.jp" />
+    <suffix domain="pref.shimane.jp" />
+    <suffix domain="pref.shizuoka.jp" />
+    <suffix domain="pref.tochigi.jp" />
+    <suffix domain="pref.tokushima.jp" />
+    <suffix domain="pref.tottori.jp" />
+    <suffix domain="pref.toyama.jp" />
+    <suffix domain="pref.wakayama.jp" />
+    <suffix domain="pref.yamagata.jp" />
+    <suffix domain="pref.yamaguchi.jp" />
+    <suffix domain="pref.yamanashi.jp" />
+    <suffix domain="city.chiba.jp" />
+    <suffix domain="city.fukuoka.jp" />
+    <suffix domain="city.hiroshima.jp" />
+    <suffix domain="city.kawasaki.jp" />
+    <suffix domain="city.kitakyushu.jp" />
+    <suffix domain="city.kobe.jp" />
+    <suffix domain="city.kyoto.jp" />
+    <suffix domain="city.nagoya.jp" />
+    <suffix domain="city.osaka.jp" />
+    <suffix domain="city.saitama.jp" />
+    <suffix domain="city.sapporo.jp" />
+    <suffix domain="city.sendai.jp" />
+    <suffix domain="city.shizuoka.jp" />
+    <suffix domain="city.yokohama.jp" />
+
+    <!--  ke : http://www.kenic.or.ke/index.php?option=com_content&task=view&id=117&Itemid=145-->
+
+    <!--  kg : http://www.domain.kg/dmn_n.html-->
+    <suffix domain="org.kg" />
+    <suffix domain="net.kg" />
+    <suffix domain="com.kg" />
+    <suffix domain="edu.kg" />
+    <suffix domain="gov.kg" />
+    <suffix domain="mil.kg" />
+
+    <!--  kh : http://www.mptc.gov.kh/dns_registration.htm-->
+
+    <!--  ki : http://www.ki/dns/index.html-->
+    <suffix domain="edu.ki" />
+    <suffix domain="biz.ki" />
+    <suffix domain="net.ki" />
+    <suffix domain="org.ki" />
+    <suffix domain="gov.ki" />
+    <suffix domain="info.ki" />
+    <suffix domain="com.ki" />
+
+    <!--  km : http://en.wikipedia.org/wiki/.km-->
+
+    <!--  kn : http://en.wikipedia.org/wiki/.kn-->
+
+    <!--  kr : http://domain.nida.or.kr/eng/structure.jsp-->
+    <suffix domain="ac.kr" />
+    <suffix domain="co.kr" />
+    <suffix domain="go.kr" />
+    <suffix domain="ne.kr" />
+    <suffix domain="or.kr" />
+    <suffix domain="re.kr" />
+    <suffix domain="pe.kr" />
+    <suffix domain="한글.kr" />
+
+    <!--  kw : http://en.wikipedia.org/wiki/.kw-->
+
+    <!--  ky : http://www.icta.ky/da_ky_reg_dom.php-->
+    <suffix domain="edu.ky" />
+    <suffix domain="gov.ky" />
+    <suffix domain="com.ky" />
+    <suffix domain="org.ky" />
+    <suffix domain="net.ky" />
+
+    <!--  kz : http://en.wikipedia.org/wiki/.kz-->
+    <suffix domain="org.kz" />
+    <suffix domain="edu.kz" />
+    <suffix domain="net.kz" />
+    <suffix domain="gov.kz" />
+    <suffix domain="mil.kz" />
+    <suffix domain="com.kz" />
+
+    <!--  la : http://en.wikipedia.org/wiki/.la-->
+
+    <!--  lb : http://en.wikipedia.org/wiki/.lb-->
+
+    <!--  lc : http://en.wikipedia.org/wiki/.lc-->
+    <suffix domain="com.lc" />
+    <suffix domain="org.lc" />
+    <suffix domain="edu.lc" />
+    <suffix domain="gov.lc" />
+
+    <!--  li : http://en.wikipedia.org/wiki/.li-->
+
+    <!--  lk : http://www.nic.lk/seclevpr.html-->
+    <suffix domain="gov.lk" />
+    <suffix domain="sch.lk" />
+    <suffix domain="net.lk" />
+    <suffix domain="int.lk" />
+    <suffix domain="com.lk" />
+    <suffix domain="org.lk" />
+    <suffix domain="edu.lk" />
+    <suffix domain="ngo.lk" />
+    <suffix domain="soc.lk" />
+    <suffix domain="web.lk" />
+    <suffix domain="ltd.lk" />
+    <suffix domain="assn.lk" />
+    <suffix domain="grp.lk" />
+    <suffix domain="hotel.lk" />
+
+    <!--  lr : http://psg.com/dns/lr/lr.txt-->
+
+    <!--  ls : http://en.wikipedia.org/wiki/.ls-->
+    <suffix domain="co.ls" />
+    <suffix domain="org.ls" />
+
+    <!--  lt : http://en.wikipedia.org/wiki/.lt-->
+
+    <!--  lu : http://www.dns.lu/en/-->
+
+    <!--  lv : http://www.nic.lv/DNS/En/generic.php-->
+    <suffix domain="com.lv" />
+    <suffix domain="edu.lv" />
+    <suffix domain="gov.lv" />
+    <suffix domain="org.lv" />
+    <suffix domain="mil.lv" />
+    <suffix domain="id.lv" />
+    <suffix domain="net.lv" />
+    <suffix domain="asn.lv" />
+    <suffix domain="conf.lv" />
+
+    <!--  ly : http://www.nic.ly/regulations.php-->
+    <suffix domain="com.ly" />
+    <suffix domain="net.ly" />
+    <suffix domain="gov.ly" />
+    <suffix domain="plc.ly" />
+    <suffix domain="edu.ly" />
+    <suffix domain="sch.ly" />
+    <suffix domain="med.ly" />
+    <suffix domain="org.ly" />
+    <suffix domain="id.ly" />
+
+    <!--  ma : http://en.wikipedia.org/wiki/.ma-->
+    <!--  list of 2nd level tlds ?-->
+    <suffix domain="co.ma" />
+    <suffix domain="net.ma" />
+    <suffix domain="gov.ma" />
+    <suffix domain="org.ma" />
+
+    <!--  mc : http://www.nic.mc/-->
+    <suffix domain="tm.mc" />
+    <suffix domain="asso.mc" />
+
+    <!--  md : http://en.wikipedia.org/wiki/.md-->
+
+    <!--  mg : http://www.nic.mg/tarif.htm-->
+    <suffix domain="org.mg" />
+    <suffix domain="nom.mg" />
+    <suffix domain="gov.mg" />
+    <suffix domain="prd.mg" />
+    <suffix domain="tm.mg" />
+    <suffix domain="edu.mg" />
+    <suffix domain="mil.mg" />
+    <suffix domain="com.mg" />
+
+    <!--  mh : http://en.wikipedia.org/wiki/.mh-->
+
+    <!--  mil : http://en.wikipedia.org/wiki/.mil-->
+
+    <!--  mk : http://en.wikipedia.org/wiki/.mk-->
+    <!--  list of 2nd level tlds ?-->
+    <suffix domain="com.mk" />
+    <suffix domain="gov.mk" />
+    <suffix domain="org.mk" />
+    <suffix domain="net.mk" />
+    <suffix domain="edu.mk" />
+
+    <!--  ml : http://www.gobin.info/domainname/ml-template.doc-->
+
+    <!--  mm : http://en.wikipedia.org/wiki/.mm-->
+
+    <!--  mn : http://en.wikipedia.org/wiki/.mn-->
+    <suffix domain="gov.mn" />
+    <suffix domain="edu.mn" />
+    <suffix domain="org.mn" />
+
+    <!--  mo : http://www.monic.net.mo/-->
+    <suffix domain="com.mo" />
+    <suffix domain="net.mo" />
+    <suffix domain="org.mo" />
+    <suffix domain="edu.mo" />
+    <suffix domain="gov.mo" />
+
+    <!--  mobi : http://en.wikipedia.org/wiki/.mobi-->
+
+    <!--  mp : http://www.dot.mp/-->
+
+    <!--  mq : http://en.wikipedia.org/wiki/.mq-->
+
+    <!--  mr : http://en.wikipedia.org/wiki/.mr-->
+
+    <!--  ms : http://en.wikipedia.org/wiki/.ms-->
+
+    <!--  mt : https://www.nic.org.mt/dotmt/-->
+
+    <!--  mu : http://en.wikipedia.org/wiki/.mu-->
+    <!--  list of 2nd level tlds ?-->
+
+    <!--  museum : http://about.museum/naming/-->
+    <!--  there are 2nd-level TLD's, but there's no list-->
+
+    <!--  mv : http://en.wikipedia.org/wiki/.mv-->
+
+    <!--  mw : http://www.registrar.mw/-->
+    <suffix domain="ac.mw" />
+    <suffix domain="biz.mw" />
+    <suffix domain="co.mw" />
+    <suffix domain="com.mw" />
+    <suffix domain="coop.mw" />
+    <suffix domain="edu.mw" />
+    <suffix domain="gov.mw" />
+    <suffix domain="int.mw" />
+    <suffix domain="net.mw" />
+    <suffix domain="org.mw" />
+
+    <!--  mx : http://www.nic.mx/-->
+    <suffix domain="com.mx" />
+    <suffix domain="edu.mx" />
+    <suffix domain="gob.mx" />
+    <suffix domain="net.mx" />
+    <suffix domain="org.mx" />
+
+    <!--  my : http://www.mynic.net.my/-->
+
+    <!--  mz : http://www.gobin.info/domainname/mz-template.doc-->
+
+    <!--  na : http://www.na-nic.com.na/-->
+    <!--  list of 2nd level tlds ?-->
+
+    <!--  name : has 2nd-level tlds, but there's no list of them-->
+
+    <!--  nc : http://www.cctld.nc/-->
+
+    <!--  ne : http://en.wikipedia.org/wiki/.ne-->
+
+    <!--  net : http://en.wikipedia.org/wiki/.net-->
+
+    <!--  nf : http://en.wikipedia.org/wiki/.nf-->
+    <suffix domain="com.nf" />
+    <suffix domain="net.nf" />
+    <suffix domain="per.nf" />
+    <suffix domain="rec.nf" />
+    <suffix domain="web.nf" />
+    <suffix domain="arts.nf" />
+    <suffix domain="firm.nf" />
+    <suffix domain="info.nf" />
+    <suffix domain="other.nf" />
+    <suffix domain="store.nf" />
+
+    <!--  ng : http://psg.com/dns/ng/-->
+
+    <!--  ni : http://www.nic.ni/dominios.htm-->
+
+    <!--  nl : http://www.domain-registry.nl/ace.php/c,728,122,,,,Home.html-->
+
+    <!--  no : http://www.norid.no/regelverk/index.en.html-->
+    <suffix domain="fhs.no" />
+    <suffix domain="vgs.no" />
+    <suffix domain="fylkesbibl.no" />
+    <suffix domain="folkebibl.no" />
+    <suffix domain="museum.no" />
+    <suffix domain="idrett.no" />
+    <suffix domain="mil.no" />
+    <suffix domain="stat.no" />
+    <suffix domain="dep.no" />
+    <suffix domain="kommune.no" />
+    <suffix domain="herad.no" />
+    <suffix domain="priv.no" />
+    <!--  no geographical names : http://www.norid.no/regelverk/vedlegg-b.en.html-->
+    <!--  counties-->
+    <suffix domain="aa.no" />
+    <suffix domain="ah.no" />
+    <suffix domain="bu.no" />
+    <suffix domain="fm.no" />
+    <suffix domain="hl.no" />
+    <suffix domain="hm.no" />
+    <suffix domain="jan-mayen.no" />
+    <suffix domain="mr.no" />
+    <suffix domain="nl.no" />
+    <suffix domain="nt.no" />
+    <suffix domain="of.no" />
+    <suffix domain="ol.no" />
+    <suffix domain="oslo.no" />
+    <suffix domain="rl.no" />
+    <suffix domain="sf.no" />
+    <suffix domain="st.no" />
+    <suffix domain="svalbard.no" />
+    <suffix domain="tm.no" />
+    <suffix domain="tr.no" />
+    <suffix domain="va.no" />
+    <suffix domain="vf.no" />
+    <!--  primary and lower secondary schools per county-->
+    <suffix domain="gs.aa.no" />
+    <suffix domain="gs.ah.no" />
+    <suffix domain="gs.bu.no" />
+    <suffix domain="gs.fm.no" />
+    <suffix domain="gs.hl.no" />
+    <suffix domain="gs.hm.no" />
+    <suffix domain="gs.jan-mayen.no" />
+    <suffix domain="gs.mr.no" />
+    <suffix domain="gs.nl.no" />
+    <suffix domain="gs.nt.no" />
+    <suffix domain="gs.of.no" />
+    <suffix domain="gs.ol.no" />
+    <suffix domain="gs.oslo.no" />
+    <suffix domain="gs.rl.no" />
+    <suffix domain="gs.sf.no" />
+    <suffix domain="gs.st.no" />
+    <suffix domain="gs.svalbard.no" />
+    <suffix domain="gs.tm.no" />
+    <suffix domain="gs.tr.no" />
+    <suffix domain="gs.va.no" />
+    <suffix domain="gs.vf.no" />
+    <!--  cities-->
+    <suffix domain="akrehamn.no" />
+    <suffix domain="åkrehamn.no" />
+    <suffix domain="algard.no" />
+    <suffix domain="ålgård.no" />
+    <suffix domain="arna.no" />
+    <suffix domain="brumunddal.no" />
+    <suffix domain="bryne.no" />
+    <suffix domain="bronnoysund.no" />
+    <suffix domain="brønnøysund.no" />
+    <suffix domain="drobak.no" />
+    <suffix domain="drøbak.no" />
+    <suffix domain="egersund.no" />
+    <suffix domain="fetsund.no" />
+    <suffix domain="floro.no" />
+    <suffix domain="florø.no" />
+    <suffix domain="fredrikstad.no" />
+    <suffix domain="hokksund.no" />
+    <suffix domain="honefoss.no" />
+    <suffix domain="hønefoss.no" />
+    <suffix domain="jessheim.no" />
+    <suffix domain="jorpeland.no" />
+    <suffix domain="jørpeland.no" />
+    <suffix domain="kirkenes.no" />
+    <suffix domain="kopervik.no" />
+    <suffix domain="krokstadelva.no" />
+    <suffix domain="langevag.no" />
+    <suffix domain="langevåg.no" />
+    <suffix domain="leirvik.no" />
+    <suffix domain="mjondalen.no" />
+    <suffix domain="mjøndalen.no" />
+    <suffix domain="mo-i-rana.no" />
+    <suffix domain="mosjoen.no" />
+    <suffix domain="mosjøen.no" />
+    <suffix domain="nesoddtangen.no" />
+    <suffix domain="orkanger.no" />
+    <suffix domain="osoyro.no" />
+    <suffix domain="osøyro.no" />
+    <suffix domain="raholt.no" />
+    <suffix domain="råholt.no" />
+    <suffix domain="sandnessjoen.no" />
+    <suffix domain="sandnessjøen.no" />
+    <suffix domain="skedsmokorset.no" />
+    <suffix domain="slattum.no" />
+    <suffix domain="spjelkavik.no" />
+    <suffix domain="stathelle.no" />
+    <suffix domain="stavern.no" />
+    <suffix domain="stjordalshalsen.no" />
+    <suffix domain="stjørdalshalsen.no" />
+    <suffix domain="tananger.no" />
+    <suffix domain="tranby.no" />
+    <suffix domain="vossevangen.no" />
+    <!--  communities-->
+    <suffix domain="afjord.no" />
+    <suffix domain="åfjord.no" />
+    <suffix domain="agdenes.no" />
+    <suffix domain="al.no" />
+    <suffix domain="ål.no" />
+    <suffix domain="alesund.no" />
+    <suffix domain="ålesund.no" />
+    <suffix domain="alstahaug.no" />
+    <suffix domain="alta.no" />
+    <suffix domain="áltá.no" />
+    <suffix domain="alaheadju.no" />
+    <suffix domain="álaheadju.no" />
+    <suffix domain="alvdal.no" />
+    <suffix domain="amli.no" />
+    <suffix domain="åmli.no" />
+    <suffix domain="amot.no" />
+    <suffix domain="åmot.no" />
+    <suffix domain="andebu.no" />
+    <suffix domain="andoy.no" />
+    <suffix domain="andøy.no" />
+    <suffix domain="andasuolo.no" />
+    <suffix domain="ardal.no" />
+    <suffix domain="årdal.no" />
+    <suffix domain="aremark.no" />
+    <suffix domain="arendal.no" />
+    <suffix domain="ås.no" />
+    <suffix domain="aseral.no" />
+    <suffix domain="åseral.no" />
+    <suffix domain="asker.no" />
+    <suffix domain="askim.no" />
+    <suffix domain="askvoll.no" />
+    <suffix domain="askoy.no" />
+    <suffix domain="askøy.no" />
+    <suffix domain="asnes.no" />
+    <suffix domain="åsnes.no" />
+    <suffix domain="audnedaln.no" />
+    <suffix domain="aukra.no" />
+    <suffix domain="aure.no" />
+    <suffix domain="aurland.no" />
+    <suffix domain="aurskog-holand.no" />
+    <suffix domain="aurskog-høland.no" />
+    <suffix domain="austevoll.no" />
+    <suffix domain="austrheim.no" />
+    <suffix domain="averoy.no" />
+    <suffix domain="averøy.no" />
+    <suffix domain="balestrand.no" />
+    <suffix domain="ballangen.no" />
+    <suffix domain="balat.no" />
+    <suffix domain="bálát.no" />
+    <suffix domain="balsfjord.no" />
+    <suffix domain="bahccavuotna.no" />
+    <suffix domain="báhccavuotna.no" />
+    <suffix domain="bamble.no" />
+    <suffix domain="bardu.no" />
+    <suffix domain="beardu.no" />
+    <suffix domain="beiarn.no" />
+    <suffix domain="bajddar.no" />
+    <suffix domain="bájddar.no" />
+    <suffix domain="baidar.no" />
+    <suffix domain="báidár.no" />
+    <suffix domain="berg.no" />
+    <suffix domain="bergen.no" />
+    <suffix domain="berlevag.no" />
+    <suffix domain="berlevåg.no" />
+    <suffix domain="bearalvahki.no" />
+    <suffix domain="bearalváhki.no" />
+    <suffix domain="bindal.no" />
+    <suffix domain="birkenes.no" />
+    <suffix domain="bjarkoy.no" />
+    <suffix domain="bjarkøy.no" />
+    <suffix domain="bjerkreim.no" />
+    <suffix domain="bjugn.no" />
+    <suffix domain="bodo.no" />
+    <suffix domain="bodø.no" />
+    <suffix domain="badaddja.no" />
+    <suffix domain="bådåddjå.no" />
+    <suffix domain="budejju.no" />
+    <suffix domain="bokn.no" />
+    <suffix domain="bremanger.no" />
+    <suffix domain="bronnoy.no" />
+    <suffix domain="brønnøy.no" />
+    <suffix domain="bygland.no" />
+    <suffix domain="bykle.no" />
+    <suffix domain="barum.no" />
+    <suffix domain="bærum.no" />
+    <suffix domain="bo.telemark.no" />
+    <suffix domain="bø.telemark.no" />
+    <suffix domain="bo.nordland.no" />
+    <suffix domain="bø.nordland.no" />
+    <suffix domain="bievat.no" />
+    <suffix domain="bievát.no" />
+    <suffix domain="bomlo.no" />
+    <suffix domain="bømlo.no" />
+    <suffix domain="batsfjord.no" />
+    <suffix domain="båtsfjord.no" />
+    <suffix domain="bahcavuotna.no" />
+    <suffix domain="báhcavuotna.no" />
+    <suffix domain="dovre.no" />
+    <suffix domain="drammen.no" />
+    <suffix domain="drangedal.no" />
+    <suffix domain="dyroy.no" />
+    <suffix domain="dyrøy.no" />
+    <suffix domain="donna.no" />
+    <suffix domain="dønna.no" />
+    <suffix domain="eid.no" />
+    <suffix domain="eidfjord.no" />
+    <suffix domain="eidsberg.no" />
+    <suffix domain="eidskog.no" />
+    <suffix domain="eidsvoll.no" />
+    <suffix domain="eigersund.no" />
+    <suffix domain="elverum.no" />
+    <suffix domain="enebakk.no" />
+    <suffix domain="engerdal.no" />
+    <suffix domain="etne.no" />
+    <suffix domain="etnedal.no" />
+    <suffix domain="evenes.no" />
+    <suffix domain="evenassi.no" />
+    <suffix domain="evenášši.no" />
+    <suffix domain="evje-og-hornnes.no" />
+    <suffix domain="farsund.no" />
+    <suffix domain="fauske.no" />
+    <suffix domain="fuossko.no" />
+    <suffix domain="fuoisku.no" />
+    <suffix domain="fedje.no" />
+    <suffix domain="fet.no" />
+    <suffix domain="finnoy.no" />
+    <suffix domain="finnøy.no" />
+    <suffix domain="fitjar.no" />
+    <suffix domain="fjaler.no" />
+    <suffix domain="fjell.no" />
+    <suffix domain="flakstad.no" />
+    <suffix domain="flatanger.no" />
+    <suffix domain="flekkefjord.no" />
+    <suffix domain="flesberg.no" />
+    <suffix domain="flora.no" />
+    <suffix domain="fla.no" />
+    <suffix domain="flå.no" />
+    <suffix domain="folldal.no" />
+    <suffix domain="forsand.no" />
+    <suffix domain="fosnes.no" />
+    <suffix domain="fredrikstad.no" />
+    <suffix domain="frei.no" />
+    <suffix domain="frogn.no" />
+    <suffix domain="froland.no" />
+    <suffix domain="frosta.no" />
+    <suffix domain="frana.no" />
+    <suffix domain="fræna.no" />
+    <suffix domain="froya.no" />
+    <suffix domain="frøya.no" />
+    <suffix domain="fusa.no" />
+    <suffix domain="fyresdal.no" />
+    <suffix domain="forde.no" />
+    <suffix domain="førde.no" />
+    <suffix domain="gamvik.no" />
+    <suffix domain="gangaviika.no" />
+    <suffix domain="gaular.no" />
+    <suffix domain="gausdal.no" />
+    <suffix domain="gildeskal.no" />
+    <suffix domain="gildeskål.no" />
+    <suffix domain="giske.no" />
+    <suffix domain="gjemnes.no" />
+    <suffix domain="gjerdrum.no" />
+    <suffix domain="gjerstad.no" />
+    <suffix domain="gjesdal.no" />
+    <suffix domain="gjovik.no" />
+    <suffix domain="gjøvik.no" />
+    <suffix domain="gloppen.no" />
+    <suffix domain="gol.no" />
+    <suffix domain="gran.no" />
+    <suffix domain="grane.no" />
+    <suffix domain="granvin.no" />
+    <suffix domain="gratangen.no" />
+    <suffix domain="grimstad.no" />
+    <suffix domain="grong.no" />
+    <suffix domain="kraanghke.no" />
+    <suffix domain="kråanghke.no" />
+    <suffix domain="grue.no" />
+    <suffix domain="gulen.no" />
+    <suffix domain="hadsel.no" />
+    <suffix domain="halden.no" />
+    <suffix domain="halsa.no" />
+    <suffix domain="hamar.no" />
+    <suffix domain="hamaroy.no" />
+    <suffix domain="habmer.no" />
+    <suffix domain="hábmer.no" />
+    <suffix domain="hapmir.no" />
+    <suffix domain="hápmir.no" />
+    <suffix domain="hammerfest.no" />
+    <suffix domain="hammarfeasta.no" />
+    <suffix domain="hámmárfeasta.no" />
+    <suffix domain="haram.no" />
+    <suffix domain="hareid.no" />
+    <suffix domain="harstad.no" />
+    <suffix domain="hasvik.no" />
+    <suffix domain="aknoluokta.no" />
+    <suffix domain="ákŋoluokta.no" />
+    <suffix domain="hattfjelldal.no" />
+    <suffix domain="aarborte.no" />
+    <suffix domain="haugesund.no" />
+    <suffix domain="hemne.no" />
+    <suffix domain="hemnes.no" />
+    <suffix domain="hemsedal.no" />
+    <suffix domain="heroy.more-og-romsdal.no" />
+    <suffix domain="herøy.møre-og-romsdal.no" />
+    <suffix domain="heroy.nordland.no" />
+    <suffix domain="herøy.nordland.no" />
+    <suffix domain="hitra.no" />
+    <suffix domain="hjartdal.no" />
+    <suffix domain="hjelmeland.no" />
+    <suffix domain="hobol.no" />
+    <suffix domain="hobøl.no" />
+    <suffix domain="hof.no" />
+    <suffix domain="hol.no" />
+    <suffix domain="hole.no" />
+    <suffix domain="holmestrand.no" />
+    <suffix domain="holtalen.no" />
+    <suffix domain="holtålen.no" />
+    <suffix domain="hornindal.no" />
+    <suffix domain="horten.no" />
+    <suffix domain="hurdal.no" />
+    <suffix domain="hurum.no" />
+    <suffix domain="hvaler.no" />
+    <suffix domain="hyllestad.no" />
+    <suffix domain="hagebostad.no" />
+    <suffix domain="hægebostad.no" />
+    <suffix domain="hoyanger.no" />
+    <suffix domain="høyanger.no" />
+    <suffix domain="hoylandet.no" />
+    <suffix domain="høylandet.no" />
+    <suffix domain="ha.no" />
+    <suffix domain="hå.no" />
+    <suffix domain="ibestad.no" />
+    <suffix domain="inderoy.no" />
+    <suffix domain="inderøy.no" />
+    <suffix domain="iveland.no" />
+    <suffix domain="jevnaker.no" />
+    <suffix domain="jondal.no" />
+    <suffix domain="jolster.no" />
+    <suffix domain="jølster.no" />
+    <suffix domain="karasjok.no" />
+    <suffix domain="karasjohka.no" />
+    <suffix domain="kárášjohka.no" />
+    <suffix domain="karlsoy.no" />
+    <suffix domain="galsa.no" />
+    <suffix domain="gálsá.no" />
+    <suffix domain="karmoy.no" />
+    <suffix domain="karmøy.no" />
+    <suffix domain="kautokeino.no" />
+    <suffix domain="guovdageaidnu.no" />
+    <suffix domain="klepp.no" />
+    <suffix domain="klabu.no" />
+    <suffix domain="klæbu.no" />
+    <suffix domain="kongsberg.no" />
+    <suffix domain="kongsvinger.no" />
+    <suffix domain="kragero.no" />
+    <suffix domain="kragerø.no" />
+    <suffix domain="kristiansand.no" />
+    <suffix domain="kristiansund.no" />
+    <suffix domain="krodsherad.no" />
+    <suffix domain="krødsherad.no" />
+    <suffix domain="kvalsund.no" />
+    <suffix domain="rahkkeravju.no" />
+    <suffix domain="ráhkkerávju.no" />
+    <suffix domain="kvam.no" />
+    <suffix domain="kvinesdal.no" />
+    <suffix domain="kvinnherad.no" />
+    <suffix domain="kviteseid.no" />
+    <suffix domain="kvitsoy.no" />
+    <suffix domain="kvitsøy.no" />
+    <suffix domain="kvafjord.no" />
+    <suffix domain="kvæfjord.no" />
+    <suffix domain="giehtavuoatna.no" />
+    <suffix domain="kvanangen.no" />
+    <suffix domain="kvænangen.no" />
+    <suffix domain="navuotna.no" />
+    <suffix domain="návuotna.no" />
+    <suffix domain="kafjord.no" />
+    <suffix domain="kåfjord.no" />
+    <suffix domain="gaivuotna.no" />
+    <suffix domain="gáivuotna.no" />
+    <suffix domain="larvik.no" />
+    <suffix domain="lavangen.no" />
+    <suffix domain="lavagis.no" />
+    <suffix domain="loabat.no" />
+    <suffix domain="loabát.no" />
+    <suffix domain="lebesby.no" />
+    <suffix domain="davvesiida.no" />
+    <suffix domain="leikanger.no" />
+    <suffix domain="leirfjord.no" />
+    <suffix domain="leka.no" />
+    <suffix domain="leksvik.no" />
+    <suffix domain="lenvik.no" />
+    <suffix domain="leangaviika.no" />
+    <suffix domain="leaŋgaviika.no" />
+    <suffix domain="lesja.no" />
+    <suffix domain="levanger.no" />
+    <suffix domain="lier.no" />
+    <suffix domain="lierne.no" />
+    <suffix domain="lillehammer.no" />
+    <suffix domain="lillesand.no" />
+    <suffix domain="lindesnes.no" />
+    <suffix domain="lindas.no" />
+    <suffix domain="lindås.no" />
+    <suffix domain="lom.no" />
+    <suffix domain="loppa.no" />
+    <suffix domain="lahppi.no" />
+    <suffix domain="láhppi.no" />
+    <suffix domain="lund.no" />
+    <suffix domain="lunner.no" />
+    <suffix domain="luroy.no" />
+    <suffix domain="lurøy.no" />
+    <suffix domain="luster.no" />
+    <suffix domain="lyngdal.no" />
+    <suffix domain="lyngen.no" />
+    <suffix domain="ivgu.no" />
+    <suffix domain="lardal.no" />
+    <suffix domain="lerdal.no" />
+    <suffix domain="lærdal.no" />
+    <suffix domain="lodingen.no" />
+    <suffix domain="lødingen.no" />
+    <suffix domain="lorenskog.no" />
+    <suffix domain="lørenskog.no" />
+    <suffix domain="loten.no" />
+    <suffix domain="løten.no" />
+    <suffix domain="malvik.no" />
+    <suffix domain="masoy.no" />
+    <suffix domain="måsøy.no" />
+    <suffix domain="muosat.no" />
+    <suffix domain="muosát.no" />
+    <suffix domain="mandal.no" />
+    <suffix domain="marker.no" />
+    <suffix domain="marnardal.no" />
+    <suffix domain="masfjorden.no" />
+    <suffix domain="meland.no" />
+    <suffix domain="meldal.no" />
+    <suffix domain="melhus.no" />
+    <suffix domain="meloy.no" />
+    <suffix domain="meløy.no" />
+    <suffix domain="meraker.no" />
+    <suffix domain="meråker.no" />
+    <suffix domain="moareke.no" />
+    <suffix domain="moåreke.no" />
+    <suffix domain="midsund.no" />
+    <suffix domain="midtre-gauldal.no" />
+    <suffix domain="modalen.no" />
+    <suffix domain="modum.no" />
+    <suffix domain="molde.no" />
+    <suffix domain="moskenes.no" />
+    <suffix domain="moss.no" />
+    <suffix domain="mosvik.no" />
+    <suffix domain="malselv.no" />
+    <suffix domain="målselv.no" />
+    <suffix domain="malatvuopmi.no" />
+    <suffix domain="málatvuopmi.no" />
+    <suffix domain="namdalseid.no" />
+    <suffix domain="aejrie.no" />
+    <suffix domain="namsos.no" />
+    <suffix domain="namsskogan.no" />
+    <suffix domain="naamesjevuemie.no" />
+    <suffix domain="nååmesjevuemie.no" />
+    <suffix domain="laakesvuemie.no" />
+    <suffix domain="nannestad.no" />
+    <suffix domain="narvik.no" />
+    <suffix domain="narviika.no" />
+    <suffix domain="naustdal.no" />
+    <suffix domain="nedre-eiker.no" />
+    <suffix domain="nes.akershus.no" />
+    <suffix domain="nes.buskerud.no" />
+    <suffix domain="nesna.no" />
+    <suffix domain="nesodden.no" />
+    <suffix domain="nesseby.no" />
+    <suffix domain="unjarga.no" />
+    <suffix domain="unjárga.no" />
+    <suffix domain="nesset.no" />
+    <suffix domain="nissedal.no" />
+    <suffix domain="nittedal.no" />
+    <suffix domain="nord-aurdal.no" />
+    <suffix domain="nord-fron.no" />
+    <suffix domain="nord-odal.no" />
+    <suffix domain="norddal.no" />
+    <suffix domain="nordkapp.no" />
+    <suffix domain="davvenjarga.no" />
+    <suffix domain="davvenjárga.no" />
+    <suffix domain="nordre-land.no" />
+    <suffix domain="nordreisa.no" />
+    <suffix domain="raisa.no" />
+    <suffix domain="ráisa.no" />
+    <suffix domain="nore-og-uvdal.no" />
+    <suffix domain="notodden.no" />
+    <suffix domain="naroy.no" />
+    <suffix domain="nærøy.no" />
+    <suffix domain="notteroy.no" />
+    <suffix domain="nøtterøy.no" />
+    <suffix domain="odda.no" />
+    <suffix domain="oksnes.no" />
+    <suffix domain="øksnes.no" />
+    <suffix domain="oppdal.no" />
+    <suffix domain="oppegard.no" />
+    <suffix domain="oppegård.no" />
+    <suffix domain="orkdal.no" />
+    <suffix domain="orland.no" />
+    <suffix domain="ørland.no" />
+    <suffix domain="orskog.no" />
+    <suffix domain="ørskog.no" />
+    <suffix domain="orsta.no" />
+    <suffix domain="ørsta.no" />
+    <suffix domain="os.hedmark.no" />
+    <suffix domain="os.hordaland.no" />
+    <suffix domain="osen.no" />
+    <suffix domain="osteroy.no" />
+    <suffix domain="osterøy.no" />
+    <suffix domain="ostre-toten.no" />
+    <suffix domain="østre-toten.no" />
+    <suffix domain="overhalla.no" />
+    <suffix domain="ovre-eiker.no" />
+    <suffix domain="øvre-eiker.no" />
+    <suffix domain="oyer.no" />
+    <suffix domain="øyer.no" />
+    <suffix domain="oygarden.no" />
+    <suffix domain="øygarden.no" />
+    <suffix domain="oystre-slidre.no" />
+    <suffix domain="øystre-slidre.no" />
+    <suffix domain="porsanger.no" />
+    <suffix domain="porsangu.no" />
+    <suffix domain="porsáŋgu.no" />
+    <suffix domain="porsgrunn.no" />
+    <suffix domain="radoy.no" />
+    <suffix domain="radøy.no" />
+    <suffix domain="rakkestad.no" />
+    <suffix domain="rana.no" />
+    <suffix domain="ruovat.no" />
+    <suffix domain="randaberg.no" />
+    <suffix domain="rauma.no" />
+    <suffix domain="rendalen.no" />
+    <suffix domain="rennebu.no" />
+    <suffix domain="rennesoy.no" />
+    <suffix domain="rennesøy.no" />
+    <suffix domain="rindal.no" />
+    <suffix domain="ringebu.no" />
+    <suffix domain="ringerike.no" />
+    <suffix domain="ringsaker.no" />
+    <suffix domain="rissa.no" />
+    <suffix domain="risor.no" />
+    <suffix domain="risør.no" />
+    <suffix domain="roan.no" />
+    <suffix domain="rollag.no" />
+    <suffix domain="rygge.no" />
+    <suffix domain="ralingen.no" />
+    <suffix domain="rælingen.no" />
+    <suffix domain="rodoy.no" />
+    <suffix domain="rødøy.no" />
+    <suffix domain="romskog.no" />
+    <suffix domain="rømskog.no" />
+    <suffix domain="roros.no" />
+    <suffix domain="røros.no" />
+    <suffix domain="rost.no" />
+    <suffix domain="røst.no" />
+    <suffix domain="royken.no" />
+    <suffix domain="røyken.no" />
+    <suffix domain="royrvik.no" />
+    <suffix domain="røyrvik.no" />
+    <suffix domain="rade.no" />
+    <suffix domain="råde.no" />
+    <suffix domain="salangen.no" />
+    <suffix domain="siellak.no" />
+    <suffix domain="saltdal.no" />
+    <suffix domain="sálát.no" />
+    <suffix domain="sálat.no" />
+    <suffix domain="samnanger.no" />
+    <suffix domain="sande.more-og-romsdal.no" />
+    <suffix domain="sande.møre-og-romsdal.no" />
+    <suffix domain="sande.vestfold.no" />
+    <suffix domain="sandefjord.no" />
+    <suffix domain="sandnes.no" />
+    <suffix domain="sandoy.no" />
+    <suffix domain="sandøy.no" />
+    <suffix domain="sarpsborg.no" />
+    <suffix domain="sauda.no" />
+    <suffix domain="sauherad.no" />
+    <suffix domain="sel.no" />
+    <suffix domain="selbu.no" />
+    <suffix domain="selje.no" />
+    <suffix domain="seljord.no" />
+    <suffix domain="sigdal.no" />
+    <suffix domain="siljan.no" />
+    <suffix domain="sirdal.no" />
+    <suffix domain="skaun.no" />
+    <suffix domain="skedsmo.no" />
+    <suffix domain="ski.no" />
+    <suffix domain="skien.no" />
+    <suffix domain="skiptvet.no" />
+    <suffix domain="skjervoy.no" />
+    <suffix domain="skjervøy.no" />
+    <suffix domain="skierva.no" />
+    <suffix domain="skiervá.no" />
+    <suffix domain="skjak.no" />
+    <suffix domain="skjåk.no" />
+    <suffix domain="skodje.no" />
+    <suffix domain="skanland.no" />
+    <suffix domain="skånland.no" />
+    <suffix domain="skanit.no" />
+    <suffix domain="skánit.no" />
+    <suffix domain="smola.no" />
+    <suffix domain="smøla.no" />
+    <suffix domain="snillfjord.no" />
+    <suffix domain="snasa.no" />
+    <suffix domain="snåsa.no" />
+    <suffix domain="snoasa.no" />
+    <suffix domain="snaase.no" />
+    <suffix domain="snåase.no" />
+    <suffix domain="sogndal.no" />
+    <suffix domain="sokndal.no" />
+    <suffix domain="sola.no" />
+    <suffix domain="solund.no" />
+    <suffix domain="songdalen.no" />
+    <suffix domain="sortland.no" />
+    <suffix domain="spydeberg.no" />
+    <suffix domain="stange.no" />
+    <suffix domain="stavanger.no" />
+    <suffix domain="steigen.no" />
+    <suffix domain="steinkjer.no" />
+    <suffix domain="stjordal.no" />
+    <suffix domain="stjørdal.no" />
+    <suffix domain="stokke.no" />
+    <suffix domain="stor-elvdal.no" />
+    <suffix domain="stord.no" />
+    <suffix domain="stordal.no" />
+    <suffix domain="storfjord.no" />
+    <suffix domain="omasvuotna.no" />
+    <suffix domain="strand.no" />
+    <suffix domain="stranda.no" />
+    <suffix domain="stryn.no" />
+    <suffix domain="sula.no" />
+    <suffix domain="suldal.no" />
+    <suffix domain="sund.no" />
+    <suffix domain="sunndal.no" />
+    <suffix domain="surnadal.no" />
+    <suffix domain="sveio.no" />
+    <suffix domain="svelvik.no" />
+    <suffix domain="sykkylven.no" />
+    <suffix domain="sogne.no" />
+    <suffix domain="søgne.no" />
+    <suffix domain="somna.no" />
+    <suffix domain="sømna.no" />
+    <suffix domain="sondre-land.no" />
+    <suffix domain="søndre-land.no" />
+    <suffix domain="sor-aurdal.no" />
+    <suffix domain="sør-aurdal.no" />
+    <suffix domain="sor-fron.no" />
+    <suffix domain="sør-fron.no" />
+    <suffix domain="sor-odal.no" />
+    <suffix domain="sør-odal.no" />
+    <suffix domain="sor-varanger.no" />
+    <suffix domain="sør-varanger.no" />
+    <suffix domain="matta-varjjat.no" />
+    <suffix domain="mátta-várjjat.no" />
+    <suffix domain="sorfold.no" />
+    <suffix domain="sørfold.no" />
+    <suffix domain="sorreisa.no" />
+    <suffix domain="sørreisa.no" />
+    <suffix domain="sorum.no" />
+    <suffix domain="sørum.no" />
+    <suffix domain="tana.no" />
+    <suffix domain="deatnu.no" />
+    <suffix domain="time.no" />
+    <suffix domain="tingvoll.no" />
+    <suffix domain="tinn.no" />
+    <suffix domain="tjeldsund.no" />
+    <suffix domain="dielddanuorri.no" />
+    <suffix domain="tjome.no" />
+    <suffix domain="tjøme.no" />
+    <suffix domain="tokke.no" />
+    <suffix domain="tolga.no" />
+    <suffix domain="torsken.no" />
+    <suffix domain="tranoy.no" />
+    <suffix domain="tranøy.no" />
+    <suffix domain="tromso.no" />
+    <suffix domain="tromsø.no" />
+    <suffix domain="tromsa.no" />
+    <suffix domain="romsa.no" />
+    <suffix domain="trondheim.no" />
+    <suffix domain="troandin.no" />
+    <suffix domain="trysil.no" />
+    <suffix domain="trana.no" />
+    <suffix domain="træna.no" />
+    <suffix domain="trogstad.no" />
+    <suffix domain="trøgstad.no" />
+    <suffix domain="tvedestrand.no" />
+    <suffix domain="tydal.no" />
+    <suffix domain="tynset.no" />
+    <suffix domain="tysfjord.no" />
+    <suffix domain="divtasvuodna.no" />
+    <suffix domain="divttasvuotna.no" />
+    <suffix domain="tysnes.no" />
+    <suffix domain="tysvar.no" />
+    <suffix domain="tysvær.no" />
+    <suffix domain="tonsberg.no" />
+    <suffix domain="tønsberg.no" />
+    <suffix domain="ullensaker.no" />
+    <suffix domain="ullensvang.no" />
+    <suffix domain="ulvik.no" />
+    <suffix domain="utsira.no" />
+    <suffix domain="vadso.no" />
+    <suffix domain="vadsø.no" />
+    <suffix domain="cahcesuolo.no" />
+    <suffix domain="cáhcesuolo.no" />
+    <suffix domain="vaksdal.no" />
+    <suffix domain="valle.no" />
+    <suffix domain="vang.no" />
+    <suffix domain="vanylven.no" />
+    <suffix domain="vardo.no" />
+    <suffix domain="vardø.no" />
+    <suffix domain="varggat.no" />
+    <suffix domain="várggát.no" />
+    <suffix domain="vefsn.no" />
+    <suffix domain="vaapste.no" />
+    <suffix domain="vega.no" />
+    <suffix domain="vegarshei.no" />
+    <suffix domain="vegårshei.no" />
+    <suffix domain="vennesla.no" />
+    <suffix domain="verdal.no" />
+    <suffix domain="verran.no" />
+    <suffix domain="vestby.no" />
+    <suffix domain="vestnes.no" />
+    <suffix domain="vestre-slidre.no" />
+    <suffix domain="vestre-toten.no" />
+    <suffix domain="vestvagoy.no" />
+    <suffix domain="vestvågøy.no" />
+    <suffix domain="vevelstad.no" />
+    <suffix domain="vik.no" />
+    <suffix domain="vikna.no" />
+    <suffix domain="vindafjord.no" />
+    <suffix domain="volda.no" />
+    <suffix domain="voss.no" />
+    <suffix domain="varoy.no" />
+    <suffix domain="værøy.no" />
+    <suffix domain="vagan.no" />
+    <suffix domain="vågan.no" />
+    <suffix domain="voagat.no" />
+    <suffix domain="vagsoy.no" />
+    <suffix domain="vågsøy.no" />
+    <suffix domain="vaga.no" />
+    <suffix domain="vågå.no" />
+    <suffix domain="valer.ostfold.no" />
+    <suffix domain="våler.østfold.no" />
+    <suffix domain="valer.hedmark.no" />
+    <suffix domain="våler.hedmark.no" />
+
+    <!--  np : http://www.mos.com.np/register.html-->
+
+    <!--  nr : http://cenpac.net.nr/dns/index.html-->
+    <suffix domain="biz.nr" />
+    <suffix domain="info.nr" />
+    <suffix domain="gov.nr" />
+    <suffix domain="edu.nr" />
+    <suffix domain="org.nr" />
+    <suffix domain="net.nr" />
+    <suffix domain="com.nr" />
+
+    <!--  nu : http://en.wikipedia.org/wiki/.nu-->
+
+    <!--  nz : http://en.wikipedia.org/wiki/.nz-->
+   <suffix domain="ac.nz" />
+   <suffix domain="co.nz" />
+   <suffix domain="cri.nz" />
+   <suffix domain="geek.nz" />
+   <suffix domain="gen.nz" />
+   <suffix domain="govt.nz" />
+   <suffix domain="iwi.nz" />
+   <suffix domain="maori.nz" />
+   <suffix domain="mil.nz" />
+   <suffix domain="net.nz" />
+   <suffix domain="org.nz" />
+   <suffix domain="parliament.nz" />
+   <suffix domain="school.nz" />
+
+    <!--  om : http://en.wikipedia.org/wiki/.om-->
+
+    <!--  org : http://en.wikipedia.org/wiki/.org-->
+
+    <!--  pa : http://www.nic.pa/-->
+
+    <!--  pe : http://www.nic.pe/normas-proced-i.htm-->
+
+    <!--  pf : http://www.gobin.info/domainname/formulaire-pf.pdf-->
+    <suffix domain="com.pf" />
+    <suffix domain="org.pf" />
+    <suffix domain="edu.pf" />
+
+    <!--  pg : http://en.wikipedia.org/wiki/.pg-->
+
+    <!--  ph : http://www.domains.ph/FAQ2.asp-->
+    <!--  list of 2nd level tlds ?-->
+    <suffix domain="com.ph" />
+    <suffix domain="net.ph" />
+    <suffix domain="org.ph" />
+    <suffix domain="gov.ph" />
+    <suffix domain="edu.ph" />
+    <suffix domain="ngo.ph" />
+    <suffix domain="mil.ph" />
+
+    <!--  pk : http://pk5.pknic.net.pk/pk5/msgNamepk.PK-->
+    <suffix domain="com.pk" />
+    <suffix domain="net.pk" />
+    <suffix domain="edu.pk" />
+    <suffix domain="org.pk" />
+    <suffix domain="fam.pk" />
+    <suffix domain="biz.pk" />
+    <suffix domain="web.pk" />
+    <suffix domain="gov.pk" />
+    <suffix domain="gob.pk" />
+    <suffix domain="gok.pk" />
+    <suffix domain="gon.pk" />
+    <suffix domain="gop.pk" />
+    <suffix domain="gos.pk" />
+    <suffix domain="goa.pk" />
+    <suffix domain="info.pk" />
+
+    <!--  pl : http://www.dns.pl/english/-->
+    <!--  NASK functional domains (nask.pl / dns.pl) : http://www.dns.pl/english/dns-funk.html-->
+    <suffix domain="aid.pl" />
+    <suffix domain="agro.pl" />
+    <suffix domain="atm.pl" />
+    <suffix domain="auto.pl" />
+    <suffix domain="biz.pl" />
+    <suffix domain="com.pl" />
+    <suffix domain="edu.pl" />
+    <suffix domain="gmina.pl" />
+    <suffix domain="gsm.pl" />
+    <suffix domain="info.pl" />
+    <suffix domain="mail.pl" />
+    <suffix domain="miasta.pl" />
+    <suffix domain="media.pl" />
+    <suffix domain="mil.pl" />
+    <suffix domain="net.pl" />
+    <suffix domain="nieruchomosci.pl" />
+    <suffix domain="nom.pl" />
+    <suffix domain="org.pl" />
+    <suffix domain="pc.pl" />
+    <suffix domain="powiat.pl" />
+    <suffix domain="priv.pl" />
+    <suffix domain="realestate.pl" />
+    <suffix domain="rel.pl" />
+    <suffix domain="sex.pl" />
+    <suffix domain="shop.pl" />
+    <suffix domain="sklep.pl" />
+    <suffix domain="sos.pl" />
+    <suffix domain="szkola.pl" />
+    <suffix domain="targi.pl" />
+    <suffix domain="tm.pl" />
+    <suffix domain="tourism.pl" />
+    <suffix domain="travel.pl" />
+    <suffix domain="turystyka.pl" />
+    <!--  ICM functional domains (icm.edu.pl)-->
+    <suffix domain="6bone.pl" />
+    <suffix domain="art.pl" />
+    <suffix domain="mbone.pl" />
+    <!--  Government domains (administered by ippt.gov.pl)-->
+    <suffix domain="gov.pl" />
+    <suffix domain="uw.gov.pl" />
+    <suffix domain="um.gov.pl" />
+    <suffix domain="ug.gov.pl" />
+    <suffix domain="upow.gov.pl" />
+    <suffix domain="starostwo.gov.pl" />
+    <suffix domain="so.gov.pl" />
+    <suffix domain="sr.gov.pl" />
+    <suffix domain="po.gov.pl" />
+    <suffix domain="pa.gov.pl" />
+    <!--  other functional domains-->
+    <suffix domain="med.pl" />
+    <suffix domain="ngo.pl" />
+    <suffix domain="irc.pl" />
+    <suffix domain="usenet.pl" />
+    <!--  NASK geographical domains : http://www.dns.pl/english/dns-regiony.html-->
+    <suffix domain="augustow.pl" />
+    <suffix domain="babia-gora.pl" />
+    <suffix domain="bedzin.pl" />
+    <suffix domain="beskidy.pl" />
+    <suffix domain="bialowieza.pl" />
+    <suffix domain="bialystok.pl" />
+    <suffix domain="bielawa.pl" />
+    <suffix domain="bieszczady.pl" />
+    <suffix domain="boleslawiec.pl" />
+    <suffix domain="bydgoszcz.pl" />
+    <suffix domain="bytom.pl" />
+    <suffix domain="cieszyn.pl" />
+    <suffix domain="czeladz.pl" />
+    <suffix domain="czest.pl" />
+    <suffix domain="dlugoleka.pl" />
+    <suffix domain="elblag.pl" />
+    <suffix domain="elk.pl" />
+    <suffix domain="glogow.pl" />
+    <suffix domain="gniezno.pl" />
+    <suffix domain="gorlice.pl" />
+    <suffix domain="grajewo.pl" />
+    <suffix domain="ilawa.pl" />
+    <suffix domain="jaworzno.pl" />
+    <suffix domain="jelenia-gora.pl" />
+    <suffix domain="jgora.pl" />
+    <suffix domain="kalisz.pl" />
+    <suffix domain="kazimierz-dolny.pl" />
+    <suffix domain="karpacz.pl" />
+    <suffix domain="kartuzy.pl" />
+    <suffix domain="kaszuby.pl" />
+    <suffix domain="katowice.pl" />
+    <suffix domain="kepno.pl" />
+    <suffix domain="ketrzyn.pl" />
+    <suffix domain="klodzko.pl" />
+    <suffix domain="kobierzyce.pl" />
+    <suffix domain="kolobrzeg.pl" />
+    <suffix domain="konin.pl" />
+    <suffix domain="konskowola.pl" />
+    <suffix domain="kutno.pl" />
+    <suffix domain="lapy.pl" />
+    <suffix domain="lebork.pl" />
+    <suffix domain="legnica.pl" />
+    <suffix domain="lezajsk.pl" />
+    <suffix domain="limanowa.pl" />
+    <suffix domain="lomza.pl" />
+    <suffix domain="lowicz.pl" />
+    <suffix domain="lubin.pl" />
+    <suffix domain="lukow.pl" />
+    <suffix domain="malbork.pl" />
+    <suffix domain="malopolska.pl" />
+    <suffix domain="mazowsze.pl" />
+    <suffix domain="mazury.pl" />
+    <suffix domain="mielec.pl" />
+    <suffix domain="mielno.pl" />
+    <suffix domain="mragowo.pl" />
+    <suffix domain="naklo.pl" />
+    <suffix domain="nowaruda.pl" />
+    <suffix domain="nysa.pl" />
+    <suffix domain="olawa.pl" />
+    <suffix domain="olecko.pl" />
+    <suffix domain="olkusz.pl" />
+    <suffix domain="olsztyn.pl" />
+    <suffix domain="opoczno.pl" />
+    <suffix domain="opole.pl" />
+    <suffix domain="ostroda.pl" />
+    <suffix domain="ostroleka.pl" />
+    <suffix domain="ostrowiec.pl" />
+    <suffix domain="ostrowwlkp.pl" />
+    <suffix domain="pila.pl" />
+    <suffix domain="pisz.pl" />
+    <suffix domain="podhale.pl" />
+    <suffix domain="podlasie.pl" />
+    <suffix domain="polkowice.pl" />
+    <suffix domain="pomorze.pl" />
+    <suffix domain="pomorskie.pl" />
+    <suffix domain="prochowice.pl" />
+    <suffix domain="pruszkow.pl" />
+    <suffix domain="przeworsk.pl" />
+    <suffix domain="pulawy.pl" />
+    <suffix domain="radom.pl" />
+    <suffix domain="rawa-maz.pl" />
+    <suffix domain="rybnik.pl" />
+    <suffix domain="rzeszow.pl" />
+    <suffix domain="sanok.pl" />
+    <suffix domain="sejny.pl" />
+    <suffix domain="slask.pl" />
+    <suffix domain="slupsk.pl" />
+    <suffix domain="sosnowiec.pl" />
+    <suffix domain="stalowa-wola.pl" />
+    <suffix domain="skoczow.pl" />
+    <suffix domain="starachowice.pl" />
+    <suffix domain="stargard.pl" />
+    <suffix domain="suwalki.pl" />
+    <suffix domain="swidnica.pl" />
+    <suffix domain="swiebodzin.pl" />
+    <suffix domain="swinoujscie.pl" />
+    <suffix domain="szczecin.pl" />
+    <suffix domain="szczytno.pl" />
+    <suffix domain="tarnobrzeg.pl" />
+    <suffix domain="tgory.pl" />
+    <suffix domain="turek.pl" />
+    <suffix domain="tychy.pl" />
+    <suffix domain="ustka.pl" />
+    <suffix domain="walbrzych.pl" />
+    <suffix domain="warmia.pl" />
+    <suffix domain="warszawa.pl" />
+    <suffix domain="waw.pl" />
+    <suffix domain="wegrow.pl" />
+    <suffix domain="wielun.pl" />
+    <suffix domain="wlocl.pl" />
+    <suffix domain="wloclawek.pl" />
+    <suffix domain="wodzislaw.pl" />
+    <suffix domain="wolomin.pl" />
+    <suffix domain="wroclaw.pl" />
+    <suffix domain="zachpomor.pl" />
+    <suffix domain="zagan.pl" />
+    <suffix domain="zarow.pl" />
+    <suffix domain="zgora.pl" />
+    <suffix domain="zgorzelec.pl" />
+    <!--  TASK geographical domains (www.task.gda.pl/uslugi/dns)-->
+    <suffix domain="gda.pl" />
+    <suffix domain="gdansk.pl" />
+    <suffix domain="gdynia.pl" />
+    <suffix domain="sopot.pl" />
+    <!--  other geographical domains-->
+    <suffix domain="gliwice.pl" />
+    <suffix domain="krakow.pl" />
+    <suffix domain="poznan.pl" />
+    <suffix domain="wroc.pl" />
+    <suffix domain="zakopane.pl" />
+
+    <!--  pn : http://www.government.pn/PnRegistry/policies.htm-->
+    <suffix domain="gov.pn" />
+    <suffix domain="co.pn" />
+    <suffix domain="org.pn" />
+    <suffix domain="edu.pn" />
+    <suffix domain="net.pn" />
+
+    <!--  pr : http://www.nic.pr/index.asp?f=1-->
+    <suffix domain="com.pr" />
+    <suffix domain="net.pr" />
+    <suffix domain="org.pr" />
+    <suffix domain="gov.pr" />
+    <suffix domain="edu.pr" />
+    <suffix domain="isla.pr" />
+    <suffix domain="pro.pr" />
+    <suffix domain="biz.pr" />
+    <suffix domain="info.pr" />
+    <suffix domain="name.pr" />
+    <!--  these aren't mentioned on nic.pr, but on http://en.wikipedia.org/wiki/.pr-->
+    <suffix domain="est.pr" />
+    <suffix domain="prof.pr" />
+    <suffix domain="ac.pr" />
+
+    <!--  pro : http://www.nic.pro/support_faq.htm-->
+    <suffix domain="aca.pro" />
+    <suffix domain="bar.pro" />
+    <suffix domain="cpa.pro" />
+    <suffix domain="jur.pro" />
+    <suffix domain="law.pro" />
+    <suffix domain="med.pro" />
+    <suffix domain="eng.pro" />
+
+    <!--  ps : http://en.wikipedia.org/wiki/.ps-->
+    <!--  list of 2nd level tlds ?-->
+    <suffix domain="edu.ps" />
+    <suffix domain="gov.ps" />
+    <suffix domain="sec.ps" />
+    <suffix domain="plo.ps" />
+    <suffix domain="com.ps" />
+    <suffix domain="org.ps" />
+    <suffix domain="net.ps" />
+
+    <!--  pt : http://online.dns.pt/dns/start_dns-->
+    <suffix domain="net.pt" />
+    <suffix domain="gov.pt" />
+    <suffix domain="org.pt" />
+    <suffix domain="edu.pt" />
+    <suffix domain="int.pt" />
+    <suffix domain="publ.pt" />
+    <suffix domain="com.pt" />
+    <suffix domain="nome.pt" />
+
+    <!--  pw : http://en.wikipedia.org/wiki/.pw-->
+
+    <!--  py : http://www.nic.py/faq_a.html#faq_b-->
+
+    <!--  qa : http://www.qatar.net.qa/services/virtual.htm-->
+
+    <!--  re : http://www.afnic.re/obtenir/chartes/nommage-re/annexe-descriptifs-->
+    <suffix domain="com.re" />
+    <suffix domain="asso.re" />
+    <suffix domain="nom.re" />
+
+    <!--  ro : http://www.rotld.ro/-->
+    <suffix domain="com.ro" />
+    <suffix domain="org.ro" />
+    <suffix domain="tm.ro" />
+    <suffix domain="nt.ro" />
+    <suffix domain="nom.ro" />
+    <suffix domain="info.ro" />
+    <suffix domain="rec.ro" />
+    <suffix domain="arts.ro" />
+    <suffix domain="firm.ro" />
+    <suffix domain="store.ro" />
+    <suffix domain="www.ro" />
+
+    <!--  ru : http://en.wikipedia.org/wiki/.ru-->
+    <suffix domain="com.ru" />
+    <suffix domain="net.ru" />
+    <suffix domain="org.ru" />
+    <suffix domain="pp.ru" />
+    <suffix domain="int.ru" />
+    <!--  there should be geo-names like msk.ru, but I didn't find a list-->
+
+    <!--  rw : http://www.nic.rw/cgi-bin/policy.pl-->
+    <suffix domain="gov.rw" />
+    <suffix domain="net.rw" />
+    <suffix domain="edu.rw" />
+    <suffix domain="ac.rw" />
+    <suffix domain="com.rw" />
+    <suffix domain="co.rw" />
+    <suffix domain="int.rw" />
+    <suffix domain="mil.rw" />
+    <suffix domain="gouv.rw" />
+
+    <!--  sa : http://www.saudinic.net.sa/page.php?page=1&lang=1-->
+
+    <!--  sb : http://www.sbnic.net.sb/-->
+
+    <!--  sc : http://www.nic.sc/-->
+    <suffix domain="com.sc" />
+    <suffix domain="gov.sc" />
+    <suffix domain="net.sc" />
+    <suffix domain="org.sc" />
+    <suffix domain="edu.sc" />
+
+    <!--  sd : http://www.isoc.sd/sudanic.isoc.sd/billing_pricing.htm-->
+    <suffix domain="com.sd" />
+    <suffix domain="net.sd" />
+    <suffix domain="org.sd" />
+    <suffix domain="edu.sd" />
+    <suffix domain="med.sd" />
+    <suffix domain="tv.sd" />
+    <suffix domain="gov.sd" />
+    <suffix domain="info.sd" />
+
+    <!--  se : http://en.wikipedia.org/wiki/.se-->
+    <suffix domain="org.se" />
+    <suffix domain="pp.se" />
+    <suffix domain="tm.se" />
+    <suffix domain="parti.se" />
+    <suffix domain="press.se" />
+    <suffix domain="mil.se" />
+    <!--  se geographical names-->
+    <suffix domain="ab.se" />
+    <suffix domain="c.se" />
+    <suffix domain="d.se" />
+    <suffix domain="e.se" />
+    <suffix domain="f.se" />
+    <suffix domain="g.se" />
+    <suffix domain="h.se" />
+    <suffix domain="i.se" />
+    <suffix domain="k.se" />
+    <suffix domain="m.se" />
+    <suffix domain="n.se" />
+    <suffix domain="o.se" />
+    <suffix domain="s.se" />
+    <suffix domain="t.se" />
+    <suffix domain="u.se" />
+    <suffix domain="w.se" />
+    <suffix domain="x.se" />
+    <suffix domain="y.se" />
+    <suffix domain="z.se" />
+    <suffix domain="ac.se" />
+    <suffix domain="bd.se" />
+
+    <!--  sg : http://www.nic.net.sg/sub_policies_agreement/2ld.html-->
+    <suffix domain="com.sg" />
+    <suffix domain="net.sg" />
+    <suffix domain="org.sg" />
+    <suffix domain="gov.sg" />
+    <suffix domain="edu.sg" />
+    <suffix domain="per.sg" />
+
+    <!--  sh : http://www.nic.sh/rules.html-->
+    <!--  list of 2nd level domains ?-->
+
+    <!--  si : http://en.wikipedia.org/wiki/.si-->
+
+    <!--  sk : http://en.wikipedia.org/wiki/.sk-->
+
+    <!--  sl : http://en.wikipedia.org/wiki/.sl-->
+    <!--  list of 2nd level domains ?-->
+
+    <!--  sm : http://en.wikipedia.org/wiki/.sm-->
+
+    <!--  sn : http://en.wikipedia.org/wiki/.sn-->
+    <!--  list of 2nd level domains ?-->
+
+    <!--  sr : http://en.wikipedia.org/wiki/.sr-->
+
+    <!--  st : http://www.nic.st/html/policyrules/-->
+
+    <!--  su : http://en.wikipedia.org/wiki/.su-->
+
+    <!--  sv : http://www.svnet.org.sv/svpolicy.html-->
+
+    <!--  sy : http://www.gobin.info/domainname/sy.doc-->
+
+    <!--  sz : http://en.wikipedia.org/wiki/.sz-->
+    <!--  list of 2nd level domains ?-->
+
+    <!--  tc : http://en.wikipedia.org/wiki/.tc-->
+
+    <!--  td : http://en.wikipedia.org/wiki/.td-->
+
+    <!--  tf : http://en.wikipedia.org/wiki/.tf-->
+
+    <!--  tg : http://en.wikipedia.org/wiki/.tg-->
+    <!--  list of 2nd level domains ?-->
+
+    <!--  th : http://en.wikipedia.org/wiki/.th-->
+
+    <!--  tj : http://www.nic.tj/policy.htm-->
+    <suffix domain="ac.tj" />
+    <suffix domain="biz.tj" />
+    <suffix domain="com.tj" />
+    <suffix domain="co.tj" />
+    <suffix domain="edu.tj" />
+    <suffix domain="int.tj" />
+    <suffix domain="name.tj" />
+    <suffix domain="net.tj" />
+    <suffix domain="org.tj" />
+    <suffix domain="web.tj" />
+    <suffix domain="gov.tj" />
+    <suffix domain="go.tj" />
+    <suffix domain="mil.tj" />
+
+    <!--  tk : http://en.wikipedia.org/wiki/.tk-->
+
+    <!--  tl : http://en.wikipedia.org/wiki/.tl-->
+    <!--  list of 2nd level tlds ?-->
+
+    <!--  tm : http://www.nic.tm/rules.html-->
+    <!--  list of 2nd level tlds ?-->
+
+    <!--  tn : http://en.wikipedia.org/wiki/.tn-->
+    <!--  list of 2nd level tlds ?-->
+
+    <!--  to : http://en.wikipedia.org/wiki/.to-->
+    <!--  list of 2nd level tlds ?-->
+
+    <!--  tr : http://en.wikipedia.org/wiki/.tr-->
+
+    <!--  travel : http://en.wikipedia.org/wiki/.travel-->
+
+    <!--  tt : http://www.nic.tt/-->
+    <suffix domain="co.tt" />
+    <suffix domain="com.tt" />
+    <suffix domain="org.tt" />
+    <suffix domain="net.tt" />
+    <suffix domain="biz.tt" />
+    <suffix domain="info.tt" />
+    <suffix domain="pro.tt" />
+    <suffix domain="int.tt" />
+    <suffix domain="coop.tt" />
+    <suffix domain="jobs.tt" />
+    <suffix domain="mobi.tt" />
+    <suffix domain="travel.tt" />
+    <suffix domain="museum.tt" />
+    <suffix domain="aero.tt" />
+    <suffix domain="name.tt" />
+    <suffix domain="gov.tt" />
+    <suffix domain="edu.tt" />
+
+    <!--  tv : http://en.wikipedia.org/wiki/.tv-->
+    <!--  list of 2nd level tlds ?-->
+
+    <!--  tw : http://en.wikipedia.org/wiki/.tw-->
+    <suffix domain="edu.tw" />
+    <suffix domain="gov.tw" />
+    <suffix domain="mil.tw" />
+    <suffix domain="com.tw" />
+    <suffix domain="net.tw" />
+    <suffix domain="org.tw" />
+    <suffix domain="idv.tw" />
+    <suffix domain="game.tw" />
+    <suffix domain="ebiz.tw" />
+    <suffix domain="club.tw" />
+    <suffix domain="網路.tw" />
+    <suffix domain="組織.tw" />
+    <suffix domain="商業.tw" />
+
+    <!--  tz : http://en.wikipedia.org/wiki/.tz-->
+
+    <!--  ua : http://www.nic.net.ua/-->
+    <suffix domain="com.ua" />
+    <suffix domain="edu.ua" />
+    <suffix domain="gov.ua" />
+    <suffix domain="net.ua" />
+    <suffix domain="org.ua" />
+    <!--  ua geo-names-->
+    <suffix domain="cherkassy.ua" />
+    <suffix domain="chernigov.ua" />
+    <suffix domain="chernovtsy.ua" />
+    <suffix domain="ck.ua" />
+    <suffix domain="cn.ua" />
+    <suffix domain="crimea.ua" />
+    <suffix domain="cv.ua" />
+    <suffix domain="dn.ua" />
+    <suffix domain="dnepropetrovsk.ua" />
+    <suffix domain="donetsk.ua" />
+    <suffix domain="dp.ua" />
+    <suffix domain="if.ua" />
+    <suffix domain="ivano-frankivsk.ua" />
+    <suffix domain="kh.ua" />
+    <suffix domain="kharkov.ua" />
+    <suffix domain="kherson.ua" />
+    <suffix domain="kiev.ua" />
+    <suffix domain="kirovograd.ua" />
+    <suffix domain="km.ua" />
+    <suffix domain="kr.ua" />
+    <suffix domain="ks.ua" />
+    <suffix domain="lg.ua" />
+    <suffix domain="lugansk.ua" />
+    <suffix domain="lutsk.ua" />
+    <suffix domain="lviv.ua" />
+    <suffix domain="mk.ua" />
+    <suffix domain="nikolaev.ua" />
+    <suffix domain="od.ua" />
+    <suffix domain="odessa.ua" />
+    <suffix domain="pl.ua" />
+    <suffix domain="poltava.ua" />
+    <suffix domain="rovno.ua" />
+    <suffix domain="rv.ua" />
+    <suffix domain="sebastopol.ua" />
+    <suffix domain="sumy.ua" />
+    <suffix domain="te.ua" />
+    <suffix domain="ternopil.ua" />
+    <suffix domain="vinnica.ua" />
+    <suffix domain="vn.ua" />
+    <suffix domain="zaporizhzhe.ua" />
+    <suffix domain="zp.ua" />
+    <suffix domain="uz.ua" />
+    <suffix domain="uzhgorod.ua" />
+    <suffix domain="zhitomir.ua" />
+    <suffix domain="zt.ua" />
+
+    <!--  ug : http://www.registry.co.ug/-->
+    <suffix domain="co.ug" />
+    <suffix domain="ac.ug" />
+    <suffix domain="sc.ug" />
+    <suffix domain="go.ug" />
+    <suffix domain="ne.ug" />
+    <suffix domain="or.ug" />
+
+    <!--  uk : http://en.wikipedia.org/wiki/.uk-->
+    <suffix domain="sch.uk" />
+    <suffix domain="bl.uk" />
+    <suffix domain="british-library.uk" />
+    <suffix domain="icnet.uk" />
+    <suffix domain="jet.uk" />
+    <suffix domain="nel.uk" />
+    <suffix domain="nls.uk" />
+    <suffix domain="national-library-scotland.uk" />
+    <suffix domain="parliament.uk" />
+
+    <!--  us : http://en.wikipedia.org/wiki/.us-->
+    <suffix domain="dni.us" />
+    <suffix domain="fed.us" />
+    <suffix domain="isa.us" />
+    <suffix domain="kids.us" />
+    <suffix domain="nsn.us" />
+    <!--  us geographic names-->
+    <suffix domain="ak.us" />
+    <suffix domain="al.us" />
+    <suffix domain="ar.us" />
+    <suffix domain="az.us" />
+    <suffix domain="ca.us" />
+    <suffix domain="co.us" />
+    <suffix domain="ct.us" />
+    <suffix domain="dc.us" />
+    <suffix domain="de.us" />
+    <suffix domain="fl.us" />
+    <suffix domain="ga.us" />
+    <suffix domain="hi.us" />
+    <suffix domain="ia.us" />
+    <suffix domain="id.us" />
+    <suffix domain="il.us" />
+    <suffix domain="in.us" />
+    <suffix domain="ks.us" />
+    <suffix domain="ky.us" />
+    <suffix domain="la.us" />
+    <suffix domain="ma.us" />
+    <suffix domain="md.us" />
+    <suffix domain="me.us" />
+    <suffix domain="mi.us" />
+    <suffix domain="mn.us" />
+    <suffix domain="mo.us" />
+    <suffix domain="ms.us" />
+    <suffix domain="mt.us" />
+    <suffix domain="nc.us" />
+    <suffix domain="nd.us" />
+    <suffix domain="ne.us" />
+    <suffix domain="nh.us" />
+    <suffix domain="nj.us" />
+    <suffix domain="nm.us" />
+    <suffix domain="nv.us" />
+    <suffix domain="ny.us" />
+    <suffix domain="oh.us" />
+    <suffix domain="ok.us" />
+    <suffix domain="or.us" />
+    <suffix domain="pa.us" />
+    <suffix domain="ri.us" />
+    <suffix domain="sc.us" />
+    <suffix domain="sd.us" />
+    <suffix domain="tn.us" />
+    <suffix domain="tx.us" />
+    <suffix domain="ut.us" />
+    <suffix domain="vt.us" />
+    <suffix domain="va.us" />
+    <suffix domain="wa.us" />
+    <suffix domain="wi.us" />
+    <suffix domain="wv.us" />
+    <suffix domain="wy.us" />
+    <!--  the following rules would be only valid under the geo-name, but we can't express that-->
+    <!--  *.*.us          cities, counties, parishes, and townships (locality.state.us)-->
+    <!--  !ci.*.*.us       city government agencies (subdomain under locality)-->
+    <!--  !town.*.*.us     town government agencies (subdomain under locality)-->
+    <!--  !co.*.*.us       county government agencies (subdomain under locality)-->
+    <!--  k12.*.us      public school districts-->
+    <!--  pvt.k12.*.us  private schools-->
+    <!--  cc.*.us       community colleges-->
+    <!--  tec.*.us      technical and vocational schools-->
+    <!--  lib.*.us      state, regional, city, and county libraries-->
+    <!--  state.*.us    state government agencies-->
+    <!--  gen.*.us      general independent entities (groups not fitting into the above categories)-->
+
+    <!--  uy : http://www.antel.com.uy/-->
+
+    <!--  uz : http://www.reg.uz/registerr.html-->
+    <!--  are there other 2nd level tlds ?-->
+    <suffix domain="com.uz" />
+    <suffix domain="co.uz" />
+
+    <!--  va : http://en.wikipedia.org/wiki/.va-->
+
+    <!--  vc : http://en.wikipedia.org/wiki/.vc-->
+    <!--  list of 2nd level tlds ?-->
+
+    <!--  ve : http://registro.nic.ve/nicve/registro/index.html-->
+
+    <!--  vg : http://en.wikipedia.org/wiki/.vg-->
+
+    <!--  vi : http://www.nic.vi/Domain_Rules/body_domain_rules.html-->
+    <suffix domain="com.vi" />
+    <suffix domain="org.vi" />
+    <suffix domain="edu.vi" />
+    <suffix domain="gov.vi" />
+
+    <!--  vn : https://www.dot.vn/vnnic/vnnic/domainregistration.jsp-->
+    <suffix domain="com.vn" />
+    <suffix domain="net.vn" />
+    <suffix domain="org.vn" />
+    <suffix domain="edu.vn" />
+    <suffix domain="gov.vn" />
+    <suffix domain="int.vn" />
+    <suffix domain="ac.vn" />
+    <suffix domain="biz.vn" />
+    <suffix domain="info.vn" />
+    <suffix domain="name.vn" />
+    <suffix domain="pro.vn" />
+    <suffix domain="health.vn" />
+
+    <!--  vu : http://en.wikipedia.org/wiki/.vu-->
+    <!--  list of 2nd level tlds ?-->
+
+    <!--  ws : http://en.wikipedia.org/wiki/.ws-->
+
+    <!--  ye : http://www.y.net.ye/services/domain_name.htm-->
+
+    <!--  yu : http://www.nic.yu/pravilnik-e.html-->
+
+    <!--  za : http://www.zadna.org.za/slds.html
+               http://en.wikipedia.org/wiki/.za
+     -->
+    <suffix domain="ac.za" />
+    <suffix domain="city.za" />
+    <suffix domain="co.za" />
+    <suffix domain="edu.za" />
+    <suffix domain="gov.za" />
+    <suffix domain="law.za" />
+    <suffix domain="mil.za" />
+    <suffix domain="nom.za" />
+    <suffix domain="org.za" />
+    <suffix domain="school.za" />
+    <suffix domain="ecape.school.za" />
+    <suffix domain="fs.school.za" />
+    <suffix domain="gp.school.za" />
+    <suffix domain="kzn.school.za" />
+    <suffix domain="mpm.school.za" />
+    <suffix domain="ncape.school.za" />
+    <suffix domain="lp.school.za" />
+    <suffix domain="nw.school.za" />
+    <suffix domain="wcape.school.za" />
+
+    <!--  zm : http://en.wikipedia.org/wiki/.zm-->
+
+    <!--  zw : http://en.wikipedia.org/wiki/.zw-->
+
+  </suffixes>
+</domains>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conf/domain-suffixes.xsd	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,130 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+  
+  http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!--
+  Document   : domain-suffixes.xsd
+  Author     : Enis Soztutar - enis.soz.nutch@gmail.com
+  Description: This document is the schema for valid domain-suffixes
+  definitions. For successful parsing of domain-suffixes xml files, 
+  the xml file should be validated with this xsd. 
+  See        : org.apache.nutch.util.domain.DomainSuffixesReader.java
+-->
+
+<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
+  targetNamespace="http://lucene.apache.org/nutch"
+  xmlns="http://lucene.apache.org/nutch"
+  elementFormDefault="qualified">
+
+  <xs:element name="domains">
+    <xs:complexType>
+      <xs:sequence>
+        <xs:element name="tlds">
+          <xs:complexType>
+            <xs:sequence>
+              <xs:element name="itlds">
+                <xs:complexType>
+                  <xs:sequence>
+                    <xs:element name="tld" maxOccurs="unbounded"
+                      type="gtld" />
+                  </xs:sequence>
+                </xs:complexType>
+              </xs:element>
+
+              <xs:element name="gtlds">
+                <xs:complexType>
+                  <xs:sequence>
+                    <xs:element name="tld" maxOccurs="unbounded"
+                      type="gtld" />
+                  </xs:sequence>
+                </xs:complexType>
+              </xs:element>
+
+              <xs:element name="cctlds">
+                <xs:complexType>
+                  <xs:sequence>
+                    <xs:element name="tld" maxOccurs="unbounded"
+                      type="cctld" />
+                  </xs:sequence>
+                </xs:complexType>
+              </xs:element>
+
+            </xs:sequence>
+          </xs:complexType>
+        </xs:element>
+
+        <xs:element name="suffixes">
+          <xs:complexType>
+            <xs:sequence>
+              <xs:element name="suffix" maxOccurs="unbounded"
+                type="sldType" />
+            </xs:sequence>
+          </xs:complexType>
+        </xs:element>
+      </xs:sequence>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:complexType name="gtld">
+    <xs:sequence>
+      <xs:element name="status" minOccurs="0">
+        <xs:simpleType>
+          <xs:restriction base="xs:string">
+            <xs:enumeration value="INFRASTRUCTURE" />
+            <xs:enumeration value="SPONSORED" />
+            <xs:enumeration value="UNSPONSORED" />
+            <xs:enumeration value="STARTUP" />
+            <xs:enumeration value="PROPOSED" />
+            <xs:enumeration value="DELETED" />
+            <xs:enumeration value="PSEUDO_DOMAIN" />
+          </xs:restriction>
+        </xs:simpleType>
+      </xs:element>
+      <xs:element name="boost" type="xs:float" minOccurs="0" />
+      <xs:element name="description" type="xs:string" minOccurs="0" />
+    </xs:sequence>
+    <xs:attribute name="domain" type="xs:string" />
+  </xs:complexType>
+
+  <xs:complexType name="cctld">
+    <xs:sequence>
+      <xs:element name="country" type="xs:string" />
+      <xs:element name="status" type="statusType" minOccurs="0" />
+      <xs:element name="boost" type="xs:float" minOccurs="0" />
+      <xs:element name="description" type="xs:string" minOccurs="0" />
+    </xs:sequence>
+    <xs:attribute name="domain" type="xs:string" />
+  </xs:complexType>
+
+  <xs:complexType name="sldType">
+    <xs:sequence>
+      <xs:element name="status" type="statusType" minOccurs="0" />
+      <xs:element name="boost" type="xs:float" minOccurs="0" />
+      <xs:element name="description" type="xs:string" minOccurs="0" />
+    </xs:sequence>
+    <xs:attribute name="domain" type="xs:string" />
+  </xs:complexType>
+
+  <xs:simpleType name="statusType">
+    <xs:restriction base="xs:string">
+      <xs:enumeration value="IN_USE" />
+      <xs:enumeration value="NOT_IN_USE" />
+      <xs:enumeration value="DELETED" />
+    </xs:restriction>
+  </xs:simpleType>
+
+</xs:schema>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conf/domain-urlfilter.txt	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# config file for urlfilter-domain plugin
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conf/domainblacklist-urlfilter.txt	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# config file for urlfilter-domainblacklist plugin
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conf/host-urlnormalizer.txt	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# New line separated list of hosts mapped to their desired targets.
+# wildcard hosts are supported. Format: host target
+
+# Map www.apache.org to apache.org
+www.apache.org apache.org
+
+# Map all example.org subdomains to example.org
+*.example.org example.org
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conf/httpclient-auth.xml	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,61 @@
+<?xml version="1.0"?>
+<!--
+  This is the authentication configuration file for protocol-httpclient.
+  Different credentials for different authentication scopes can be
+  configured in this file. If a set of credentials is configured for a 
+  particular authentication scope (i.e. particular host, port number,
+  scheme and realm), then that set of credentials would be sent only to
+  servers falling under the specified authentication scope. Apart from
+  this at most one set of credentials can be configured as 'default'.
+  
+  When authentication is required to fetch a resource from a web-server,
+  the authentication-scope is determined from the host, port, scheme and
+  realm (if present) obtained from the URL of the page and the
+  authentication headers in the HTTP response. If it matches any
+  'authscope' in this configuration file, then the 'credentials' for
+  that 'authscope' is used for authentication. Otherwise, it would use
+  the 'default' set of credentials (with an exception which is described
+  in the next paragraph), if present. If any attribute is missing, it
+  would match all values for that attribute.
+
+  If there are several pages having different authentication realms and
+  schemes on the same web-server (same host and port, but different
+  realms and schemes), and credentials for one or more of the realms and
+  schemes for that web-server are specified, then the 'default'
+  credentials would be ignored completely for that web-server (for that
+  host and port). So, credentials to handle all realms and schemes for
+  that server may be specified explicitly by adding an extra 'authscope'
+  tag with the 'realm' and 'scheme' attributes missing for that server.
+  This is demonstrated by the last 'authscope' tag for 'example:8080' in
+  the following example.
+
+  Example:-
+    <credentials username="susam" password="masus">
+      <default realm="sso"/>
+      <authscope host="192.168.101.33" port="80" realm="login"/>
+      <authscope host="example" port="8080" realm="blogs"/>
+      <authscope host="example" port="8080" realm="wiki"/>
+      <authscope host="example" port="80" realm="quiz" scheme="NTLM"/>
+    </credentials>
+    <credentials username="admin" password="nimda">
+      <authscope host="example" port="8080"/>
+    </credentials>
+
+  In the above example, 'example:8080' server has pages with multiple
+  authentication realms. The first set of credentials would be used for
+  'blogs' and 'wiki' authentication realms. The second set of
+  credentials would be used for all other realms. For 'login' realm of
+  '192.168.101.33', the first set of credentials would be used. For any
+  other realm of '192.168.101.33' authentication would not be done. For
+  the NTLM authentication required by 'example:80', the first set of
+  credentials would be used. For 'sso' realms of all other servers, the
+  first set of credentials would be used, since it is configured as
+  'default'.
+
+  NTLM does not use the notion of realms. The domain name may be
+  specified as the value for 'realm' attribute in case of NTLM.
+-->
+
+<auth-configuration>
+
+</auth-configuration>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conf/httpclient-auth.xml.template	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,61 @@
+<?xml version="1.0"?>
+<!--
+  This is the authentication configuration file for protocol-httpclient.
+  Different credentials for different authentication scopes can be
+  configured in this file. If a set of credentials is configured for a 
+  particular authentication scope (i.e. particular host, port number,
+  scheme and realm), then that set of credentials would be sent only to
+  servers falling under the specified authentication scope. Apart from
+  this at most one set of credentials can be configured as 'default'.
+  
+  When authentication is required to fetch a resource from a web-server,
+  the authentication-scope is determined from the host, port, scheme and
+  realm (if present) obtained from the URL of the page and the
+  authentication headers in the HTTP response. If it matches any
+  'authscope' in this configuration file, then the 'credentials' for
+  that 'authscope' is used for authentication. Otherwise, it would use
+  the 'default' set of credentials (with an exception which is described
+  in the next paragraph), if present. If any attribute is missing, it
+  would match all values for that attribute.
+
+  If there are several pages having different authentication realms and
+  schemes on the same web-server (same host and port, but different
+  realms and schemes), and credentials for one or more of the realms and
+  schemes for that web-server are specified, then the 'default'
+  credentials would be ignored completely for that web-server (for that
+  host and port). So, credentials to handle all realms and schemes for
+  that server may be specified explicitly by adding an extra 'authscope'
+  tag with the 'realm' and 'scheme' attributes missing for that server.
+  This is demonstrated by the last 'authscope' tag for 'example:8080' in
+  the following example.
+
+  Example:-
+    <credentials username="susam" password="masus">
+      <default realm="sso"/>
+      <authscope host="192.168.101.33" port="80" realm="login"/>
+      <authscope host="example" port="8080" realm="blogs"/>
+      <authscope host="example" port="8080" realm="wiki"/>
+      <authscope host="example" port="80" realm="quiz" scheme="NTLM"/>
+    </credentials>
+    <credentials username="admin" password="nimda">
+      <authscope host="example" port="8080"/>
+    </credentials>
+
+  In the above example, 'example:8080' server has pages with multiple
+  authentication realms. The first set of credentials would be used for
+  'blogs' and 'wiki' authentication realms. The second set of
+  credentials would be used for all other realms. For 'login' realm of
+  '192.168.101.33', the first set of credentials would be used. For any
+  other realm of '192.168.101.33' authentication would not be done. For
+  the NTLM authentication required by 'example:80', the first set of
+  credentials would be used. For 'sso' realms of all other servers, the
+  first set of credentials would be used, since it is configured as
+  'default'.
+
+  NTLM does not use the notion of realms. The domain name may be
+  specified as the value for 'realm' attribute in case of NTLM.
+-->
+
+<auth-configuration>
+
+</auth-configuration>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conf/log4j.properties	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,92 @@
+# Define some default values that can be overridden by system properties
+hadoop.log.dir=.
+hadoop.log.file=hadoop.log
+
+# RootLogger - DailyRollingFileAppender
+log4j.rootLogger=INFO,DRFA
+
+# Logging Threshold
+log4j.threshhold=ALL
+
+#special logging requirements for some commandline tools
+log4j.logger.org.apache.nutch.crawl.Crawl=INFO,cmdstdout
+log4j.logger.org.apache.nutch.crawl.Injector=INFO,cmdstdout
+log4j.logger.org.apache.nutch.crawl.Generator=INFO,cmdstdout
+log4j.logger.org.apache.nutch.fetcher.Fetcher=INFO,cmdstdout
+log4j.logger.org.apache.nutch.parse.ParseSegment=INFO,cmdstdout
+log4j.logger.org.apache.nutch.crawl.CrawlDbReader=INFO,cmdstdout
+log4j.logger.org.apache.nutch.crawl.CrawlDbMerger=INFO,cmdstdout
+log4j.logger.org.apache.nutch.crawl.LinkDbReader=INFO,cmdstdout
+log4j.logger.org.apache.nutch.segment.SegmentReader=INFO,cmdstdout
+log4j.logger.org.apache.nutch.segment.SegmentMerger=INFO,cmdstdout
+log4j.logger.org.apache.nutch.crawl.CrawlDb=INFO,cmdstdout
+log4j.logger.org.apache.nutch.crawl.LinkDb=INFO,cmdstdout
+log4j.logger.org.apache.nutch.crawl.LinkDbMerger=INFO,cmdstdout
+log4j.logger.org.apache.nutch.indexer.solr.SolrIndexer=INFO,cmdstdout
+log4j.logger.org.apache.nutch.indexer.solr.SolrWriter=INFO,cmdstdout
+log4j.logger.org.apache.nutch.indexer.solr.SolrDeleteDuplicates=INFO,cmdstdout
+log4j.logger.org.apache.nutch.indexer.solr.SolrClean=INFO,cmdstdout
+log4j.logger.org.apache.nutch.scoring.webgraph.WebGraph=INFO,cmdstdout
+log4j.logger.org.apache.nutch.scoring.webgraph.LinkRank=INFO,cmdstdout
+log4j.logger.org.apache.nutch.scoring.webgraph.Loops=INFO,cmdstdout
+log4j.logger.org.apache.nutch.scoring.webgraph.ScoreUpdater=INFO,cmdstdout
+log4j.logger.org.apache.nutch.parse.ParserChecker=INFO,cmdstdout
+log4j.logger.org.apache.nutch.indexer.IndexingFiltersChecker=INFO,cmdstdout
+log4j.logger.org.apache.nutch.tools.FreeGenerator=INFO,cmdstdout
+log4j.logger.org.apache.nutch.util.domain.DomainStatistics=INFO,cmdstdout
+log4j.logger.org.apache.nutch.tools.CrawlDBScanner=INFO,cmdstdout
+
+log4j.logger.org.apache.nutch=INFO
+log4j.logger.org.apache.hadoop=WARN
+
+#
+# Daily Rolling File Appender
+#
+
+log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Rollover at midnight
+log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+
+# 30-day backup
+#log4j.appender.DRFA.MaxBackupIndex=30
+log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
+
+# Pattern format: Date LogLevel LoggerName LogMessage
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n
+# Debugging Pattern format: Date LogLevel LoggerName (FileName:MethodName:LineNo) LogMessage
+#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+
+#
+# stdout
+# Add *stdout* to rootlogger above if you want to use this 
+#
+
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+#
+# plain layout used for commandline tools to output to console
+#
+log4j.appender.cmdstdout=org.apache.log4j.ConsoleAppender
+log4j.appender.cmdstdout.layout=org.apache.log4j.PatternLayout
+log4j.appender.cmdstdout.layout.ConversionPattern=%m%n
+
+#
+# Rolling File Appender
+#
+
+#log4j.appender.RFA=org.apache.log4j.RollingFileAppender
+#log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Logfile size and 30-day backups
+#log4j.appender.RFA.MaxFileSize=1MB
+#log4j.appender.RFA.MaxBackupIndex=30
+
+#log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conf/mpiwg-dom-parser.xml	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,11 @@
+<mpiwg-dom-parser>
+<filter>
+
+<name>main_content</name>
+
+ <tagname>div</tagname>
+
+ <tagclass>main</tagclass>
+
+</filter>
+</mpiwg-dom-parser>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conf/mpiwg-parser.xml	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,54 @@
+<mpiwg-parser>
+<filter>
+
+<name>persons</name>
+
+ <searchPattern><![CDATA[<span class=\"mpiwg-first_name\">(.*?)</span><span class=\"mpiwg-last_name\">(.*?)</span>]]></searchPattern>
+
+ <!-- provide as many group-name entries as there are groups in the searchPattern above -->
+
+ <group-name>first_name</group-name>
+ <group-name>last_name</group-name>
+
+
+ <!-- string that identifies the line in the HTML to which the pattern should be applied -->
+
+ <line-identification>mpiwg-first_name</line-identification>
+
+</filter>
+
+<filter>
+
+<name>projects_title</name>
+
+ <searchPattern><![CDATA[<h1 class=\"mpiwg-title\">(.*?)</h1>]]></searchPattern>
+
+<!-- provide as many group-name entries as there are groups in the searchPattern above -->
+
+<group-name>project_title</group-name> 
+ 
+
+ <!-- string that identifies the line in the HTML to which the pattern should be applied -->
+
+ <line-identification>mpiwg-title</line-identification> 
+
+</filter>
+<filter>
+
+<name>projects_author</name>
+<multiline>5</multiline>
+ <searchPattern><![CDATA[<a class=\"mpiwg-author\"\s*href=\"(.*?)\">\s*(.*?)</a>]]></searchPattern>
+
+<!-- provide as many group-name entries as there are groups in the searchPattern above -->
+
+ <group-name>project_author_url</group-name>
+ <group-name>project_author</group-name>
+ 
+
+<!-- string that identifies the line in the HTML to which the pattern should be applied -->
+
+ <line-identification>mpiwg-author</line-identification>
+
+</filter>
+
+</mpiwg-parser>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conf/nutch-conf.xsl	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,24 @@
+<?xml version="1.0"?>
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+<xsl:output method="html"/>
+<xsl:template match="nutch-conf">
+<html>
+<body>
+<table border="1">
+<tr>
+ <td>name</td>
+ <td>value</td>
+ <td>description</td>
+</tr>
+<xsl:for-each select="property">
+<tr>
+  <td><xsl:value-of select="name"/></td>
+  <td><xsl:value-of select="value"/></td>
+  <td><xsl:value-of select="description"/></td>
+</tr>
+</xsl:for-each>
+</table>
+</body>
+</html>
+</xsl:template>
+</xsl:stylesheet>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conf/nutch-default.xml	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,1443 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<!-- Do not modify this file directly.  Instead, copy entries that you -->
+<!-- wish to modify from this file into nutch-site.xml and change them -->
+<!-- there.  If nutch-site.xml does not already exist, create it.      -->
+
+<configuration>
+
+<!-- file properties -->
+
+<property>
+  <name>file.content.limit</name>
+  <value>65536</value>
+  <description>The length limit for downloaded content using the file://
+  protocol, in bytes. If this value is nonnegative (>=0), content longer
+  than it will be truncated; otherwise, no truncation at all. Do not
+  confuse this setting with the http.content.limit setting.
+  </description>
+</property>
+  
+<property>
+  <name>file.crawl.parent</name>
+  <value>true</value>
+  <description>The crawler is not restricted to the directories that you specified in the
+    Urls file but it is jumping into the parent directories as well. For your own crawlings you can
+    change this behavior (set to false) the way that only directories beneath the directories that you specify get
+    crawled.</description>
+</property>
+
+<property>
+  <name>file.content.ignored</name>
+  <value>true</value>
+  <description>If true, no file content will be saved during fetch.
+  And it is probably what we want to set most of time, since file:// URLs
+  are meant to be local and we can always use them directly at parsing
+  and indexing stages. Otherwise file contents will be saved.
+  !! NOT IMPLEMENTED YET !!
+  </description>
+</property>
+
+<!-- HTTP properties -->
+
+<property>
+  <name>http.agent.name</name>
+  <value></value>
+  <description>HTTP 'User-Agent' request header. MUST NOT be empty - 
+  please set this to a single word uniquely related to your organization.
+
+  NOTE: You should also check other related properties:
+
+	http.robots.agents
+	http.agent.description
+	http.agent.url
+	http.agent.email
+	http.agent.version
+
+  and set their values appropriately.
+
+  </description>
+</property>
+
+<property>
+  <name>http.robots.agents</name>
+  <value>*</value>
+  <description>The agent strings we'll look for in robots.txt files,
+  comma-separated, in decreasing order of precedence. You should
+  put the value of http.agent.name as the first agent name, and keep the
+  default * at the end of the list. E.g.: BlurflDev,Blurfl,*
+  </description>
+</property>
+
+<property>
+  <name>http.robots.403.allow</name>
+  <value>true</value>
+  <description>Some servers return HTTP status 403 (Forbidden) if
+  /robots.txt doesn't exist. This should probably mean that we are
+  allowed to crawl the site nonetheless. If this is set to false,
+  then such sites will be treated as forbidden.</description>
+</property>
+
+<property>
+  <name>http.agent.description</name>
+  <value></value>
+  <description>Further description of our bot- this text is used in
+  the User-Agent header.  It appears in parentheses after the agent name.
+  </description>
+</property>
+
+<property>
+  <name>http.agent.url</name>
+  <value></value>
+  <description>A URL to advertise in the User-Agent header.  This will 
+   appear in parentheses after the agent name. Custom dictates that this
+   should be a URL of a page explaining the purpose and behavior of this
+   crawler.
+  </description>
+</property>
+
+<property>
+  <name>http.agent.email</name>
+  <value></value>
+  <description>An email address to advertise in the HTTP 'From' request
+   header and User-Agent header. A good practice is to mangle this
+   address (e.g. 'info at example dot com') to avoid spamming.
+  </description>
+</property>
+
+<property>
+  <name>http.agent.version</name>
+  <value>Nutch-1.7-SNAPSHOT</value>
+  <description>A version string to advertise in the User-Agent 
+   header.</description>
+</property>
+
+<property>
+  <name>http.agent.host</name>
+  <value></value>
+  <description>Name or IP address of the host on which the Nutch crawler
+  would be running. Currently this is used by 'protocol-httpclient'
+  plugin.
+  </description>
+</property>
+
+<property>
+  <name>http.timeout</name>
+  <value>10000</value>
+  <description>The default network timeout, in milliseconds.</description>
+</property>
+
+<property>
+  <name>http.max.delays</name>
+  <value>100</value>
+  <description>The number of times a thread will delay when trying to
+  fetch a page.  Each time it finds that a host is busy, it will wait
+  fetcher.server.delay.  After http.max.delays attempts, it will give
+  up on the page for now.</description>
+</property>
+
+<property>
+  <name>http.content.limit</name>
+  <value>65536</value>
+  <description>The length limit for downloaded content using the http://
+  protocol, in bytes. If this value is nonnegative (>=0), content longer
+  than it will be truncated; otherwise, no truncation at all. Do not
+  confuse this setting with the file.content.limit setting.
+  </description>
+</property>
+
+<property>
+  <name>http.proxy.host</name>
+  <value></value>
+  <description>The proxy hostname.  If empty, no proxy is used.</description>
+</property>
+
+<property>
+  <name>http.proxy.port</name>
+  <value></value>
+  <description>The proxy port.</description>
+</property>
+
+<property>
+  <name>http.proxy.username</name>
+  <value></value>
+  <description>Username for proxy. This will be used by
+  'protocol-httpclient', if the proxy server requests basic, digest
+  and/or NTLM authentication. To use this, 'protocol-httpclient' must
+  be present in the value of 'plugin.includes' property.
+  NOTE: For NTLM authentication, do not prefix the username with the
+  domain, i.e. 'susam' is correct whereas 'DOMAIN\susam' is incorrect.
+  </description>
+</property>
+
+<property>
+  <name>http.proxy.password</name>
+  <value></value>
+  <description>Password for proxy. This will be used by
+  'protocol-httpclient', if the proxy server requests basic, digest
+  and/or NTLM authentication. To use this, 'protocol-httpclient' must
+  be present in the value of 'plugin.includes' property.
+  </description>
+</property>
+
+<property>
+  <name>http.proxy.realm</name>
+  <value></value>
+  <description>Authentication realm for proxy. Do not define a value
+  if realm is not required or authentication should take place for any
+  realm. NTLM does not use the notion of realms. Specify the domain name
+  of NTLM authentication as the value for this property. To use this,
+  'protocol-httpclient' must be present in the value of
+  'plugin.includes' property.
+  </description>
+</property>
+
+<property>
+  <name>http.auth.file</name>
+  <value>httpclient-auth.xml</value>
+  <description>Authentication configuration file for
+  'protocol-httpclient' plugin.
+  </description>
+</property>
+
+<property>
+  <name>http.verbose</name>
+  <value>false</value>
+  <description>If true, HTTP will log more verbosely.</description>
+</property>
+
+<property>
+  <name>http.redirect.max</name>
+  <value>0</value>
+  <description>The maximum number of redirects the fetcher will follow when
+  trying to fetch a page. If set to negative or 0, fetcher won't immediately
+  follow redirected URLs, instead it will record them for later fetching.
+  </description>
+</property>
+
+<property>
+  <name>http.useHttp11</name>
+  <value>false</value>
+  <description>NOTE: at the moment this works only for protocol-httpclient.
+  If true, use HTTP 1.1, if false use HTTP 1.0 .
+  </description>
+</property>
+
+<property>
+  <name>http.accept.language</name>
+  <value>en-us,en-gb,en;q=0.7,*;q=0.3</value>
+  <description>Value of the "Accept-Language" request header field.
+  This allows selecting non-English language as default one to retrieve.
+  It is a useful setting for search engines built for a certain national group.
+  </description>
+</property>
+
+<property>
+  <name>http.accept</name>
+  <value>text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8</value>
+  <description>Value of the "Accept" request header field.
+  </description>
+</property>
+
+<!-- FTP properties -->
+
+<property>
+  <name>ftp.username</name>
+  <value>anonymous</value>
+  <description>ftp login username.</description>
+</property>
+
+<property>
+  <name>ftp.password</name>
+  <value>anonymous@example.com</value>
+  <description>ftp login password.</description>
+</property>
+
+<property>
+  <name>ftp.content.limit</name>
+  <value>65536</value> 
+  <description>The length limit for downloaded content, in bytes.
+  If this value is nonnegative (>=0), content longer than it will be truncated;
+  otherwise, no truncation at all.
+  Caution: classical ftp RFCs never define partial transfer and, in fact,
+  some ftp servers out there do not handle client side forced close-down very
+  well. Our implementation tries its best to handle such situations smoothly.
+  </description>
+</property>
+
+<property>
+  <name>ftp.timeout</name>
+  <value>60000</value>
+  <description>Default timeout for ftp client socket, in millisec.
+  Please also see ftp.keep.connection below.</description>
+</property>
+
+<property>
+  <name>ftp.server.timeout</name>
+  <value>100000</value>
+  <description>An estimation of ftp server idle time, in millisec.
+  Typically it is 120000 millisec for many ftp servers out there.
+  Better be conservative here. Together with ftp.timeout, it is used to
+  decide if we need to delete (annihilate) current ftp.client instance and
+  force to start another ftp.client instance anew. This is necessary because
+  a fetcher thread may not be able to obtain next request from queue in time
+  (due to idleness) before our ftp client times out or remote server
+  disconnects. Used only when ftp.keep.connection is true (please see below).
+  </description>
+</property>
+
+<property>
+  <name>ftp.keep.connection</name>
+  <value>false</value>
+  <description>Whether to keep ftp connection. Useful if crawling same host
+  again and again. When set to true, it avoids connection, login and dir list
+  parser setup for subsequent urls. If it is set to true, however, you must
+  make sure (roughly):
+  (1) ftp.timeout is less than ftp.server.timeout
+  (2) ftp.timeout is larger than (fetcher.threads.fetch * fetcher.server.delay)
+  Otherwise there will be too many "delete client because idled too long"
+  messages in thread logs.</description>
+</property>
+
+<property>
+  <name>ftp.follow.talk</name>
+  <value>false</value>
+  <description>Whether to log dialogue between our client and remote
+  server. Useful for debugging.</description>
+</property>
+
+<!-- web db properties -->
+
+<property>
+  <name>db.default.fetch.interval</name>
+  <value>30</value>
+  <description>(DEPRECATED) The default number of days between re-fetches of a page.
+  </description>
+</property>
+
+<property>
+  <name>db.fetch.interval.default</name>
+  <value>2592000</value>
+  <description>The default number of seconds between re-fetches of a page (30 days).
+  </description>
+</property>
+
+<property>
+  <name>db.fetch.interval.max</name>
+  <value>7776000</value>
+  <description>The maximum number of seconds between re-fetches of a page
+  (90 days). After this period every page in the db will be re-tried, no
+  matter what is its status.
+  </description>
+</property>
+
+<property>
+  <name>db.fetch.schedule.class</name>
+  <value>org.apache.nutch.crawl.DefaultFetchSchedule</value>
+  <description>The implementation of fetch schedule. DefaultFetchSchedule simply
+  adds the original fetchInterval to the last fetch time, regardless of
+  page changes.</description>
+</property>
+
+<property>
+  <name>db.fetch.schedule.adaptive.inc_rate</name>
+  <value>0.4</value>
+  <description>If a page is unmodified, its fetchInterval will be
+  increased by this rate. This value should not
+  exceed 0.5, otherwise the algorithm becomes unstable.</description>
+</property>
+
+<property>
+  <name>db.fetch.schedule.adaptive.dec_rate</name>
+  <value>0.2</value>
+  <description>If a page is modified, its fetchInterval will be
+  decreased by this rate. This value should not
+  exceed 0.5, otherwise the algorithm becomes unstable.</description>
+</property>
+
+<property>
+  <name>db.fetch.schedule.adaptive.min_interval</name>
+  <value>60.0</value>
+  <description>Minimum fetchInterval, in seconds.</description>
+</property>
+
+<property>
+  <name>db.fetch.schedule.adaptive.max_interval</name>
+  <value>31536000.0</value>
+  <description>Maximum fetchInterval, in seconds (365 days).
+  NOTE: this is limited by db.fetch.interval.max. Pages with
+  fetchInterval larger than db.fetch.interval.max
+  will be fetched anyway.</description>
+</property>
+
+<property>
+  <name>db.fetch.schedule.adaptive.sync_delta</name>
+  <value>true</value>
+  <description>If true, try to synchronize with the time of page change
+  by shifting the next fetchTime by a fraction (sync_delta_rate) of the difference
+  between the last modification time, and the last fetch time.</description>
+</property>
+
+<property>
+  <name>db.fetch.schedule.adaptive.sync_delta_rate</name>
+  <value>0.3</value>
+  <description>See sync_delta for description. This value should not
+  exceed 0.5, otherwise the algorithm becomes unstable.</description>
+</property>
+
+<property>
+  <name>db.fetch.schedule.mime.file</name>
+  <value>adaptive-mimetypes.txt</value>
+  <description>The configuration file for the MimeAdaptiveFetchSchedule.
+  </description>
+</property>
+
+<property>
+  <name>db.update.additions.allowed</name>
+  <value>true</value>
+  <description>If true, updatedb will add newly discovered URLs, if false
+  only already existing URLs in the CrawlDb will be updated and no new
+  URLs will be added.
+  </description>
+</property>
+
+<property>
+  <name>db.preserve.backup</name>
+  <value>true</value>
+  <description>If true, updatedb will keep a backup of the previous CrawlDB
+  version in the old directory. In case of disaster, one can rename old to 
+  current and restore the CrawlDB to its previous state.
+  </description>
+</property>
+
+<property>
+  <name>db.update.purge.404</name>
+  <value>false</value>
+  <description>If true, updatedb will purge records with status DB_GONE
+  from the CrawlDB.
+  </description>
+</property>
+
+<property>
+  <name>db.update.max.inlinks</name>
+  <value>10000</value>
+  <description>Maximum number of inlinks to take into account when updating 
+  a URL score in the crawlDB. Only the best scoring inlinks are kept. 
+  </description>
+</property>
+
+<property>
+  <name>db.ignore.internal.links</name>
+  <value>true</value>
+  <description>If true, when adding new links to a page, links from
+  the same host are ignored.  This is an effective way to limit the
+  size of the link database, keeping only the highest quality
+  links.
+  </description>
+</property>
+
+<property>
+  <name>db.ignore.external.links</name>
+  <value>false</value>
+  <description>If true, outlinks leading from a page to external hosts
+  will be ignored. This is an effective way to limit the crawl to include
+  only initially injected hosts, without creating complex URLFilters.
+  </description>
+</property>
+
+ <property>
+  <name>db.injector.overwrite</name>
+  <value>false</value>
+  <description>Whether existing records in the CrawlDB will be overwritten
+  by injected records.
+  </description>
+</property>
+
+<property>
+  <name>db.injector.update</name>
+  <value>false</value>
+  <description>If true existing records in the CrawlDB will be updated with
+  injected records. Old meta data is preserved. The db.injector.overwrite
+  parameter has precedence.
+  </description>
+</property>
+
+<property>
+  <name>db.score.injected</name>
+  <value>1.0</value>
+  <description>The score of new pages added by the injector.
+  </description>
+</property>
+
+<property>
+  <name>db.score.link.external</name>
+  <value>1.0</value>
+  <description>The score factor for new pages added due to a link from
+  another host relative to the referencing page's score. Scoring plugins
+  may use this value to affect initial scores of external links.
+  </description>
+</property>
+
+<property>
+  <name>db.score.link.internal</name>
+  <value>1.0</value>
+  <description>The score factor for pages added due to a link from the
+  same host, relative to the referencing page's score. Scoring plugins
+  may use this value to affect initial scores of internal links.
+  </description>
+</property>
+
+<property>
+  <name>db.score.count.filtered</name>
+  <value>false</value>
+  <description>The score value passed to newly discovered pages is
+  calculated as a fraction of the original page score divided by the
+  number of outlinks. If this option is false, only the outlinks that passed
+  URLFilters will count, if it's true then all outlinks will count.
+  </description>
+</property>
+
+<property>
+  <name>db.max.inlinks</name>
+  <value>10000</value>
+  <description>Maximum number of Inlinks per URL to be kept in LinkDb.
+  If "invertlinks" finds more inlinks than this number, only the first
+  N inlinks will be stored, and the rest will be discarded.
+  </description>
+</property>
+
+<property>
+  <name>db.max.outlinks.per.page</name>
+  <value>100</value>
+  <description>The maximum number of outlinks that we'll process for a page.
+  If this value is nonnegative (>=0), at most db.max.outlinks.per.page outlinks
+  will be processed for a page; otherwise, all outlinks will be processed.
+  </description>
+</property>
+
+<property>
+  <name>db.max.anchor.length</name>
+  <value>100</value>
+  <description>The maximum number of characters permitted in an anchor.
+  </description>
+</property>
+
+ <property>
+  <name>db.parsemeta.to.crawldb</name>
+  <value></value>
+  <description>Comma-separated list of parse metadata keys to transfer to the crawldb (NUTCH-779).
+   Assuming for instance that the languageidentifier plugin is enabled, setting the value to 'lang' 
+   will copy both the key 'lang' and its value to the corresponding entry in the crawldb.
+  </description>
+</property>
+
+<property>
+  <name>db.fetch.retry.max</name>
+  <value>3</value>
+  <description>The maximum number of times a url that has encountered
+  recoverable errors is generated for fetch.</description>
+</property>
+
+<property>
+  <name>db.signature.class</name>
+  <value>org.apache.nutch.crawl.MD5Signature</value>
+  <description>The default implementation of a page signature. Signatures
+  created with this implementation will be used for duplicate detection
+  and removal.</description>
+</property>
+
+<property>
+  <name>db.signature.text_profile.min_token_len</name>
+  <value>2</value>
+  <description>Minimum token length to be included in the signature.
+  </description>
+</property>
+
+<property>
+  <name>db.signature.text_profile.quant_rate</name>
+  <value>0.01</value>
+  <description>Profile frequencies will be rounded down to a multiple of
+  QUANT = (int)(QUANT_RATE * maxFreq), where maxFreq is a maximum token
+  frequency. If maxFreq > 1 then QUANT will be at least 2, which means that
+  for longer texts tokens with frequency 1 will always be discarded.
+  </description>
+</property>
+
+<!-- generate properties -->
+
+<property>
+  <name>generate.max.count</name>
+  <value>-1</value>
+  <description>The maximum number of urls in a single
+  fetchlist.  -1 if unlimited. The urls are counted according
+  to the value of the parameter generate.count.mode.
+  </description>
+</property>
+
+<property>
+  <name>generate.count.mode</name>
+  <value>host</value>
+  <description>Determines how the URLs are counted for generate.max.count.
+  Default value is 'host' but can be 'domain'. Note that we do not count 
+  per IP in the new version of the Generator.
+  </description>
+</property>
+
+<property>
+  <name>generate.update.crawldb</name>
+  <value>false</value>
+  <description>For highly-concurrent environments, where several
+  generate/fetch/update cycles may overlap, setting this to true ensures
+  that generate will create different fetchlists even without intervening
+  updatedb-s, at the cost of running an additional job to update CrawlDB.
+  If false, running generate twice without intervening
+  updatedb will generate identical fetchlists.</description>
+</property>
+
+<property>
+  <name>generate.max.per.host</name>
+  <value>-1</value>
+  <description>(Deprecated). Use generate.max.count and generate.count.mode instead.
+  The maximum number of urls per host in a single
+  fetchlist.  -1 if unlimited.</description>
+</property>
+
+<property>
+  <name>generate.min.score</name>
+  <value>0</value>
+  <description>Select only entries with a score larger than
+  generate.min.score.</description>
+</property>
+
+<property>
+  <name>generate.min.interval</name>
+  <value>-1</value>
+  <description>Select only entries with a retry interval lower than
+  generate.min.interval. A value of -1 disables this check.</description>
+</property>
+
+<!-- urlpartitioner properties -->
+
+<property>
+  <name>partition.url.mode</name>
+  <value>byHost</value>
+  <description>Determines how to partition URLs. Default value is 'byHost', 
+  also takes 'byDomain' or 'byIP'. 
+  </description>
+</property>
+
+<property>
+  <name>crawl.gen.delay</name>
+  <value>604800000</value>
+  <description>
+   This value, expressed in milliseconds, defines how long we should keep the lock on records 
+   in CrawlDb that were just selected for fetching. If these records are not updated 
+   in the meantime, the lock is canceled, i.e. they become eligible for selecting. 
+   Default value of this is 7 days (604800000 ms).
+  </description>
+</property>
+
+<!-- fetcher properties -->
+
+<property>
+  <name>fetcher.server.delay</name>
+  <value>5.0</value>
+  <description>The number of seconds the fetcher will delay between 
+   successive requests to the same server.</description>
+</property>
+
+<property>
+  <name>fetcher.server.min.delay</name>
+  <value>0.0</value>
+  <description>The minimum number of seconds the fetcher will delay between 
+  successive requests to the same server. This value is applicable ONLY
+  if fetcher.threads.per.queue is greater than 1 (i.e. the host blocking
+  is turned off).</description>
+</property>
+
+<property>
+ <name>fetcher.max.crawl.delay</name>
+ <value>30</value>
+ <description>
+ If the Crawl-Delay in robots.txt is set to greater than this value (in
+ seconds) then the fetcher will skip this page, generating an error report.
+ If set to -1 the fetcher will never skip such pages and will wait the
+ amount of time retrieved from robots.txt Crawl-Delay, however long that
+ might be.
+ </description>
+</property> 
+
+<property>
+  <name>fetcher.threads.fetch</name>
+  <value>10</value>
+  <description>The number of FetcherThreads the fetcher should use.
+  This also determines the maximum number of requests that are
+  made at once (each FetcherThread handles one connection). The total
+  number of threads running in distributed mode will be the number of
+  fetcher threads * number of nodes as fetcher has one map task per node.
+  </description>
+</property>
+
+<property>
+  <name>fetcher.threads.per.queue</name>
+  <value>1</value>
+  <description>This number is the maximum number of threads that
+    should be allowed to access a queue at one time. Replaces 
+    deprecated parameter 'fetcher.threads.per.host'.
+   </description>
+</property>
+
+<property>
+  <name>fetcher.queue.mode</name>
+  <value>byHost</value>
+  <description>Determines how to put URLs into queues. Default value is 'byHost', 
+  also takes 'byDomain' or 'byIP'. Replaces the deprecated parameter 
+  'fetcher.threads.per.host.by.ip'.
+  </description>
+</property>
+
+<property>
+  <name>fetcher.verbose</name>
+  <value>false</value>
+  <description>If true, fetcher will log more verbosely.</description>
+</property>
+
+<property>
+  <name>fetcher.parse</name>
+  <value>false</value>
+  <description>If true, fetcher will parse content. Default is false, which means
+  that a separate parsing step is required after fetching is finished.</description>
+</property>
+
+<property>
+  <name>fetcher.store.content</name>
+  <value>true</value>
+  <description>If true, fetcher will store content.</description>
+</property>
+
+<property>
+  <name>fetcher.timelimit.mins</name>
+  <value>-1</value>
+  <description>This is the number of minutes allocated to the fetching.
+  Once this value is reached, any remaining entry from the input URL list is skipped 
+  and all active queues are emptied. The default value of -1 deactivates the time limit.
+  </description>
+</property>
+
+<property>
+  <name>fetcher.max.exceptions.per.queue</name>
+  <value>-1</value>
+  <description>The maximum number of protocol-level exceptions (e.g. timeouts) per
+  host (or IP) queue. Once this value is reached, any remaining entries from this
+  queue are purged, effectively stopping the fetching from this host/IP. The default
+  value of -1 deactivates this limit.
+  </description>
+</property>
+
+<property>
+  <name>fetcher.throughput.threshold.pages</name>
+  <value>-1</value>
+  <description>The threshold of minimum pages per second. If the fetcher downloads fewer
+  pages per second than the configured threshold, the fetcher stops, preventing slow queues
+  from stalling the throughput. This threshold must be an integer. This can be useful when
+  fetcher.timelimit.mins is hard to determine. The default value of -1 disables this check.
+  </description>
+</property>
+
+<property>
+  <name>fetcher.throughput.threshold.retries</name>
+  <value>5</value>
+  <description>The number of times the fetcher.throughput.threshold is allowed to be exceeded.
+  This setting prevents accidental slow downs from immediately killing the fetcher thread.
+  </description>
+</property>
+
+<property>
+  <name>fetcher.throughput.threshold.check.after</name>
+  <value>5</value>
+  <description>The number of minutes after which the throughput check is enabled.</description>
+</property>
+
+<property>
+  <name>fetcher.threads.timeout.divisor</name>
+  <value>2</value>
+  <description>(EXPERT)The thread time-out divisor to use. By default threads have a time-out
+  value of mapred.task.timeout / 2. Increase this setting if the fetcher waits too
+  long before killing hung threads. Be careful, a too high setting (+8) will most likely kill the
+  fetcher threads prematurely.
+  </description>
+</property>
+
+<property>
+  <name>fetcher.queue.depth.multiplier</name>
+  <value>50</value>
+  <description>(EXPERT)The fetcher buffers the incoming URLs into queues based on the [host|domain|IP]
+  (see param fetcher.queue.mode). The depth of the queue is the number of threads times the value of this parameter.
+  A large value requires more memory but can improve the performance of the fetch when the order of the URLS in the fetch list
+  is not optimal.
+  </description>
+</property>
+
+<property>
+  <name>fetcher.follow.outlinks.depth</name>
+  <value>-1</value>
+  <description>(EXPERT)When fetcher.parse is true and this value is greater than 0 the fetcher will extract outlinks
+  and follow until the desired depth is reached. A value of 1 means all generated pages are fetched and their first degree
+  outlinks are fetched and parsed too. Be careful, this feature is in itself agnostic of the state of the CrawlDB and does not
+  know about already fetched pages. A setting larger than 2 will most likely fetch home pages twice in the same fetch cycle.
+  It is highly recommended to set db.ignore.external.links to true to restrict the outlink follower to URL's within the same
+  domain. When disabled (false) the feature is likely to follow duplicates even when depth=1.
+  A value of -1 or 0 disables this feature.
+  </description>
+</property>
+
+<property>
+  <name>fetcher.follow.outlinks.num.links</name>
+  <value>4</value>
+  <description>(EXPERT)The number of outlinks to follow when fetcher.follow.outlinks.depth is enabled. Be careful, this can multiply
+  the total number of pages to fetch. This works with fetcher.follow.outlinks.depth.divisor, by default settings the followed outlinks
+  at depth 1 is 8, not 4.
+  </description>
+</property>
+
+<property>
+  <name>fetcher.follow.outlinks.depth.divisor</name>
+  <value>2</value>
+  <description>(EXPERT)The divisor of fetcher.follow.outlinks.num.links per fetcher.follow.outlinks.depth. This decreases the number
+  of outlinks to follow by increasing depth. The formula used is: outlinks = floor(divisor / depth * num.links). This prevents
+  exponential growth of the fetch list.
+  </description>
+</property>
+
+<property>
+  <name>fetcher.follow.outlinks.ignore.external</name>
+  <value>true</value>  
+  <description>Whether to ignore or follow external links. Set db.ignore.external.links to false and this to true to store outlinks
+  in the output but not follow them. If db.ignore.external.links is true this directive is ignored.
+  </description>
+</property>
+
+<!-- moreindexingfilter plugin properties -->
+
+<property>
+  <name>moreIndexingFilter.indexMimeTypeParts</name>
+  <value>true</value>
+  <description>Determines whether the index-more plugin will split the mime-type
+  in sub parts, this requires the type field to be multi valued. Set to true for backward
+  compatibility. False will not split the mime-type.
+  </description>
+</property>
+
+<property>
+  <name>moreIndexingFilter.mapMimeTypes</name>
+  <value>false</value>
+  <description>Determines whether MIME-type mapping is enabled. It takes a
+  plain text file with mapped MIME-types. With it the user can map both
+  application/xhtml+xml and text/html to the same target MIME-type so it
+  can be treated equally in an index. See conf/contenttype-mapping.txt.
+  </description>
+</property>
+
+<!-- AnchorIndexing filter plugin properties -->
+
+<property>
+  <name>anchorIndexingFilter.deduplicate</name>
+  <value>false</value>
+  <description>With this enabled the indexer will deduplicate anchors case-insensitively
+  before indexing. This prevents possible hundreds or thousands of identical anchors for
+  a given page to be indexed but will affect the search scoring (i.e. tf=1.0f).
+  </description>
+</property>
+
+<!-- indexingfilter plugin properties -->
+
+<property>
+  <name>indexingfilter.order</name>
+  <value></value>
+  <description>The order by which index filters are applied.
+  If empty, all available index filters (as dictated by properties
+  plugin-includes and plugin-excludes above) are loaded and applied in system
+  defined order. If not empty, only named filters are loaded and applied
+  in given order. For example, if this property has value:
+  org.apache.nutch.indexer.basic.BasicIndexingFilter org.apache.nutch.indexer.more.MoreIndexingFilter
+  then BasicIndexingFilter is applied first, and MoreIndexingFilter second.
+  
+  Filter ordering might have impact on result if one filter depends on output of
+  another filter.
+  </description>
+</property>
+
+<property>
+  <name>indexer.score.power</name>
+  <value>0.5</value>
+  <description>Determines the power of link analysis scores.  Each
+  page's boost is set to <i>score<sup>scorePower</sup></i> where
+  <i>score</i> is its link analysis score and <i>scorePower</i> is the
+  value of this parameter.  This is compiled into indexes, so, when
+  this is changed, pages must be re-indexed for it to take
+  effect.</description>
+</property>
+
+<property>
+  <name>indexer.max.title.length</name>
+  <value>100</value>
+  <description>The maximum number of characters of a title that are indexed.
+  </description>
+</property>
+
+<property>
+  <name>indexer.max.content.length</name>
+  <value>-1</value>
+  <description>The maximum number of characters of a content that are indexed.
+  Content beyond the limit is truncated. A value of -1 disables this check.
+  </description>
+</property>
+
+<property>
+  <name>indexer.add.domain</name>
+  <value>false</value>
+  <description>Whether to add the domain field to a NutchDocument.</description>
+</property>
+
+<property>
+  <name>indexer.skip.notmodified</name>
+  <value>false</value>
+  <description>Whether the indexer will skip records with a db_notmodified status.
+  </description>
+</property>
+
+<!-- URL normalizer properties -->
+
+<property>
+  <name>urlnormalizer.order</name>
+  <value>org.apache.nutch.net.urlnormalizer.basic.BasicURLNormalizer org.apache.nutch.net.urlnormalizer.regex.RegexURLNormalizer</value>
+  <description>Order in which normalizers will run. If any of these isn't
+  activated it will be silently skipped. If other normalizers not on the
+  list are activated, they will run in random order after the ones
+  specified here are run.
+  </description>
+</property>
+
+<property>
+  <name>urlnormalizer.regex.file</name>
+  <value>regex-normalize.xml</value>
+  <description>Name of the config file used by the RegexURLNormalizer class.
+  </description>
+</property>
+
+<property>
+  <name>urlnormalizer.loop.count</name>
+  <value>1</value>
+  <description>Optionally loop through normalizers several times, to make
+  sure that all transformations have been performed.
+  </description>
+</property>
+
+<!-- mime properties -->
+
+<!--
+<property>
+  <name>mime.types.file</name>
+  <value>tika-mimetypes.xml</value>
+  <description>Name of file in CLASSPATH containing filename extension and
+  magic sequence to mime types mapping information. Overrides the default Tika config 
+  if specified.
+  </description>
+</property>
+-->
+
+<property>
+  <name>mime.type.magic</name>
+  <value>true</value>
+  <description>Defines if the mime content type detector uses magic resolution.
+  </description>
+</property>
+
+<!-- plugin properties -->
+
+<property>
+  <name>plugin.folders</name>
+  <value>plugins</value>
+  <description>Directories where nutch plugins are located.  Each
+  element may be a relative or absolute path.  If absolute, it is used
+  as is.  If relative, it is searched for on the classpath.</description>
+</property>
+
+<property>
+  <name>plugin.auto-activation</name>
+  <value>true</value>
+  <description>Defines whether plugins that are not activated by
+  the plugin.includes and plugin.excludes properties must be automatically
+  activated if they are needed by some activated plugins.
+  </description>
+</property>
+
+<property>
+  <name>plugin.includes</name>
+  <value>protocol-http|urlfilter-regex|parse-(html|tika)|index-(basic|anchor)|scoring-opic|urlnormalizer-(pass|regex|basic)</value>
+  <description>Regular expression naming plugin directory names to
+  include.  Any plugin not matching this expression is excluded.
+  In any case you need to at least include the nutch-extensionpoints plugin. By
+  default Nutch includes crawling just HTML and plain text via HTTP,
+  and basic indexing and search plugins. In order to use HTTPS please enable 
+  protocol-httpclient, but be aware of possible intermittent problems with the 
+  underlying commons-httpclient library.
+  </description>
+</property>
+
+<property>
+  <name>plugin.excludes</name>
+  <value></value>
+  <description>Regular expression naming plugin directory names to exclude.  
+  </description>
+</property>
+
+<property>
+  <name>urlmeta.tags</name>
+  <value></value>
+  <description>
+    To be used in conjunction with features introduced in NUTCH-655, which allows
+    for custom metatags to be injected alongside your crawl URLs. Specifying those
+    custom tags here will allow for their propagation into a page's outlinks, as
+    well as allow for them to be included as part of an index.
+    Values should be comma-delimited. ("tag1,tag2,tag3") Do not pad the tags with
+    white-space at their boundaries, if you are using anything earlier than Hadoop-0.21. 
+  </description>
+</property>
+
+<!-- parser properties -->
+
+<property>
+  <name>parse.plugin.file</name>
+  <value>parse-plugins.xml</value>
+  <description>The name of the file that defines the associations between
+  content-types and parsers.</description>
+</property>
+
+<property>
+  <name>parser.character.encoding.default</name>
+  <value>windows-1252</value>
+  <description>The character encoding to fall back to when no other information
+  is available</description>
+</property>
+
+<property>
+  <name>encodingdetector.charset.min.confidence</name>
+  <value>-1</value>
+  <description>An integer between 0 and 100 indicating the minimum confidence value
+  for charset auto-detection. Any negative value disables auto-detection.
+  </description>
+</property>
+
+<property>
+  <name>parser.caching.forbidden.policy</name>
+  <value>content</value>
+  <description>If a site (or a page) requests through its robot metatags
+  that it should not be shown as cached content, apply this policy. Currently
+  three keywords are recognized: "none" ignores any "noarchive" directives.
+  "content" doesn't show the content, but shows summaries (snippets).
+  "all" doesn't show either content or summaries.</description>
+</property>
+
+<property>
+  <name>parser.html.impl</name>
+  <value>neko</value>
+  <description>HTML Parser implementation. Currently the following keywords
+  are recognized: "neko" uses NekoHTML, "tagsoup" uses TagSoup.
+  </description>
+</property>
+
+<property>
+  <name>parser.html.form.use_action</name>
+  <value>false</value>
+  <description>If true, HTML parser will collect URLs from form action
+  attributes. This may lead to undesirable behavior (submitting empty
+  forms during next fetch cycle). If false, form action attribute will
+  be ignored.</description>
+</property>
+
+<property>
+  <name>parser.html.outlinks.ignore_tags</name>
+  <value></value>
+  <description>Comma separated list of HTML tags, from which outlinks 
+  shouldn't be extracted. Nutch takes links from: a, area, form, frame, 
+  iframe, script, link, img. If you add any of those tags here, it
+  won't be taken. Default is empty list. Probably reasonable value
+  for most people would be "img,script,link".</description>
+</property>
+
+<property>
+  <name>parser.fix.embeddedparams</name>
+  <value>true</value>
+  <description>Whether to fix URL embedded params using semi-colons.
+  See NUTCH-436 and NUTCH-1115</description>
+</property>
+
+<property>
+  <name>htmlparsefilter.order</name>
+  <value></value>
+  <description>The order by which HTMLParse filters are applied.
+  If empty, all available HTMLParse filters (as dictated by properties
+  plugin.includes and plugin.excludes above) are loaded and applied in system
+  defined order. If not empty, only named filters are loaded and applied
+  in given order.
+  HTMLParse filter ordering MAY have an impact
+  on end result, as some filters could rely on the metadata generated by a previous filter.
+  </description>
+</property>
+
+<property>
+  <name>parser.timeout</name>
+  <value>30</value>
+  <description>Timeout in seconds for the parsing of a document, otherwise treats it as an exception and 
+  moves on to the following documents. This parameter is applied to any Parser implementation. 
+  Set to -1 to deactivate, bearing in mind that this could cause
+  the parsing to crash because of a very long or corrupted document.
+  </description>
+</property>
+
+<property>
+  <name>parse.filter.urls</name>
+  <value>true</value>
+  <description>Whether the parser will filter URLs (with the configured URL filters).</description>
+</property>
+
+<property>
+  <name>parse.normalize.urls</name>
+  <value>true</value>
+  <description>Whether the parser will normalize URLs (with the configured URL normalizers).</description>
+</property>
+
+<property>
+  <name>parser.skip.truncated</name>
+  <value>true</value>
+  <description>Boolean value for whether we should skip parsing for truncated documents. By default this 
+  property is activated due to extremely high levels of CPU which parsing can sometimes take.  
+  </description>
+</property>
+
+<!-- urlfilter plugin properties -->
+
+<property>
+  <name>urlfilter.domain.file</name>
+  <value>domain-urlfilter.txt</value>
+  <description>Name of file on CLASSPATH containing either top level domains or
+  hostnames used by urlfilter-domain (DomainURLFilter) plugin.</description>
+</property>
+
+<property>
+  <name>urlfilter.regex.file</name>
+  <value>regex-urlfilter.txt</value>
+  <description>Name of file on CLASSPATH containing regular expressions
+  used by urlfilter-regex (RegexURLFilter) plugin.</description>
+</property>
+
+<property>
+  <name>urlfilter.automaton.file</name>
+  <value>automaton-urlfilter.txt</value>
+  <description>Name of file on CLASSPATH containing regular expressions
+  used by urlfilter-automaton (AutomatonURLFilter) plugin.</description>
+</property>
+
+<property>
+  <name>urlfilter.prefix.file</name>
+  <value>prefix-urlfilter.txt</value>
+  <description>Name of file on CLASSPATH containing url prefixes
+  used by urlfilter-prefix (PrefixURLFilter) plugin.</description>
+</property>
+
+<property>
+  <name>urlfilter.suffix.file</name>
+  <value>suffix-urlfilter.txt</value>
+  <description>Name of file on CLASSPATH containing url suffixes
+  used by urlfilter-suffix (SuffixURLFilter) plugin.</description>
+</property>
+
+<property>
+  <name>urlfilter.order</name>
+  <value></value>
+  <description>The order by which url filters are applied.
+  If empty, all available url filters (as dictated by properties
+  plugin.includes and plugin.excludes above) are loaded and applied in system
+  defined order. If not empty, only named filters are loaded and applied
+  in given order. For example, if this property has value:
+  org.apache.nutch.urlfilter.regex.RegexURLFilter org.apache.nutch.urlfilter.prefix.PrefixURLFilter
+  then RegexURLFilter is applied first, and PrefixURLFilter second.
+  Since all filters are AND'ed, filter ordering does not have impact
+  on end result, but it may have performance implication, depending
+  on relative expensiveness of filters.
+  </description>
+</property>
+
+<!-- scoring filters properties -->
+
+<property>
+  <name>scoring.filter.order</name>
+  <value></value>
+  <description>The order in which scoring filters are applied.
+  This may be left empty (in which case all available scoring
+  filters will be applied in the order defined in plugin.includes
+  and plugin.excludes), or a space separated list of implementation
+  classes.
+  </description>
+</property>
+
+<!-- scoring-depth properties
+ Add 'scoring-depth' to the list of active plugins
+ in the parameter 'plugin.includes' in order to use it.
+ -->
+
+<property>
+  <name>scoring.depth.max</name>
+  <value>1000</value>
+  <description>Max depth value from seed allowed by default.
+  Can be overridden on a per-seed basis by specifying "_maxdepth_=VALUE"
+  as a seed metadata. This plugin adds a "_depth_" metadatum to the pages
+  to track the distance from the seed it was found from. 
+  The depth is used to prioritise URLs in the generation step so that
+  shallower pages are fetched first.
+  </description>
+</property>
+
+<!-- language-identifier plugin properties -->
+
+<property>
+  <name>lang.analyze.max.length</name>
+  <value>2048</value>
+  <description> The maximum number of bytes of data to use to identify
+  the language (0 means full content analysis).
+  The larger this value, the better the analysis, but the
+  slower it is.
+  </description>
+</property>
+
+<property>
+  <name>lang.extraction.policy</name>
+  <value>detect,identify</value>
+  <description>This determines when the plugin uses detection and
+  statistical identification mechanisms. The order in which the
+  detect and identify are written will determine the extraction
+  policy. Default case (detect,identify)  means the plugin will
+  first try to extract language info from page headers and metadata,
+  if this is not successful it will try using tika language
+  identification. Possible values are:
+    detect
+    identify
+    detect,identify
+    identify,detect
+  </description>
+</property>
+
+<property>
+  <name>lang.identification.only.certain</name>
+  <value>false</value>
+  <description>If set to true with lang.extraction.policy containing identify,
+  the language code returned by Tika will be assigned to the document ONLY
+  if it is deemed certain by Tika.
+  </description>
+</property>
+
+<!-- index-static plugin properties -->
+
+<property>
+  <name>index.static</name>
+  <value></value>
+  <description>
+  A simple plugin called at indexing that adds fields with static data. 
+  You can specify a list of fieldname:fieldcontent per nutch job.
+  It can be useful when collections can't be created by urlpatterns, 
+  like in subcollection, but on a job-basis.
+  </description>
+</property>
+
+<!-- index-metadata plugin properties -->
+
+<property>
+  <name>index.parse.md</name>
+  <value>metatag.description,metatag.keywords</value>
+  <description>
+  Comma-separated list of keys to be taken from the parse metadata to generate fields.
+  Can be used e.g. for 'description' or 'keywords' provided that these values are generated
+  by a parser (see parse-metatags plugin)  
+  </description>
+</property>
+
+<property>
+  <name>index.content.md</name>
+  <value></value>
+  <description>
+   Comma-separated list of keys to be taken from the content metadata to generate fields. 
+  </description>
+</property>
+
+<property>
+  <name>index.db.md</name>
+  <value></value>
+  <description>
+     Comma-separated list of keys to be taken from the crawldb metadata to generate fields.
+     Can be used to index values propagated from the seeds with the plugin urlmeta 
+  </description>
+</property>
+
+<!-- parse-metatags plugin properties -->
+<property>
+  <name>metatags.names</name>
+  <value>description;keywords</value>
+  <description> Names of the metatags to extract, separated by ';'. 
+  Use '*' to extract all metatags. Prefixes the names with 'metatag.'
+  in the parse-metadata. For instance to index description and keywords, 
+  you need to activate the plugin index-metadata and set the value of the 
+  parameter 'index.parse.md' to 'metatag.description,metatag.keywords'.
+  </description>
+</property>
+
+<!-- Temporary Hadoop 0.17.x workaround. -->
+
+<property>
+  <name>hadoop.job.history.user.location</name>
+  <value>${hadoop.log.dir}/history/user</value>
+  <description>Hadoop 0.17.x comes with a default setting to create
+     user logs inside the output path of the job. This breaks some
+     Hadoop classes, which expect the output to contain only
+     part-XXXXX files. This setting changes the output to a
+     subdirectory of the regular log directory.
+  </description>
+</property>
+
+<!-- linkrank scoring properties -->
+
+<property>
+  <name>link.ignore.internal.host</name>
+  <value>true</value>
+  <description>Ignore outlinks to the same hostname.</description>
+</property>
+
+<property>
+  <name>link.ignore.internal.domain</name>
+  <value>true</value>
+  <description>Ignore outlinks to the same domain.</description>
+</property>
+
+<property>
+  <name>link.ignore.limit.page</name>
+  <value>true</value>
+  <description>Limit to only a single outlink to the same page.</description>
+</property>
+
+<property>
+  <name>link.ignore.limit.domain</name>
+  <value>true</value>
+  <description>Limit to only a single outlink to the same domain.</description>
+</property> 
+
+<property>
+  <name>link.analyze.num.iterations</name>
+  <value>10</value>
+  <description>The number of LinkRank iterations to run.</description>
+</property>
+
+<property>
+  <name>link.analyze.initial.score</name>
+  <value>1.0f</value>
+  <description>The initial score.</description>
+</property>
+
+<property>
+  <name>link.analyze.damping.factor</name>
+  <value>0.85f</value>
+  <description>The damping factor.</description>
+</property>
+
+<property>
+  <name>link.delete.gone</name>
+  <value>false</value>
+  <description>Whether to delete gone pages from the web graph.</description>
+</property>
+
+<property> 
+  <name>link.loops.depth</name>
+  <value>2</value>
+  <description>The depth for the loops algorithm.</description>
+</property>
+
+<property>
+  <name>link.score.updater.clear.score</name>
+  <value>0.0f</value>
+  <description>The default score for URL's that are not in the web graph.</description>
+</property>
+
+<property>
+  <name>mapreduce.fileoutputcommitter.marksuccessfuljobs</name>
+  <value>false</value>
+  <description>Hadoop >= 0.21 generates SUCCESS files in the output which can crash 
+  the readers. This should not be an issue once Nutch is ported to the new MapReduce API
+  but for now this parameter should prevent such cases.
+  </description>
+</property>
+
+<!-- solr index properties -->
+
+<property>
+  <name>solr.mapping.file</name>
+  <value>solrindex-mapping.xml</value>
+  <description>
+  Defines the name of the file that will be used in the mapping of internal
+  nutch field names to solr index fields as specified in the target Solr schema.
+  </description>
+</property>
+
+<property> 
+  <name>solr.commit.size</name>
+  <value>250</value>
+  <description>
+  Defines the number of documents to send to Solr in a single update batch.
+  Decrease when handling very large documents to prevent Nutch from running
+  out of memory. NOTE: It does not explicitly trigger a server side commit.
+  </description>
+</property>
+
+<property>
+  <name>solr.commit.index</name>
+  <value>true</value>
+  <description>
+  When closing the indexer, trigger a commit to the Solr server. 
+  </description>
+</property>
+
+<property>
+  <name>solr.auth</name>
+  <value>false</value>
+  <description>
+  Whether to enable HTTP basic authentication for communicating with Solr.
+  Use the solr.auth.username and solr.auth.password properties to configure
+  your credentials.
+  </description>
+</property>
+
+<!-- subcollection properties -->
+
+<property>
+  <name>subcollection.default.field</name>
+  <value>subcollection</value>
+  <description>
+  The default field name for the subcollections.
+  </description>
+</property>
+
+</configuration>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conf/nutch-site.xml	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,62 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+<property>
+ <name>http.agent.name</name>
+ <value>MPIWG crawler2</value>
+</property>
+<property>
+ <name>urlmeta.tags</name>
+ <value>description</value>
+</property>
+<property>
+ <name>urlmeta.mpiwg</name>
+ <value>first_name,last_name,project_title,project_author,project_author_url,description,main_content,lang,urlNorm</value>
+</property>
+
+
+<property>
+<name>urlmeta.mpiwg-parser</name>
+<value>mpiwg-parser.xml</value>
+</property>
+
+
+<property>
+<name>urlmeta.mpiwg-dom-parser</name>
+<value>mpiwg-dom-parser.xml</value>
+</property>
+
+
+<property>
+  <name>plugin.includes</name>
+  <value>nutch-extensionpoints|protocol-http|urlfilter-regex|parse-(html|tika)|index-(basic|anchor)|scoring-opic|urlnormalizer-(pass|regex|basic)|parse-metatags|parse-mpiwg|parse-MPIWG-metaTag</value>
+  <description>Regular expression naming plugin directory names to
+  include.  Any plugin not matching this expression is excluded.
+  In any case you need to at least include the nutch-extensionpoints plugin. By
+  default Nutch includes crawling just HTML and plain text via HTTP,
+  and basic indexing and search plugins.
+  </description>
+</property>
+
+
+<property>
+  <name>http.content.limit</name>
+  <value>200000</value>
+  <description>The length limit for downloaded content using the http://
+  protocol, in bytes. If this value is nonnegative (>=0), content longer
+  than it will be truncated; otherwise, no truncation at all. Do not
+  confuse this setting with the file.content.limit setting.
+  </description>
+</property>
+
+<property>
+  <name>fetcher.server.delay</name>
+  <value>1</value>
+  <description>The number of seconds the fetcher will delay between 
+   successive requests to the same server.</description>
+</property>
+
+</configuration>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conf/nutch-site.xml.template	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+</configuration>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conf/parse-plugins.dtd	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,12 @@
+<!ELEMENT parse-plugins  (mimeType+,aliases)>
+<!ELEMENT mimeType (plugin+)>
+<!ATTLIST mimeType name CDATA #REQUIRED>
+
+<!ELEMENT plugin EMPTY>
+<!ATTLIST plugin id CDATA #REQUIRED>
+<!ATTLIST plugin order CDATA ''>
+
+<!ELEMENT aliases (alias+)>
+<!ELEMENT alias EMPTY>
+<!ATTLIST alias name CDATA #REQUIRED>
+<!ATTLIST alias extension-id CDATA #REQUIRED>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conf/parse-plugins.xml	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,98 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+	Licensed to the Apache Software Foundation (ASF) under one or more
+	contributor license agreements.  See the NOTICE file distributed with
+	this work for additional information regarding copyright ownership.
+	The ASF licenses this file to You under the Apache License, Version 2.0
+	(the "License"); you may not use this file except in compliance with
+	the License.  You may obtain a copy of the License at
+	
+	http://www.apache.org/licenses/LICENSE-2.0
+	
+	Unless required by applicable law or agreed to in writing, software
+	distributed under the License is distributed on an "AS IS" BASIS,
+	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	See the License for the specific language governing permissions and
+	limitations under the License.
+	
+	Author     : mattmann 
+	Description: This xml file represents a natural ordering for which parsing 
+	plugin should get called for a particular mimeType. 
+-->
+
+<parse-plugins>
+
+  <!--  by default if the mimeType is set to *, or 
+        if it can't be determined, use parse-tika -->
+	<mimeType name="*">
+	  <plugin id="parse-tika" />
+	</mimeType>
+ 
+	<mimeType name="application/rss+xml">
+	    <plugin id="parse-tika" />
+	    <plugin id="feed" />
+	</mimeType>
+
+	<mimeType name="application/x-bzip2">
+		<!--  try and parse it with the zip parser -->
+		<plugin id="parse-zip" />
+	</mimeType>
+
+	<mimeType name="application/x-gzip">
+		<!--  try and parse it with the zip parser -->
+		<plugin id="parse-zip" />
+	</mimeType>
+
+	<mimeType name="application/x-javascript">
+		<plugin id="parse-js" />
+	</mimeType>
+
+	<mimeType name="application/x-shockwave-flash">
+		<plugin id="parse-swf" />
+	</mimeType>
+
+	<mimeType name="application/zip">
+		<plugin id="parse-zip" />
+	</mimeType>
+
+	<mimeType name="text/html">
+		<plugin id="parse-html" />
+	</mimeType>
+
+        <mimeType name="application/xhtml+xml">
+		<plugin id="parse-html" />
+	</mimeType>
+
+	<mimeType name="text/xml">
+		<plugin id="parse-tika" />
+		<plugin id="feed" />
+	</mimeType>
+
+       <!-- Types for parse-ext plugin: required for unit tests to pass. -->
+
+	<mimeType name="application/vnd.nutch.example.cat">
+		<plugin id="parse-ext" />
+	</mimeType>
+
+	<mimeType name="application/vnd.nutch.example.md5sum">
+		<plugin id="parse-ext" />
+	</mimeType>
+
+	<!--  alias mappings for parse-xxx names to the actual extension implementation 
+	ids described in each plugin's plugin.xml file -->
+	<aliases>
+		<alias name="parse-tika" 
+			extension-id="org.apache.nutch.parse.tika.TikaParser" />
+		<alias name="parse-ext" extension-id="ExtParser" />
+		<alias name="parse-html"
+			extension-id="org.apache.nutch.parse.html.HtmlParser" />
+		<alias name="parse-js" extension-id="JSParser" />
+		<alias name="feed"
+			extension-id="org.apache.nutch.parse.feed.FeedParser" />
+		<alias name="parse-swf"
+			extension-id="org.apache.nutch.parse.swf.SWFParser" />
+		<alias name="parse-zip"
+			extension-id="org.apache.nutch.parse.zip.ZipParser" />
+	</aliases>
+	
+</parse-plugins>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conf/prefix-urlfilter.txt	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# config file for urlfilter-prefix plugin
+
+http://
+https://
+ftp://
+file://
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conf/prefix-urlfilter.txt.template	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# config file for urlfilter-prefix plugin
+
+http://
+https://
+ftp://
+file://
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conf/regex-normalize.xml	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,72 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<!-- This is the configuration file for the RegexUrlNormalize Class.
+     This is intended so that users can specify substitutions to be
+     done on URLs. The regex engine that is used is Perl5 compatible.
+     The rules are applied to URLs in the order they occur in this file.  -->
+
+<!-- WATCH OUT: an XML parser reads this file and ampersands must be
+     expanded to &amp; -->
+
+<!-- The following rules show how to strip out session IDs, default pages, 
+     interpage anchors, etc. Order does matter!  -->
+<regex-normalize>
+
+<!-- removes session ids from urls (such as jsessionid and PHPSESSID) -->
+<regex>
+  <pattern>(?i)(;?\b_?(l|j|bv_)?(sid|phpsessid|sessionid)=.*?)(\?|&amp;|#|$)</pattern>
+  <substitution>$4</substitution>
+</regex>
+
+<!-- changes default pages into standard for /index.html, etc. into /
+<regex>
+  <pattern>/((?i)index|default)\.((?i)js[pf]{1}?[afx]?|cgi|cfm|asp[x]?|[psx]?htm[l]?|php[3456]?)(\?|&amp;|#|$)</pattern>
+  <substitution>/$3</substitution>
+</regex> -->
+
+<!-- removes interpage href anchors such as site.com#location -->
+<regex>
+  <pattern>#.*?(\?|&amp;|$)</pattern>
+  <substitution>$1</substitution>
+</regex>
+
+<!-- cleans ?&amp;var=value into ?var=value -->
+<regex>
+  <pattern>\?&amp;</pattern>
+  <substitution>\?</substitution>
+</regex>
+
+<!-- cleans multiple sequential ampersands into a single ampersand -->
+<regex>
+  <pattern>&amp;{2,}</pattern>
+  <substitution>&amp;</substitution>
+</regex>
+
+<!-- removes a trailing ?, &amp; or . -->
+<regex>
+  <pattern>[\?&amp;\.]$</pattern>
+  <substitution></substitution>
+</regex>
+
+<!-- removes duplicate slashes -->
+<regex>
+  <pattern>(?&lt;!:)/{2,}</pattern>
+  <substitution>/</substitution>
+</regex>
+
+</regex-normalize>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conf/regex-normalize.xml.template	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,72 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<!-- This is the configuration file for the RegexUrlNormalize Class.
+     This is intended so that users can specify substitutions to be
+     done on URLs. The regex engine that is used is Perl5 compatible.
+     The rules are applied to URLs in the order they occur in this file.  -->
+
+<!-- WATCH OUT: an XML parser reads this file and ampersands must be
+     expanded to &amp; -->
+
+<!-- The following rules show how to strip out session IDs, default pages, 
+     interpage anchors, etc. Order does matter!  -->
+<regex-normalize>
+
+<!-- removes session ids from urls (such as jsessionid and PHPSESSID) -->
+<regex>
+  <pattern>(?i)(;?\b_?(l|j|bv_)?(sid|phpsessid|sessionid)=.*?)(\?|&amp;|#|$)</pattern>
+  <substitution>$4</substitution>
+</regex>
+
+<!-- changes default pages into standard for /index.html, etc. into /
+<regex>
+  <pattern>/((?i)index|default)\.((?i)js[pf]{1}?[afx]?|cgi|cfm|asp[x]?|[psx]?htm[l]?|php[3456]?)(\?|&amp;|#|$)</pattern>
+  <substitution>/$3</substitution>
+</regex> -->
+
+<!-- removes interpage href anchors such as site.com#location -->
+<regex>
+  <pattern>#.*?(\?|&amp;|$)</pattern>
+  <substitution>$1</substitution>
+</regex>
+
+<!-- cleans ?&amp;var=value into ?var=value -->
+<regex>
+  <pattern>\?&amp;</pattern>
+  <substitution>\?</substitution>
+</regex>
+
+<!-- cleans multiple sequential ampersands into a single ampersand -->
+<regex>
+  <pattern>&amp;{2,}</pattern>
+  <substitution>&amp;</substitution>
+</regex>
+
+<!-- removes a trailing ?, &amp; or . -->
+<regex>
+  <pattern>[\?&amp;\.]$</pattern>
+  <substitution></substitution>
+</regex>
+
+<!-- removes duplicate slashes -->
+<regex>
+  <pattern>(?&lt;!:)/{2,}</pattern>
+  <substitution>/</substitution>
+</regex>
+
+</regex-normalize>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conf/regex-urlfilter.txt	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,52 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# The default url filter.
+# Better for whole-internet crawling.
+
+# Each non-comment, non-blank line contains a regular expression
+# prefixed by '+' or '-'.  The first matching pattern in the file
+# determines whether a URL is included or ignored.  If no pattern
+# matches, the URL is ignored.
+
+# skip file: ftp: and mailto: urls
+-^(file|ftp|mailto):
+
+# skip image and other suffixes we can't yet parse
+# for a more extensive coverage use the urlfilter-suffix plugin
+-\.(gif|GIF|jpg|JPG|png|PNG|ico|ICO|css|CSS|sit|SIT|eps|EPS|wmf|WMF|zip|ZIP|ppt|PPT|mpg|MPG|xls|XLS|gz|GZ|rpm|RPM|tgz|TGZ|mov|MOV|exe|EXE|jpeg|JPEG|bmp|BMP|js|JS)$
+
+# skip URLs containing certain characters as probable queries, etc.
+-[?*!@=]
+
+# skip URLs with slash-delimited segment that repeats 3+ times, to break loops
+-.*(/[^/]+)/[^/]+\1/[^/]+\1/
+
+
+# accept only URLs under http://127.0.0.1:18080/www_neu/ (anything else matches no rule and is ignored)
++^http://127.0.0.1:18080/www_neu/.*
+#+^http://127.0.0.1:18080/www_neu/en/staff/index.html
+
+#+^http://127.0.0.1:18080/www_neu/en/staff/members/.*
+
+#+^http://127.0.0.1:18080/www_neu/en/research/index.html
+
+#+^http://127.0.0.1:18080/www_neu/en/research/projects/.*
+
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conf/regex-urlfilter.txt.template	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,39 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# The default url filter.
+# Better for whole-internet crawling.
+
+# Each non-comment, non-blank line contains a regular expression
+# prefixed by '+' or '-'.  The first matching pattern in the file
+# determines whether a URL is included or ignored.  If no pattern
+# matches, the URL is ignored.
+
+# skip file: ftp: and mailto: urls
+-^(file|ftp|mailto):
+
+# skip image and other suffixes we can't yet parse
+# for a more extensive coverage use the urlfilter-suffix plugin
+-\.(gif|GIF|jpg|JPG|png|PNG|ico|ICO|css|CSS|sit|SIT|eps|EPS|wmf|WMF|zip|ZIP|ppt|PPT|mpg|MPG|xls|XLS|gz|GZ|rpm|RPM|tgz|TGZ|mov|MOV|exe|EXE|jpeg|JPEG|bmp|BMP|js|JS)$
+
+# skip URLs containing certain characters as probable queries, etc.
+-[?*!@=]
+
+# skip URLs with slash-delimited segment that repeats 3+ times, to break loops
+-.*(/[^/]+)/[^/]+\1/[^/]+\1/
+
+# accept anything else
++.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conf/schema-solr4.xml	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,363 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<!--
+    Description: This document contains Solr 4.x schema definition to
+    be used with Solr integration currently built into Nutch.
+    This schema is not minimal, there are some useful field type definitions left,
+    and the set of fields and their flags (indexed/stored/term vectors) can be
+    further optimized depending on needs.  See
+    http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/solr/conf/schema.xml?view=markup
+    for more info.
+-->
+
+<schema name="nutch" version="1.5">
+
+  <types>
+
+    <!-- The StrField type is not analyzed, but indexed/stored verbatim. -->
+    <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
+
+
+    <!--
+      Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
+    -->
+    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
+
+    <!--
+     Numeric field types that index each value at various levels of precision
+     to accelerate range queries when the number of values between the range
+     endpoints is large. See the javadoc for NumericRangeQuery for internal
+     implementation details.
+
+     Smaller precisionStep values (specified in bits) will lead to more tokens
+     indexed per value, slightly larger index size, and faster range queries.
+     A precisionStep of 0 disables indexing at different precision levels.
+    -->
+    <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
+
+    <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
+         is a more restricted form of the canonical representation of dateTime
+         http://www.w3.org/TR/xmlschema-2/#dateTime    
+         The trailing "Z" designates UTC time and is mandatory.
+         Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
+         All other components are mandatory.
+
+         Expressions can also be used to denote calculations that should be
+         performed relative to "NOW" to determine the value, ie...
+
+               NOW/HOUR
+                  ... Round to the start of the current hour
+               NOW-1DAY
+                  ... Exactly 1 day prior to now
+               NOW/DAY+6MONTHS+3DAYS
+                  ... 6 months and 3 days in the future from the start of
+                      the current day
+                      
+         Consult the DateField javadocs for more information.
+
+         Note: For faster range queries, consider the tdate type
+      -->
+    <fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0"/>
+
+    <!-- A Trie based date field for faster date range queries and date faceting. -->
+    <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>
+
+
+    <!-- solr.TextField allows the specification of custom text analyzers
+         specified as a tokenizer and a list of token filters. Different
+         analyzers may be specified for indexing and querying.
+
+         The optional positionIncrementGap puts space between multiple fields of
+         this type on the same document, with the purpose of preventing false phrase
+         matching across fields.
+
+         For more info on customizing your analyzer chain, please see
+         http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
+     -->
+
+    <!-- A general text field that has reasonable, generic
+         cross-language defaults: it tokenizes with StandardTokenizer,
+	 removes stop words from case-insensitive "stopwords.txt"
+	 (empty by default), and down cases.  At query time only, it
+	 also applies synonyms. -->
+    <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
+      <analyzer type="index">
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
+        <!-- in this example, we will only use synonyms at query time
+        <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
+        -->
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <!-- A text field with defaults appropriate for English: it
+         tokenizes with StandardTokenizer, removes English stop words
+         (stopwords.txt), down cases, protects words from protwords.txt, and
+         finally applies Porter's stemming.  The query time analyzer
+         also applies synonyms from synonyms.txt. -->
+    <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
+      <analyzer type="index">
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <!-- in this example, we will only use synonyms at query time
+        <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
+        -->
+        <!-- Case insensitive stop word removal.
+          add enablePositionIncrements=true in both the index and query
+          analyzers to leave a 'gap' for more accurate phrase queries.
+        -->
+        <filter class="solr.StopFilterFactory"
+                ignoreCase="true"
+                words="stopwords.txt"
+                enablePositionIncrements="true"
+                />
+        <filter class="solr.LowerCaseFilterFactory"/>
+	<filter class="solr.EnglishPossessiveFilterFactory"/>
+        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+	<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
+        <filter class="solr.EnglishMinimalStemFilterFactory"/>
+	-->
+        <filter class="solr.PorterStemFilterFactory"/>
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+        <filter class="solr.StopFilterFactory"
+                ignoreCase="true"
+                words="stopwords.txt"
+                enablePositionIncrements="true"
+                />
+        <filter class="solr.LowerCaseFilterFactory"/>
+	<filter class="solr.EnglishPossessiveFilterFactory"/>
+        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+	<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
+        <filter class="solr.EnglishMinimalStemFilterFactory"/>
+	-->
+        <filter class="solr.PorterStemFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <!-- A text field with defaults appropriate for English, plus
+	 aggressive word-splitting and autophrase features enabled.
+	 This field is just like text_en, except it adds
+	 WordDelimiterFilter to enable splitting and matching of
+	 words on case-change, alpha numeric boundaries, and
+	 non-alphanumeric chars.  This means certain compound word
+	 cases will work, for example query "wi fi" will match
+	 document "WiFi" or "wi-fi".  However, other cases will still
+	 not match, for example if the query is "wifi" and the
+	 document is "wi fi" or if the query is "wi-fi" and the
+	 document is "wifi".
+        -->
+    <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
+      <analyzer type="index">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <!-- in this example, we will only use synonyms at query time
+        <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
+        -->
+        <!-- Case insensitive stop word removal.
+          add enablePositionIncrements=true in both the index and query
+          analyzers to leave a 'gap' for more accurate phrase queries.
+        -->
+        <filter class="solr.StopFilterFactory"
+                ignoreCase="true"
+                words="stopwords.txt"
+                enablePositionIncrements="true"
+                />
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+        <filter class="solr.PorterStemFilterFactory"/>
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+        <filter class="solr.StopFilterFactory"
+                ignoreCase="true"
+                words="stopwords.txt"
+                enablePositionIncrements="true"
+                />
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+        <filter class="solr.PorterStemFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <!-- Less flexible matching, but fewer false matches.  Probably not ideal for product names,
+         but may be good for SKUs.  Can insert dashes in the wrong place and still match. -->
+    <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+        <filter class="solr.EnglishMinimalStemFilterFactory"/>
+        <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
+             possible with WordDelimiterFilter in conjunction with stemming. -->
+        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <!-- Just like text_general except it reverses the characters of
+	 each token, to enable more efficient leading wildcard queries. -->
+    <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
+      <analyzer type="index">
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
+           maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" >
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
+      </analyzer>
+    </fieldtype>
+
+    <fieldtype name="payloads" stored="false" indexed="true" class="solr.TextField" >
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <!--
+        The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
+        a token of "foo|1.4"  would be indexed as "foo" with a payload of 1.4f
+        Attributes of the DelimitedPayloadTokenFilterFactory : 
+         "delimiter" - a one character delimiter. Default is | (pipe)
+	 "encoder" - how to encode the following value into a payload
+	    float -> org.apache.lucene.analysis.payloads.FloatEncoder,
+	    integer -> o.a.l.a.p.IntegerEncoder
+	    identity -> o.a.l.a.p.IdentityEncoder
+            Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
+         -->
+        <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
+      </analyzer>
+    </fieldtype>
+
+    <!-- lowercases the entire field value, keeping it as a single token.  -->
+    <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.KeywordTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory" />
+      </analyzer>
+    </fieldType>
+
+    <fieldType name="url" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+           <filter class="solr.LowerCaseFilterFactory"/>
+           <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1"/>
+      </analyzer>
+    </fieldType>
+
+
+    <fieldType name="text_path" class="solr.TextField" positionIncrementGap="100">
+      <analyzer>
+        <tokenizer class="solr.PathHierarchyTokenizerFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <!-- since fields of this type are by default not stored or indexed,
+         any data added to them will be ignored outright.  --> 
+    <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
+
+ </types>
+
+ <fields>
+    <field name="id" type="string" stored="true" indexed="true"/>
+
+    <!-- core fields -->
+    <field name="segment" type="string" stored="true" indexed="false"/>
+    <field name="digest" type="string" stored="true" indexed="false"/>
+    <field name="boost" type="float" stored="true" indexed="false"/>
+
+    <!-- fields for index-basic plugin -->
+    <field name="host" type="url" stored="false" indexed="true"/>
+    <field name="url" type="url" stored="true" indexed="true" required="true"/>
+    <!-- stored=true for highlighting; use term vectors and positions for fast highlighting -->
+    <field name="content" type="text_general" stored="true" indexed="true"/>
+    <field name="title" type="text_general" stored="true" indexed="true"/>
+    <field name="cache" type="string" stored="true" indexed="false"/>
+    <field name="tstamp" type="date" stored="true" indexed="false"/>
+
+    <!-- catch-all field -->
+    <field name="text" type="text_general" stored="false" indexed="true" multiValued="true"/>
+
+    <!-- fields for index-anchor plugin -->
+    <field name="anchor" type="text_general" stored="true" indexed="true"
+        multiValued="true"/>
+
+    <!-- fields for index-more plugin -->
+    <field name="type" type="string" stored="true" indexed="true" multiValued="true"/>
+    <field name="contentLength" type="string" stored="true" indexed="false"/>
+    <field name="lastModified" type="date" stored="true" indexed="false"/>
+    <field name="date" type="tdate" stored="true" indexed="true"/>
+
+    <!-- fields for languageidentifier plugin -->
+    <field name="lang" type="string" stored="true" indexed="true"/>
+
+    <!-- fields for subcollection plugin -->
+    <field name="subcollection" type="string" stored="true" indexed="true" multiValued="true"/>
+
+    <!-- fields for feed plugin (tag is also used by microformats-reltag)-->
+    <field name="author" type="string" stored="true" indexed="true"/>
+    <field name="tag" type="string" stored="true" indexed="true" multiValued="true"/>
+    <field name="feed" type="string" stored="true" indexed="true"/>
+    <field name="publishedDate" type="date" stored="true" indexed="true"/>
+    <field name="updatedDate" type="date" stored="true" indexed="true"/>
+
+    <!-- fields for creativecommons plugin -->
+    <field name="cc" type="string" stored="true" indexed="true" multiValued="true"/>
+ </fields>
+ <uniqueKey>id</uniqueKey>
+ <defaultSearchField>text</defaultSearchField>
+ <solrQueryParser defaultOperator="OR"/>
+
+  <!-- copyField commands copy one field to another at the time a document
+        is added to the index.  It's used either to index the same field differently,
+        or to add multiple fields to the same field for easier/faster searching.  -->
+
+ <copyField source="content" dest="text"/>
+ <copyField source="url" dest="text"/>
+ <copyField source="title" dest="text"/>
+ <copyField source="anchor" dest="text"/>
+ <copyField source="author" dest="text"/>
+
+</schema>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conf/schema.xml	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,121 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+    <!--
+        Licensed to the Apache Software Foundation (ASF) under one or
+        more contributor license agreements. See the NOTICE file
+        distributed with this work for additional information regarding
+        copyright ownership. The ASF licenses this file to You under the
+        Apache License, Version 2.0 (the "License"); you may not use
+        this file except in compliance with the License. You may obtain
+        a copy of the License at
+        http://www.apache.org/licenses/LICENSE-2.0 Unless required by
+        applicable law or agreed to in writing, software distributed
+        under the License is distributed on an "AS IS" BASIS, WITHOUT
+        WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+        See the License for the specific language governing permissions
+        and limitations under the License.
+    -->
+    <!--
+        Description: This document contains Solr 3.1 schema definition to
+        be used with Solr integration currently built into Nutch. See
+        https://issues.apache.org/jira/browse/NUTCH-442
+        https://issues.apache.org/jira/browse/NUTCH-699
+        https://issues.apache.org/jira/browse/NUTCH-994
+        https://issues.apache.org/jira/browse/NUTCH-997
+        https://issues.apache.org/jira/browse/NUTCH-1058
+        https://issues.apache.org/jira/browse/NUTCH-1232
+        and
+        http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/
+        example/solr/conf/schema.xml?view=markup
+        for more info.
+    -->
+<schema name="nutch" version="1.5">
+    <types>
+        <fieldType name="string" class="solr.StrField" sortMissingLast="true"
+            omitNorms="true"/> 
+        <fieldType name="long" class="solr.TrieLongField" precisionStep="0"
+            omitNorms="true" positionIncrementGap="0"/>
+        <fieldType name="float" class="solr.TrieFloatField" precisionStep="0"
+            omitNorms="true" positionIncrementGap="0"/>
+        <fieldType name="date" class="solr.TrieDateField" precisionStep="0"
+            omitNorms="true" positionIncrementGap="0"/>
+
+        <fieldType name="text" class="solr.TextField"
+            positionIncrementGap="100">
+            <analyzer>
+                <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+                <filter class="solr.StopFilterFactory"
+                    ignoreCase="true" words="stopwords.txt"/>
+                <filter class="solr.WordDelimiterFilterFactory"
+                    generateWordParts="1" generateNumberParts="1"
+                    catenateWords="1" catenateNumbers="1" catenateAll="0"
+                    splitOnCaseChange="1"/>
+                <filter class="solr.LowerCaseFilterFactory"/>
+                <filter class="solr.EnglishPorterFilterFactory"
+                    protected="protwords.txt"/>
+                <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+            </analyzer>
+        </fieldType>
+        <fieldType name="url" class="solr.TextField"
+            positionIncrementGap="100">
+            <analyzer>
+                <tokenizer class="solr.StandardTokenizerFactory"/>
+                <filter class="solr.LowerCaseFilterFactory"/>
+                <filter class="solr.WordDelimiterFilterFactory"
+                    generateWordParts="1" generateNumberParts="1"/>
+            </analyzer>
+        </fieldType>
+    </types>
+    <fields>
+        <field name="id" type="string" stored="true" indexed="true"/>
+
+        <!-- core fields -->
+        <field name="segment" type="string" stored="true" indexed="false"/>
+        <field name="digest" type="string" stored="true" indexed="false"/>
+        <field name="boost" type="float" stored="true" indexed="false"/>
+
+        <!-- fields for index-basic plugin -->
+        <field name="host" type="string" stored="false" indexed="true"/>
+        <field name="url" type="url" stored="true" indexed="true"
+            required="true"/>
+        <field name="content" type="text" stored="false" indexed="true"/>
+        <field name="title" type="text" stored="true" indexed="true"/>
+        <field name="cache" type="string" stored="true" indexed="false"/>
+        <field name="tstamp" type="date" stored="true" indexed="false"/>
+
+        <!-- fields for index-anchor plugin -->
+        <field name="anchor" type="string" stored="true" indexed="true"
+            multiValued="true"/>
+
+        <!-- fields for index-more plugin -->
+        <field name="type" type="string" stored="true" indexed="true"
+            multiValued="true"/>
+        <field name="contentLength" type="long" stored="true"
+            indexed="false"/>
+        <field name="lastModified" type="date" stored="true"
+            indexed="false"/>
+        <field name="date" type="date" stored="true" indexed="true"/>
+
+        <!-- fields for languageidentifier plugin -->
+        <field name="lang" type="string" stored="true" indexed="true"/>
+
+        <!-- fields for subcollection plugin -->
+        <field name="subcollection" type="string" stored="true"
+            indexed="true" multiValued="true"/>
+
+        <!-- fields for feed plugin (tag is also used by microformats-reltag)-->
+        <field name="author" type="string" stored="true" indexed="true"/>
+        <field name="tag" type="string" stored="true" indexed="true" multiValued="true"/>
+        <field name="feed" type="string" stored="true" indexed="true"/>
+        <field name="publishedDate" type="date" stored="true"
+            indexed="true"/>
+        <field name="updatedDate" type="date" stored="true"
+            indexed="true"/>
+
+        <!-- fields for creativecommons plugin -->
+        <field name="cc" type="string" stored="true" indexed="true"
+            multiValued="true"/>
+    </fields>
+    <uniqueKey>id</uniqueKey>
+    <defaultSearchField>content</defaultSearchField>
+    <solrQueryParser defaultOperator="OR"/>
+</schema>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conf/solrindex-mapping.xml	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,45 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<mapping>
+	<!-- Simple mapping of fields created by Nutch IndexingFilters
+	     to fields defined (and expected) in Solr schema.xml.
+
+             Any fields in NutchDocument that match a name defined
+             in field/@source will be renamed to the corresponding
+             field/@dest.
+             Additionally, if a field name (before mapping) matches
+             a copyField/@source then its values will be copied to 
+             the corresponding copyField/@dest.
+
+             uniqueKey has the same meaning as in Solr schema.xml
+             and defaults to "id" if not defined.
+         -->
+	<fields>
+		<field dest="content" source="content"/>
+		<field dest="title" source="title"/>
+		<field dest="host" source="host"/>
+		<field dest="segment" source="segment"/>
+		<field dest="boost" source="boost"/>
+		<field dest="digest" source="digest"/>
+		<field dest="tstamp" source="tstamp"/>
+		<field dest="id" source="url"/>
+		<copyField source="url" dest="url"/>
+	</fields>
+	<uniqueKey>id</uniqueKey>
+</mapping>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conf/subcollections.xml	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<subcollections>
+	<subcollection>
+		<name>nutch</name>
+		<id>nutch</id>
+		<whitelist>
+http://lucene.apache.org/nutch/
+http://wiki.apache.org/nutch/
+                </whitelist>
+		<blacklist />
+	</subcollection>
+</subcollections>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conf/subcollections.xml.template	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<subcollections>
+	<subcollection>
+		<name>nutch</name>
+		<id>nutch</id>
+		<whitelist>
+http://lucene.apache.org/nutch/
+http://wiki.apache.org/nutch/
+                </whitelist>
+		<blacklist />
+	</subcollection>
+</subcollections>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conf/suffix-urlfilter.txt	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,91 @@
+# config file for urlfilter-suffix plugin
+
+# case-insensitive, allow unknown suffixes
++I
+# uncomment the line below to filter on url path
+#+P
+
+### prohibit these
+# pictures
+.gif
+.jpg
+.jpeg
+.bmp
+.png
+.tif
+.tiff
+.ico
+.eps
+.ps
+.wmf
+.fpx
+.cur
+.ani
+.img
+.lwf
+.pcd
+.psp
+.psd
+.tga
+.xbm
+.xpm
+
+# web-formats
+.css
+
+# archives/packages
+.arj
+.arc
+.7z
+.cab
+.lzw
+.lha
+.lzh
+.zip
+.gz
+.tar
+.tgz
+.sit
+.rpm
+.deb
+.pkg
+
+# audio/video
+.mid
+.midi
+.rmi
+.mpeg
+.mpg
+.mpe
+.mp3
+.mp2
+.aac
+.mov
+.fla
+.flv
+.ra
+.ram
+.rm
+.rmv
+.wma
+.wmv
+.wav
+.wave
+.ogg
+.avi
+.au
+.snd
+
+# executables
+.exe
+.com
+
+# windows links
+.lnk
+
+# typo3-extensions
+.t3x
+
+# disc-images
+.iso
+.bin
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conf/suffix-urlfilter.txt.template	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,91 @@
+# config file for urlfilter-suffix plugin
+
+# case-insensitive, allow unknown suffixes
++I
+# uncomment the line below to filter on url path
+#+P
+
+### prohibit these
+# pictures
+.gif
+.jpg
+.jpeg
+.bmp
+.png
+.tif
+.tiff
+.ico
+.eps
+.ps
+.wmf
+.fpx
+.cur
+.ani
+.img
+.lwf
+.pcd
+.psp
+.psd
+.tga
+.xbm
+.xpm
+
+# web-formats
+.css
+
+# archives/packages
+.arj
+.arc
+.7z
+.cab
+.lzw
+.lha
+.lzh
+.zip
+.gz
+.tar
+.tgz
+.sit
+.rpm
+.deb
+.pkg
+
+# audio/video
+.mid
+.midi
+.rmi
+.mpeg
+.mpg
+.mpe
+.mp3
+.mp2
+.aac
+.mov
+.fla
+.flv
+.ra
+.ram
+.rm
+.rmv
+.wma
+.wmv
+.wav
+.wave
+.ogg
+.avi
+.au
+.snd
+
+# executables
+.exe
+.com
+
+# windows links
+.lnk
+
+# typo3-extensions
+.t3x
+
+# disc-images
+.iso
+.bin
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/default.properties	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,164 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name=apache-nutch
+version=1.7-SNAPSHOT
+final.name=${name}-${version}
+year=2012
+
+basedir = ./
+src.dir = ./src/java
+lib.dir = ./lib
+conf.dir = ./conf
+plugins.dir = ./src/plugin
+
+build.dir = ./build
+build.classes = ${build.dir}/classes
+build.plugins = ${build.dir}/plugins
+build.javadoc = ${build.dir}/docs/api
+build.encoding = UTF-8
+build.ivy.dir=${build.dir}/ivy
+build.lib.dir=${build.dir}/lib
+
+test.src.dir = ./src/test
+test.build.dir = ${build.dir}/test
+test.build.data =  ${test.build.dir}/data
+test.build.classes = ${test.build.dir}/classes
+test.build.javadoc = ${test.build.dir}/docs/api
+
+# Proxy Host and Port to use for building JavaDoc
+javadoc.proxy.host=-J-DproxyHost=
+javadoc.proxy.port=-J-DproxyPort=
+javadoc.link.java=http://java.sun.com/javase/6/docs/api/
+javadoc.link.lucene=http://lucene.apache.org/java/2_9_1/api/all
+javadoc.link.hadoop=http://hadoop.apache.org/common/docs/r0.20.2/api/
+javadoc.packages=org.apache.nutch.*
+
+dist.dir=./dist
+src.dist.version.dir=${dist.dir}/${final.name}-src
+bin.dist.version.dir=${dist.dir}/${final.name}-bin
+
+javac.debug=on
+javac.optimize=on
+javac.deprecation=on
+javac.version= 1.6
+
+runtime.dir=./runtime
+runtime.deploy=${runtime.dir}/deploy
+runtime.local=${runtime.dir}/local
+
+ivy.version=2.2.0
+ivy.dir=${basedir}/ivy
+ivy.file=${ivy.dir}/ivy.xml
+ivy.jar=${ivy.dir}/ivy-${ivy.version}.jar
+ivy.repo.url=http://repo2.maven.org/maven2/org/apache/ivy/ivy/${ivy.version}/ivy-${ivy.version}.jar
+
+ivy.local.default.root=${ivy.default.ivy.user.dir}/local
+ivy.local.default.ivy.pattern=[organisation]/[module]/[revision]/[type]s/[artifact].[ext]
+ivy.local.default.artifact.pattern=[organisation]/[module]/[revision]/[type]s/[artifact].[ext]
+
+ivy.shared.default.root=${ivy.default.ivy.user.dir}/shared
+ivy.shared.default.ivy.pattern=[organisation]/[module]/[revision]/[type]s/[artifact].[ext]
+ivy.shared.default.artifact.pattern=[organisation]/[module]/[revision]/[type]s/[artifact].[ext]
+
+#
+# Plugins API
+#
+plugins.api=\
+   org.apache.nutch.protocol.http.api*:\
+   org.apache.nutch.urlfilter.api*
+
+#
+# Protocol Plugins
+#
+plugins.protocol=\
+   org.apache.nutch.protocol.file*:\
+   org.apache.nutch.protocol.ftp*:\
+   org.apache.nutch.protocol.http*:\
+   org.apache.nutch.protocol.httpclient*
+
+#
+# URL Filter Plugins
+#
+# All lines below are joined into a single colon-separated value, so every
+# entry except the last one must end with ":\".
+plugins.urlfilter=\
+   org.apache.nutch.urlfilter.automaton*:\
+   org.apache.nutch.urlfilter.domain*:\
+   org.apache.nutch.urlfilter.domainblacklist*:\
+   org.apache.nutch.urlfilter.prefix*:\
+   org.apache.nutch.urlfilter.regex*:\
+   org.apache.nutch.urlfilter.suffix*:\
+   org.apache.nutch.urlfilter.validator*
+
+#
+# URL Normalizer Plugins
+#
+# Uses its own key: declaring this list as "plugins.urlfilter" again would
+# collide with the URL filter list defined earlier in this file.
+# NOTE(review): any build file that should pick up this list has to
+# reference ${plugins.urlnormalizer} - confirm against build.xml.
+plugins.urlnormalizer=\
+   org.apache.nutch.net.urlnormalizer.basic*:\
+   org.apache.nutch.net.urlnormalizer.pass*:\
+   org.apache.nutch.net.urlnormalizer.regex*
+
+#
+# Scoring Plugins
+#
+plugins.scoring=\
+   org.apache.nutch.scoring.link*:\
+   org.apache.nutch.scoring.opic*:\
+   org.apache.nutch.scoring.tld*:\
+   org.apache.nutch.scoring.urlmeta*
+   
+#
+# Parse Plugins
+#
+plugins.parse=\
+   org.apache.nutch.parse.ext*:\
+   org.apache.nutch.parse.feed*:\
+   org.apache.nutch.parse.html*:\
+   org.apache.nutch.parse.js:\
+   org.apache.nutch.parse.swf*:\
+   org.apache.nutch.parse.tika:\
+   org.apache.nutch.parse.zip
+   
+#
+# Parse Filter Plugins
+#
+# Uses its own key: declaring this list as "plugins.parse" again would
+# collide with the parse plugin list defined earlier in this file.
+# NOTE(review): any build file that should pick up this list has to
+# reference ${plugins.parsefilter} - confirm against build.xml.
+plugins.parsefilter=\
+   org.apache.nutch.parse.headings*
+
+#
+# Indexing Filter Plugins
+#
+plugins.index=\
+   org.apache.nutch.indexer.anchor*:\
+   org.apache.nutch.indexer.basic*:\
+   org.apache.nutch.indexer.feed*:\
+   org.apache.nutch.indexer.metadata*:\
+   org.apache.nutch.indexer.static*:\
+   org.apache.nutch.indexer.subcollection*:\
+   org.apache.nutch.indexer.tld*:\
+   org.apache.nutch.indexer.urlmeta*
+
+#
+# Misc. Plugins
+#
+# (gathers plugins that cannot be assigned
+# to any single category, mainly because they contain
+# many extension points)
+#
+# All lines below are joined into a single colon-separated value, so every
+# entry except the last one must end with ":\" and the last one must not.
+plugins.misc=\
+   org.apache.nutch.collection*:\
+   org.apache.nutch.analysis.lang*:\
+   org.creativecommons.nutch*:\
+   org.apache.nutch.microformats.reltag*
+   
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugin/build-plugin.xml	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,239 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<!-- Imported by plugin build.xml files to define default targets. -->
+<project xmlns:ivy="antlib:org.apache.ivy.ant">
+
+  <property name="name" value="${ant.project.name}"/>
+  <property name="root" value="${basedir}"/>
+
+  <!-- load plugin-specific properties first -->
+  <property file="${user.home}/${name}.build.properties" />
+  <property file="${root}/build.properties" />
+
+  <property name="nutch.root" location="${root}/../../../"/>
+
+  <property name="src.dir" location="${root}/src/java"/>
+  <property name="src.test" location="${root}/src/test"/>
+
+  <available file="${src.test}" type="dir" property="test.available"/>
+
+  <property name="conf.dir" location="${nutch.root}/conf"/>
+
+  <property name="build.dir" location="${nutch.root}/build/${name}"/>
+  <property name="build.classes" location="${build.dir}/classes"/>
+  <property name="build.test" location="${build.dir}/test"/>
+
+  <property name="deploy.dir" location="${nutch.root}/build/plugins/${name}"/>
+
+  <!-- load nutch defaults last so that they can be overridden above -->
+  <property file="${nutch.root}/default.properties" />
+
+  <ivy:settings id="ivy.instance" file="${nutch.root}/ivy/ivysettings.xml" />
+
+  <path id="plugin.deps"/>
+
+  <fileset id="lib.jars" dir="${root}" includes="lib/*.jar"/>
+
+  <!-- the normal classpath -->
+  <path id="classpath">
+    <pathelement location="${build.classes}"/>
+    <fileset refid="lib.jars"/>
+    <pathelement location="${nutch.root}/build/classes"/>
+    <fileset dir="${nutch.root}/build/lib">
+      <include name="*.jar" />
+    </fileset>
+    <path refid="plugin.deps"/>
+    <fileset dir="${deploy.dir}">
+      <include name="*.jar" />
+    </fileset>
+  </path>
+
+  <!-- the unit test classpath -->
+  <path id="test.classpath">
+    <pathelement location="${build.test}" />
+    <pathelement location="${nutch.root}/build/test/classes"/>
+    <pathelement location="${nutch.root}/src/test"/>
+    <pathelement location="${conf.dir}"/>
+    <pathelement location="${nutch.root}/build"/>
+    <path refid="classpath"/>
+  </path>
+
+  <!-- ====================================================== -->
+  <!-- Stuff needed by all targets                            -->
+  <!-- ====================================================== -->
+  <target name="init">
+    <mkdir dir="${build.dir}"/>
+    <mkdir dir="${build.classes}"/>
+    <mkdir dir="${build.test}"/>
+    <mkdir dir="${deploy.dir}"/>
+
+    <antcall target="init-plugin"/>
+  </target>
+
+  <!-- to be overridden by sub-projects --> 
+  <target name="init-plugin"/>
+
+  <!--
+   ! Used to build plugin compilation dependencies
+   ! (to be overridden by plugins)
+   !-->
+  <target name="deps-jar"/>
+
+  <!--
+   ! Used to deploy plugin runtime dependencies
+   ! (to be overridden by plugins)
+   !-->
+  <target name="deps-test"/>
+
+  <!-- ====================================================== -->
+  <!-- Compile the Java files                                 -->
+  <!-- ====================================================== -->
+  <target name="compile" depends="init,deps-jar, resolve-default">
+    <echo message="Compiling plugin: ${name}"/>
+    <javac 
+     encoding="${build.encoding}" 
+     srcdir="${src.dir}"
+     includes="**/*.java"
+     destdir="${build.classes}"
+     debug="${javac.debug}"
+     optimize="${javac.optimize}"
+     target="${javac.version}"
+     source="${javac.version}"
+     deprecation="${javac.deprecation}">
+      <classpath refid="classpath"/>
+    </javac>
+  </target>
+
+  <target name="compile-core">
+    <ant target="compile-core" inheritall="false" dir="${nutch.root}"/>
+    <ant target="compile"/>
+  </target>
+  
+  <!-- ================================================================== -->
+  <!-- Make plugin .jar                                                   -->
+  <!-- ================================================================== -->
+  <!--                                                                    -->
+  <!-- ================================================================== -->
+  <target name="jar" depends="compile">
+    <jar
+      jarfile="${build.dir}/${name}.jar"
+      basedir="${build.classes}"
+    />
+  </target>
+
+  <target name="jar-core" depends="compile-core">
+    <jar
+        jarfile="${build.dir}/${name}.jar"
+        basedir="${build.classes}"
+        />
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Deploy plugin to ${deploy.dir}                                     -->
+  <!-- ================================================================== -->
+  <!--                                                                    -->
+  <!-- ================================================================== -->
+  <target name="deploy" depends="jar, deps-test">
+    <mkdir dir="${deploy.dir}"/>
+    <copy file="plugin.xml" todir="${deploy.dir}" 
+          preservelastmodified="true"/>
+    <available property="lib-available"
+                 file="${build.dir}/${name}.jar"/>
+    <antcall target="copy-generated-lib"/>
+    <copy todir="${deploy.dir}" flatten="true">
+      <fileset refid="lib.jars"/>
+    </copy>
+  </target>
+	
+  <target name="copy-generated-lib" if="lib-available">
+    <copy file="${build.dir}/${name}.jar" todir="${deploy.dir}" failonerror="false"/>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Compile test code                                                  --> 
+  <!-- ================================================================== -->
+  <target name="compile-test" depends="compile" if="test.available">
+    <javac 
+     encoding="${build.encoding}" 
+     srcdir="${src.test}"
+     includes="**/*.java"
+     destdir="${build.test}"
+     debug="${javac.debug}"
+     optimize="${javac.optimize}"
+     target="${javac.version}"
+     source="${javac.version}"
+     deprecation="${javac.deprecation}">
+      <classpath refid="test.classpath"/>
+    </javac>    
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Run unit tests                                                     --> 
+  <!-- ================================================================== -->
+  <target name="test" depends="compile-test, deploy" if="test.available">
+    <echo message="Testing plugin: ${name}"/>
+
+    <junit printsummary="yes" haltonfailure="no" fork="yes"
+      errorProperty="tests.failed" failureProperty="tests.failed">
+      <sysproperty key="test.data" value="${build.test}/data"/>
+      <sysproperty key="test.input" value="${root}/data"/>
+      <sysproperty key="javax.xml.parsers.DocumentBuilderFactory" value="com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl"/> 
+      <classpath refid="test.classpath"/>
+      <formatter type="plain" />
+      <batchtest todir="${build.test}" unless="testcase">
+        <fileset dir="${src.test}"
+                 includes="**/Test*.java" excludes="**/${test.exclude}.java" />
+      </batchtest>
+      <batchtest todir="${build.test}" if="testcase">
+        <fileset dir="${src.test}" includes="**/${testcase}.java"/>
+      </batchtest>
+    </junit>
+
+    <fail if="tests.failed">Tests failed!</fail>
+
+  </target>   
+
+  <!-- target: resolve  ================================================= -->
+  <target name="resolve-default" depends="clean-lib" description="resolve and retrieve dependencies with ivy">
+    <ivy:resolve file="ivy.xml" conf="default" log="download-only"/>
+    <ivy:retrieve pattern="${deploy.dir}/[artifact]-[revision].[ext]" symlink="false" log="quiet"/>
+  </target>
+
+  <target name="resolve-test" depends="clean-lib" description="resolve and retrieve dependencies with ivy">
+    <ivy:resolve file="ivy.xml" conf="test" log="download-only"/>
+    <ivy:retrieve pattern="${deploy.dir}/[artifact]-[revision].[ext]" symlink="false" log="quiet"/>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Clean.  Delete the build files, and their directories              -->
+  <!-- ================================================================== -->
+  <!-- target: clean  =================================================== -->
+  <target name="clean" depends="clean-build, clean-lib" description="--> clean the project" />
+
+  <!-- target: clean-lib  =============================================== -->
+  <target name="clean-lib" description="--> clean the project libraries directory (dependencies)">
+    <delete includeemptydirs="true" dir="${build.lib.dir}"/>
+  </target>
+
+  <!-- target: clean-build  ============================================= -->
+  <target name="clean-build" description="--> clean the project built files">
+    <delete includeemptydirs="true" dir="${build.dir}"/>
+    <delete includeemptydirs="true" dir="${deploy.dir}"/>
+  </target>
+
+</project>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugin/build.xml	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,165 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project name="Nutch" default="deploy-core" basedir=".">
+
+  <target name="deploy-core">
+    <ant target="compile-core" inheritall="false" dir="../.."/>
+    <ant target="deploy"/>
+  </target>
+
+  <!-- ====================================================== -->
+  <!-- Build & deploy all the plugin jars.                    -->
+  <!-- ====================================================== -->
+  <target name="deploy">
+     <ant dir="creativecommons" target="deploy"/>
+     <ant dir="feed" target="deploy"/>
+     <ant dir="headings" target="deploy"/>
+     <ant dir="index-basic" target="deploy"/>
+     <ant dir="index-anchor" target="deploy"/>
+     <ant dir="index-more" target="deploy"/>
+     <ant dir="index-static" target="deploy"/>
+     <ant dir="index-metadata" target="deploy"/>
+     <ant dir="language-identifier" target="deploy"/>
+     <ant dir="lib-http" target="deploy"/>
+     <ant dir="lib-nekohtml" target="deploy"/>
+     <ant dir="lib-regex-filter" target="deploy"/>
+     <ant dir="lib-xml" target="deploy"/>
+     <ant dir="microformats-reltag" target="deploy"/>
+     <ant dir="nutch-extensionpoints" target="deploy"/>
+     <ant dir="protocol-file" target="deploy"/>
+     <ant dir="protocol-ftp" target="deploy"/>
+     <ant dir="protocol-http" target="deploy"/>
+     <ant dir="protocol-httpclient" target="deploy"/>
+     <ant dir="parse-ext" target="deploy"/>
+     <ant dir="parse-js" target="deploy"/>
+     <ant dir="parse-html" target="deploy"/>
+     <ant dir="parse-metatags" target="deploy"/>
+     <ant dir="parse-swf" target="deploy"/>
+     <ant dir="parse-tika" target="deploy"/>
+     <ant dir="parse-zip" target="deploy"/>
+     <ant dir="scoring-depth" target="deploy"/>
+     <ant dir="scoring-opic" target="deploy"/>
+     <ant dir="scoring-link" target="deploy"/>
+     <ant dir="subcollection" target="deploy"/>
+     <ant dir="tld" target="deploy"/>
+     <ant dir="urlfilter-automaton" target="deploy"/>
+     <ant dir="urlfilter-domain" target="deploy" />
+     <ant dir="urlfilter-domainblacklist" target="deploy" />
+     <ant dir="urlfilter-prefix" target="deploy"/>
+     <ant dir="urlfilter-regex" target="deploy"/>
+     <ant dir="urlfilter-suffix" target="deploy"/>
+     <ant dir="urlfilter-validator" target="deploy"/>
+     <ant dir="urlmeta" target="deploy"/>
+     <ant dir="urlnormalizer-basic" target="deploy"/>
+     <ant dir="urlnormalizer-host" target="deploy"/>
+     <ant dir="urlnormalizer-pass" target="deploy"/>
+  	 <ant dir="urlnormalizer-regex" target="deploy"/>
+  	 <ant dir="parse-MPIWG-metaTag" target="deploy"/>
+  	 <ant dir="parse-mpiwg" target="deploy"/>
+  </target>
+
+  <!-- ====================================================== -->
+  <!-- Test all of the plugins.                               -->
+  <!-- ====================================================== -->
+  <target name="test">
+    <parallel threadCount="2">
+     <ant dir="creativecommons" target="test"/>
+     <ant dir="index-basic" target="test"/>
+     <ant dir="index-anchor" target="test"/>
+     <ant dir="index-more" target="test"/>
+     <ant dir="index-static" target="test"/>
+     <ant dir="language-identifier" target="test"/>
+     <ant dir="lib-http" target="test"/>
+     <ant dir="protocol-file" target="test"/>
+     <ant dir="protocol-httpclient" target="test"/>
+     <!--ant dir="parse-ext" target="test"/-->
+     <ant dir="feed" target="test"/>
+     <ant dir="parse-html" target="test"/>
+     <ant dir="parse-metatags" target="test"/>
+     <ant dir="parse-swf" target="test"/>
+     <ant dir="parse-tika" target="test"/>
+     <ant dir="parse-zip" target="test"/>
+     <ant dir="subcollection" target="test"/>
+     <ant dir="urlfilter-automaton" target="test"/>
+     <ant dir="urlfilter-domain" target="test"/>
+     <ant dir="urlfilter-domainblacklist" target="test"/>
+     <ant dir="urlfilter-regex" target="test"/>
+     <ant dir="urlfilter-suffix" target="test"/>
+     <ant dir="urlfilter-validator" target="test"/>
+     <ant dir="urlnormalizer-basic" target="test"/>
+     <ant dir="urlnormalizer-host" target="test"/>
+     <ant dir="urlnormalizer-pass" target="test"/>
+     <ant dir="urlnormalizer-regex" target="test"/>
+     <ant dir="parse-MPIWG-metaTag" target="test"/>
+     <ant dir="parse-mpiwg" target="test"/>
+    </parallel>
+  </target>
+
+  <!-- ====================================================== -->
+  <!-- Clean all of the plugins.                              -->
+  <!-- ====================================================== -->
+  <target name="clean">
+    <ant dir="creativecommons" target="clean"/>
+    <ant dir="feed" target="clean"/>
+    <ant dir="headings" target="clean"/>
+    <ant dir="index-basic" target="clean"/>
+    <ant dir="index-anchor" target="clean"/>
+    <ant dir="index-more" target="clean"/>
+    <ant dir="index-static" target="clean"/>
+    <ant dir="index-metadata" target="clean"/>
+    <ant dir="language-identifier" target="clean"/>
+    <ant dir="lib-commons-httpclient" target="clean"/>
+    <ant dir="lib-http" target="clean"/>
+    <ant dir="lib-lucene-analyzers" target="clean"/>
+    <ant dir="lib-nekohtml" target="clean"/>
+    <ant dir="lib-regex-filter" target="clean"/>
+    <ant dir="lib-xml" target="clean"/>
+    <ant dir="microformats-reltag" target="clean"/>
+    <ant dir="nutch-extensionpoints" target="clean"/>
+    <ant dir="protocol-file" target="clean"/>
+    <ant dir="protocol-ftp" target="clean"/>
+    <ant dir="protocol-http" target="clean"/>
+    <ant dir="protocol-httpclient" target="clean"/>
+    <ant dir="parse-ext" target="clean"/>
+    <ant dir="parse-js" target="clean"/>
+    <ant dir="parse-html" target="clean"/>
+    <ant dir="parse-metatags" target="clean"/>
+    <ant dir="parse-swf" target="clean"/>
+    <ant dir="parse-tika" target="clean"/>
+    <ant dir="parse-zip" target="clean"/>
+    <ant dir="scoring-depth" target="clean"/>
+    <ant dir="scoring-opic" target="clean"/>
+    <ant dir="scoring-link" target="clean"/>
+    <ant dir="subcollection" target="clean"/>
+    <ant dir="tld" target="clean"/>
+    <ant dir="urlfilter-automaton" target="clean"/>
+    <ant dir="urlfilter-domain" target="clean" />
+    <ant dir="urlfilter-domainblacklist" target="clean" />
+    <ant dir="urlfilter-prefix" target="clean"/>
+    <ant dir="urlfilter-regex" target="clean"/>
+    <ant dir="urlfilter-suffix" target="clean"/>
+    <ant dir="urlfilter-validator" target="clean"/>
+    <ant dir="urlmeta" target="clean"/>
+    <ant dir="urlnormalizer-host" target="clean"/>
+    <ant dir="urlnormalizer-basic" target="clean"/>
+    <ant dir="urlnormalizer-pass" target="clean"/>
+  	<ant dir="urlnormalizer-regex" target="clean"/>
+  	<ant dir="parse-MPIWG-metaTag" target="clean"/>
+  	<ant dir="parse-mpiwg" target="clean"/>
+  </target>
+</project>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugin/parse-MPIWG-metaTag/build.xml	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+
+<!--
+  The project name must be the plugin id: ../build-plugin.xml takes
+  ${ant.project.name} as ${name} and uses it for the jar file
+  (${name}.jar) and for the deploy directory (build/plugins/${name}).
+  plugin.xml declares the runtime library as parse-MPIWG-metaTag.jar,
+  so any other project name produces a jar that plugin.xml does not
+  reference.
+-->
+<project name="parse-MPIWG-metaTag" default="jar-core">
+
+  <import file="../build-plugin.xml"/>
+
+</project>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugin/parse-MPIWG-metaTag/ivy.xml	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,22 @@
+<ivy-module version="1.0">
+  <info organisation="org.apache.nutch" module="${ant.project.name}">
+    <license name="Apache 2.0"/>
+    <ivyauthor name="Apache Nutch Team" url="http://nutch.apache.org"/>
+    <description>
+        Apache Nutch
+    </description>
+  </info>
+
+  <configurations>
+    <include file="${nutch.root}/ivy/ivy-configurations.xml"/>
+  </configurations>
+
+  <publications>
+    <!--get the artifact from our module name-->
+    <artifact conf="master"/>
+  </publications>
+
+  <dependencies>
+  </dependencies>
+  
+</ivy-module>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugin/parse-MPIWG-metaTag/plugin.xml	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,31 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<plugin
+   id="parse-MPIWG-metaTag"
+   name="URL Meta Indexing Filter MPIWG"
+   version="0.1.0"
+   provider-name="dwinter">
+
+
+   <runtime>
+      <library name="parse-MPIWG-metaTag.jar">
+         <export name="*"/>
+      </library>
+   </runtime>
+
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
+
+   <extension      id="de.mpiwg.itgroup.indexer.urlmeta"
+                    name="URL Meta Indexing Filter"
+                    point="org.apache.nutch.indexer.IndexingFilter">
+   <implementation id="indexer-urlmeta"
+                    class="de.mpiwg.itgroup.indexer.urlmeta.URLMetaIndexingFilter"/>
+   </extension>
+   <extension      id="de.mpiwg.itgroup.scoring.urlmeta"
+                    name="URL Meta Scoring Filter"
+                      point="org.apache.nutch.scoring.ScoringFilter">
+   <implementation id="scoring-urlmeta"
+                    class="de.mpiwg.itgroup.scoring.urlmeta.URLMetaScoringFilter" />
+   </extension>
+</plugin>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugin/parse-MPIWG-metaTag/src/java/de/mpiwg/itgroup/indexer/urlmeta/URLMetaIndexingFilter.java	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,120 @@
+package de.mpiwg.itgroup.indexer.urlmeta;
+
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.Set;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.nutch.crawl.CrawlDatum;
+import org.apache.nutch.crawl.Inlinks;
+import org.apache.nutch.indexer.IndexingException;
+import org.apache.nutch.indexer.IndexingFilter;
+import org.apache.nutch.indexer.NutchDocument;
+import org.apache.nutch.indexer.NutchField;
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.parse.Parse;
+
+/**
+ * Indexing filter that copies selected parse metadata values into the
+ * {@link NutchDocument}.
+ *
+ * <p>Two lists of metadata names are read from the configuration:
+ * <ul>
+ * <li><code>urlmeta.tags</code> - names whose values are copied as-is;</li>
+ * <li><code>urlmeta.mpiwg</code> - names that are looked up under their plain
+ * name and, if that yields nothing, under <code>metatag.&lt;name&gt;</code>.
+ * Each value added from this list is also appended to a trace file.</li>
+ * </ul>
+ *
+ * <p>The trace writer is opened once per filter instance and is never closed
+ * explicitly, because this class has no shutdown hook to close it from.
+ */
+public class URLMetaIndexingFilter implements IndexingFilter {
+
+        private static final Log LOG = LogFactory
+                        .getLog(URLMetaIndexingFilter.class);
+
+        /** Configuration key listing the metadata names to copy into the document. */
+        private static final String CONF_PROPERTY = "urlmeta.tags";
+        private static String[] urlMetaTags;
+
+        /** Configuration key listing the MPIWG-specific metadata names. */
+        private static final String CONF_PROPERTY_MPIWG = "urlmeta.mpiwg";
+        private static String[] metadDataClasses;
+
+        /** Prefix tried when an MPIWG name has no values under its plain name. */
+        private static final String METATAG_PREFIX = "metatag.";
+
+        /** File that receives a plain-text trace of every MPIWG value added. */
+        private static final String TRACE_FILE = "/tmp/out2";
+
+        private Configuration conf;
+
+        /** Append-mode writer for the trace file; null until opened or if opening failed. */
+        private FileWriter fw;
+
+        /** True once opening the trace file has failed, so it is not retried per document. */
+        private boolean traceUnavailable;
+
+        /**
+         * Copies the configured metadata values from the parse metadata into the
+         * document.
+         *
+         * <p>Nothing is added when the document is null or when no
+         * <code>urlmeta.tags</code> list is configured. A missing
+         * <code>urlmeta.mpiwg</code> list only skips the MPIWG-specific part.
+         *
+         * @param doc     document to enrich; returned unchanged when null
+         * @param parse   parse result whose parse metadata is read
+         * @param url     URL of the page, written to the trace file only
+         * @param datum   not used by this filter
+         * @param inlinks not used by this filter
+         * @return the same document instance that was passed in
+         * @see IndexingFilter#filter
+         */
+        public NutchDocument filter(NutchDocument doc, Parse parse, Text url,
+                        CrawlDatum datum, Inlinks inlinks) throws IndexingException {
+                // Re-read the tag lists; they are kept in static fields.
+                if (conf != null)
+                        this.setConf(conf);
+
+                if (urlMetaTags == null || doc == null)
+                        return doc;
+
+                Metadata md = parse.getData().getParseMeta();
+
+                for (String metatag : urlMetaTags) {
+                        for (String value : md.getValues(metatag)) {
+                                doc.add(metatag, value);
+                        }
+                }
+
+                // The MPIWG list is optional; without this guard an unset
+                // "urlmeta.mpiwg" property ends in a NullPointerException.
+                if (metadDataClasses == null)
+                        return doc;
+
+                for (String metatag : metadDataClasses) {
+                        String[] vals = md.getValues(metatag);
+                        if (vals.length == 0) {
+                                // try once more whether "metatag.<name>" exists
+                                vals = md.getValues(METATAG_PREFIX + metatag);
+                        }
+                        for (String value : vals) {
+                                trace(url, value);
+                                doc.add(metatag, value);
+                        }
+                }
+
+                return doc;
+        }
+
+        /**
+         * Appends one URL/value record to the trace file. Does nothing when the
+         * trace file is not open; write errors are logged and otherwise ignored
+         * so that tracing can never abort indexing.
+         */
+        private void trace(Text url, String value) {
+                if (fw == null)
+                        return;
+                try {
+                        fw.write("-------------\n");
+                        fw.write("URL:" + url);
+                        fw.write("-------------\n");
+                        fw.write(value);
+                        fw.write("\n");
+                        fw.flush();
+                } catch (IOException e) {
+                        LOG.warn("Could not write to trace file " + TRACE_FILE, e);
+                }
+        }
+
+        /** Returns the configuration last passed to {@link #setConf}. */
+        public Configuration getConf() {
+                return conf;
+        }
+
+        /**
+         * Stores the configuration, opens the trace file if it is not open yet,
+         * and reads the "urlmeta.tags" and "urlmeta.mpiwg" lists.
+         *
+         * <p>This method is invoked again for every filtered document, so the
+         * trace writer is created at most once instead of on every call.
+         *
+         * @param conf configuration to read from; when null only the reference
+         *             is stored and the tag lists are left untouched
+         */
+        public void setConf(Configuration conf) {
+                this.conf = conf;
+
+                if (fw == null && !traceUnavailable) {
+                        try {
+                                fw = new FileWriter(TRACE_FILE, true);
+                        } catch (IOException e) {
+                                traceUnavailable = true;
+                                LOG.warn("Could not open trace file " + TRACE_FILE
+                                                + "; tracing is disabled", e);
+                        }
+                }
+
+                if (conf == null)
+                        return;
+
+                // Either list may end up null when its property is not set.
+                urlMetaTags = conf.getStrings(CONF_PROPERTY);
+                metadDataClasses = conf.getStrings(CONF_PROPERTY_MPIWG);
+        }
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugin/parse-MPIWG-metaTag/src/java/de/mpiwg/itgroup/scoring/urlmeta/URLMetaScoringFilter.java	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,170 @@
+package de.mpiwg.itgroup.scoring.urlmeta;
+
+import java.util.Collection;
+import java.util.Map.Entry;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.Text;
+import org.apache.nutch.crawl.CrawlDatum;
+import org.apache.nutch.crawl.Inlinks;
+import org.apache.nutch.indexer.NutchDocument;
+import org.apache.nutch.parse.Parse;
+import org.apache.nutch.parse.ParseData;
+import org.apache.nutch.protocol.Content;
+import org.apache.nutch.scoring.ScoringFilter;
+import org.apache.nutch.scoring.ScoringFilterException;
+
+
+/**
+ * For documentation:
+ * 
+ * @see URLMetaIndexingFilter
+ */
+public class URLMetaScoringFilter extends Configured implements ScoringFilter {
+
+  private static final Log LOG = LogFactory.getLog(URLMetaScoringFilter.class);
+  private static final String CONF_PROPERTY = "urlmeta.tags"; // read by setConf
+
+  // MPIWG-specific key; the only code that read it in setConf is commented out
+  private static final String CONF_PROPERTY_MPIWG = "urlmeta.mpiwg";
+
+  private static String[] metaTags; // holds all metadata relevant for MPIWG indexing, i.e. special tags in meta and classes in the body
+  private Configuration conf; // NOTE(review): setConf in this class never assigns this field
+
+  /**
+   * Copies each configured metatag that the parse data of the current page
+   * carries onto the CrawlDatum metadata of every outlink target.
+   * Does nothing when no tags are configured or when targets or parseData
+   * is null; adjust is always returned unchanged.
+   *
+   * @see ScoringFilter#distributeScoreToOutlinks
+   */
+  public CrawlDatum distributeScoreToOutlinks(Text fromUrl,
+      ParseData parseData, Collection<Entry<Text, CrawlDatum>> targets,
+      CrawlDatum adjust, int allCount) throws ScoringFilterException {
+    boolean ready = metaTags != null && targets != null && parseData != null;
+    if (!ready) {
+      return adjust;
+    }
+
+    // every outlink is offered the same set of configured tags
+    for (Entry<Text, CrawlDatum> outlink : targets) {
+      for (String tagName : metaTags) {
+        String tagValue = parseData.getMeta(tagName);
+
+        // only tags actually present in the parse data are handed on
+        if (tagValue != null) {
+          Text key = new Text(tagName);
+          Text value = new Text(tagValue);
+          outlink.getValue().getMetaData().put(key, value);
+        }
+      }
+    }
+    return adjust;
+  }
+
+  /**
+   * Copies each configured metatag found in the CrawlDatum metadata into the
+   * metadata of the fetched content. Does nothing when no tags are configured
+   * or when content or datum is null.
+   *
+   * @see ScoringFilter#passScoreBeforeParsing
+   * @see URLMetaScoringFilter#passScoreAfterParsing
+   */
+  public void passScoreBeforeParsing(Text url, CrawlDatum datum, Content content) {
+    if (metaTags != null && content != null && datum != null) {
+      for (String tagName : metaTags) {
+        Text tagKey = new Text(tagName);
+        Text stored = (Text) datum.getMetaData().get(tagKey);
+
+        // tags the datum does not carry are left out
+        if (stored != null) {
+          content.getMetadata().set(tagName, stored.toString());
+        }
+      }
+    }
+  }
+
+  /**
+   * Takes the metadata, which was lumped inside the content, and replicates it
+   * within your parse data.
+   * 
+   * @see URLMetaScoringFilter#passScoreBeforeParsing
+   * @see ScoringFilter#passScoreAfterParsing
+   */
+  public void passScoreAfterParsing(Text url, Content content, Parse parse) {
+    if (metaTags == null || content == null || parse == null)
+      return;
+
+    for (String metatag : metaTags) {
+      String metaFromContent = content.getMetadata().get(metatag);
+
+      if (metaFromContent == null)
+        continue;
+
+      parse.getData().getParseMeta().set(metatag, metaFromContent);
+    }
+  }
+
+  /** Not influenced by this filter: hands back initSort exactly as received. */
+  public float generatorSortValue(Text url, CrawlDatum datum, float initSort)
+      throws ScoringFilterException {
+    return initSort;
+  }
+
+  /** Not influenced by this filter: hands back initScore exactly as received. */
+  public float indexerScore(Text url, NutchDocument doc, CrawlDatum dbDatum,
+      CrawlDatum fetchDatum, Parse parse, Inlinks inlinks, float initScore)
+      throws ScoringFilterException {
+    return initScore;
+  }
+
+  /** Not used by this filter; the datum is left as it is. */
+  public void initialScore(Text url, CrawlDatum datum)
+      throws ScoringFilterException {
+    // deliberately empty
+  }
+
+  /** Not used by this filter; the injected datum is left as it is. */
+  public void injectedScore(Text url, CrawlDatum datum)
+      throws ScoringFilterException {
+    // deliberately empty
+  }
+
+  /** Not used by this filter; neither datum is modified. */
+  public void updateDbScore(Text url, CrawlDatum old, CrawlDatum datum,
+      List inlinked) throws ScoringFilterException {
+    // deliberately empty
+  }
+
+  /**
+   * Hands the configuration to the Configured base class and, when it is not
+   * null, loads the values of the "urlmeta.tags" property into the static
+   * metaTags array. A null configuration leaves metaTags as it was.
+   *
+   * @param conf the configuration to store; may be null
+   */
+  public void setConf(Configuration conf) {
+    super.setConf(conf);
+
+    if (conf != null) {
+      // load all metadata types; only "urlmeta.tags" is read at present
+      metaTags = conf.getStrings(CONF_PROPERTY);
+
+      // disabled: merging in the values of the "urlmeta.mpiwg" property
+      //String[] classMetadata = conf.getStrings(CONF_PROPERTY_MPIWG);
+      //metaTags = (String[]) ArrayUtils.addAll(metaTags, classMetadata);
+    }
+  }
+
+  /** Returns the configuration that setConf stored in the Configured base class. */
+  public Configuration getConf() {
+    return super.getConf();
+  }
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugin/parse-mpiwg/README.txt	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,17 @@
+Parse-metatags plugin
+
+The parse-metatags plugin consists of a HTMLParserFilter which takes as parameter a list of metatag names with '*' as default value. The values are separated by ';'.
+In order to extract the values of the metatags description and keywords, you must specify in nutch-site.xml
+
+<property>
+  <name>metatags.names</name>
+  <value>description;keywords</value>
+</property>
+
+The plugin prefixes the names with 'metatag.' in the parse metadata. For instance, to index description and keywords, you need to activate the plugin index-metadata and set the value of the parameter 'index.parse.md' to 'metatag.description;metatag.keywords'.
+  
+This code has been developed by DigitalPebble Ltd and offered to the community by ANT.com
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugin/parse-mpiwg/build.xml	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,24 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project name="parse-mpiwg" default="jar-core">
+
+	<import file="../build-plugin.xml" />
+
+	<!-- Deploy Unit test dependencies -->
+	
+</project>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugin/parse-mpiwg/ivy.xml	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,41 @@
+<?xml version="1.0" ?>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<ivy-module version="1.0">
+  <info organisation="org.apache.nutch" module="${ant.project.name}">
+    <license name="Apache 2.0"/>
+    <ivyauthor name="Apache Nutch Team" url="http://nutch.apache.org"/>
+    <description>
+        Apache Nutch
+    </description>
+  </info>
+
+  <configurations>
+    <include file="../../..//ivy/ivy-configurations.xml"/>
+  </configurations>
+
+  <publications>
+    <!--get the artifact from our module name-->
+    <artifact conf="master"/>
+  </publications>
+
+  <dependencies>
+  </dependencies>
+  
+</ivy-module>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugin/parse-mpiwg/plugin.xml	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,29 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<plugin
+   id="parse-mpiwg"
+   name="MPIWG Parse"
+   version="1.0"
+   provider-name="digitalpebble.com">
+
+   <runtime>
+      <library name="parse-mpiwg.jar">
+         <export name="*"/>
+      </library>
+   </runtime>
+
+   <extension id="de.mpiwg.itgroup.mpiwg.parse"
+              name="MPIWG Parser"
+              point="org.apache.nutch.parse.HtmlParseFilter">
+      <implementation id="MPIWGParser"
+                      class="de.mpiwg.itgroup.mpiwg.parse.MPIWGParser"/>
+   </extension>
+   
+    <extension id="de.mpiwg.itgroup.mpiwg.parse"
+              name="MPIWG Dom Parser"
+              point="org.apache.nutch.parse.HtmlParseFilter">
+      <implementation id="MPIWGDomParser"
+                      class="de.mpiwg.itgroup.mpiwg.parse.MPIWGDomParser"/>
+   </extension>
+
+</plugin>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugin/parse-mpiwg/sample/testMetatags.html	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,9 @@
+<html>
+<head>
+<meta name="Keywords" content="This is a test of keywords" />
+<meta name="Description" content="This is a test of description" />
+</head>
+<body>
+text of the document
+</body>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugin/parse-mpiwg/src/java/de/mpiwg/itgroup/mpiwg/parse/MPIWGDomFilter.java	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,17 @@
+package de.mpiwg.itgroup.mpiwg.parse;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class MPIWGDomFilter { // one <filter> entry of the mpiwg-dom-parser configuration
+
+	public String name; // text of <name>; MPIWGDomParser uses it as map key and as metadata field name
+	public String tagname; // text of <tagname>; filters without it are not evaluated by MPIWGDomParser.filter
+	public String tagclass; // text of <tagclass>; stored, but not read by MPIWGDomParser.filter
+
+	// all fields stay null until MPIWGDomParser.setConf assigns them
+	public MPIWGDomFilter(){
+		
+	}
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugin/parse-mpiwg/src/java/de/mpiwg/itgroup/mpiwg/parse/MPIWGDomParser.java	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,241 @@
+package de.mpiwg.itgroup.mpiwg.parse;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+import org.apache.nutch.parse.HTMLMetaTags;
+import org.apache.nutch.parse.HtmlParseFilter;
+import org.apache.nutch.parse.Parse;
+import org.apache.nutch.parse.ParseResult;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.metadata.Metadata;
+
+import org.apache.nutch.protocol.Content;
+import org.apache.taglibs.standard.tag.common.xml.XPathUtil;
+import org.apache.xerces.dom.DocumentFragmentImpl;
+
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.w3c.dom.Document;
+import org.w3c.dom.DocumentFragment;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+import org.w3c.dom.Text;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+import java.io.Reader;
+
+import javax.servlet.jsp.JspTagException;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.xpath.XPath;
+import javax.xml.xpath.XPathConstants;
+import javax.xml.xpath.XPathExpression;
+import javax.xml.xpath.XPathExpressionException;
+import javax.xml.xpath.XPathFactory;
+
+public class MPIWGDomParser implements HtmlParseFilter {
+
+	public static final Logger LOG = LoggerFactory.getLogger(MPIWGDomParser.class);
+
+	//public static final String TAG_KEY = "uploader";
+
+	private FileWriter fw;
+
+	/** Opens the debug dump file /tmp/out3, truncating any previous content. */
+	public MPIWGDomParser(){
+		try {
+			fw = new FileWriter("/tmp/out3");
+		} catch (IOException e) {
+			LOG.warn("cannot open debug dump /tmp/out3", e);
+		}
+	}
+	// private static final Pattern selectInfoPattern =
+	// Pattern.compile("<span class=\"mpiwg-first_name\">(.*?)</span><span class=\"mpiwg-last_name\">(.*?)</span>");
+	// private Pattern selectInfoPattern = null;
+	// private String[] groupNames = null;
+	// private String lineIdentification=null;
+
+	private Map<String,MPIWGDomFilter> filters = new HashMap<String,MPIWGDomFilter>();
+	private Configuration conf;
+
+	public void setConf(Configuration conf) {
+		this.conf = conf;
+		if (conf == null)
+			return;
+		// the default constructor was called
+
+		String confName = getConf().get("urlmeta.mpiwg-dom-parser");
+		Reader reader = getConf().getConfResourceAsReader(confName);
+
+		// borrowed heavily from code in Configuration.java
+		Document doc;
+		try {
+			doc = DocumentBuilderFactory.newInstance().newDocumentBuilder()
+					.parse(new InputSource(reader));
+
+			Element root = doc.getDocumentElement();
+			if ((!"mpiwg-dom-parser".equals(root.getTagName()))
+					&& (LOG.isErrorEnabled())) {
+				LOG.error("bad conf file: top-level element not <mpiwg-parser>");
+			}
+
+			// finde all filter
+			NodeList filters = root.getChildNodes();
+			for (int i = 0; i < filters.getLength(); i++) {
+				Node filterNode = filters.item(i);
+				if (!(filterNode instanceof Element))
+					continue;
+				Element filter = (Element) filterNode;
+				if ((!"filter".equals(filter.getTagName()))
+						&& (LOG.isWarnEnabled())) {
+					LOG.warn("bad conf file: element not <filter>");
+				}
+
+				MPIWGDomFilter currentFilter = new MPIWGDomFilter();
+				// gehe jetzt durch die filter
+				NodeList fields = filter.getChildNodes();
+				for (int j = 0; j < fields.getLength(); j++) {
+					Node fieldNode = fields.item(j);
+					if (!(fieldNode instanceof Element))
+						continue;
+					Element field = (Element) fieldNode;
+					if ("name".equals(field.getTagName())
+							&& field.hasChildNodes())
+						currentFilter.name = ((Text) field.getFirstChild())
+								.getData();
+
+					if ("tagname".equals(field.getTagName())
+							&& field.hasChildNodes())
+						currentFilter.tagname = ((Text) field
+								.getFirstChild()).getData();
+
+					if ("tagclass".equals(field.getTagName())
+							&& field.hasChildNodes())
+						currentFilter.tagclass = ((Text) field
+								.getFirstChild()).getData();
+
+					
+				}
+				this.filters.put(currentFilter.name,currentFilter);
+			}
+		} catch (Exception e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+		}
+
+	}
+
+	public Configuration getConf() { // the configuration last given to setConf
+		return conf;
+	}
+
+	/**
+	 * Adds MPIWG-specific fields to the parse metadata stored for the content's
+	 * URL: "urlNorm" (the URL without "index.html"/"index_html" and without a
+	 * trailing slash), "lang" ("en" if the URL contains "/en/", otherwise "de")
+	 * and, under the name of every configured filter that has a tagname, the
+	 * text of all DIV elements of class "main".
+	 *
+	 * @param content     fetched content; only its URL is used
+	 * @param parseResult parse result whose metadata is extended
+	 * @param metaTags    not used
+	 * @param doc         DOM of the page, searched with XPath
+	 * @return the parseResult that was passed in
+	 */
+	public ParseResult filter(Content content, ParseResult parseResult,
+			HTMLMetaTags metaTags, DocumentFragment doc) {
+
+		// NOTE(review): this re-reads the filter configuration on every call
+		if (conf != null)
+			this.setConf(conf);
+
+		Parse parse = parseResult.get(content.getUrl());
+		if (parse == null) // no parse for this URL, nothing to annotate
+			return parseResult;
+		Metadata metadata = parse.getData().getParseMeta();
+
+		// normalise the url: index.html and index_html are removed
+		// NOTE(review): replace() strips them anywhere in the URL, not only at the end
+		String urlNorm = content.getUrl().replace("index.html", "").replace("index_html","");
+		if (urlNorm.endsWith("/")){
+			urlNorm=urlNorm.substring(0,urlNorm.length()-1);
+		}
+		metadata.add("urlNorm", urlNorm);
+
+		// language
+		if (urlNorm.contains("/en/")){
+			metadata.add("lang", "en");
+		} else {
+			metadata.add("lang", "de");
+		}
+
+		String text = null; // extracted on first use; the XPath is the same for every filter
+		for (String currentFilterName : filters.keySet()) {
+			MPIWGDomFilter currentFilter = filters.get(currentFilterName);
+			if (currentFilter.tagname == null) // no pattern set: skip this filter only
+				continue;
+
+			if (text == null) {
+				text = extractMainText(doc);
+				if (text == null) // XPath failed, already logged
+					return parseResult;
+			}
+
+			// debug dump of what is about to be stored
+			dump("-------------\n", "URL:"+content.getUrl(), "-------------\n", text, "\n");
+			metadata.add(currentFilterName, text);
+			// debug dump of what the metadata now returns
+			dump("XXXXXXXXXXXXXX\n", metadata.get(currentFilterName), "\n");
+		}
+		return parseResult;
+	}
+
+	/** Returns the joined text of all DIV elements of class "main", or null if the XPath fails. */
+	private String extractMainText(DocumentFragment doc) {
+		try {
+			XPath xp = XPathFactory.newInstance().newXPath();
+			NodeList res = (NodeList) xp.compile("//DIV[@class=\"main\"]")
+					.evaluate(doc, XPathConstants.NODESET);
+			StringBuilder joined = new StringBuilder();
+			for (int i = 0; i < res.getLength(); i++) {
+				joined.append(res.item(i).getTextContent());
+			}
+			return joined.toString();
+		} catch (XPathExpressionException e) {
+			LOG.warn("cannot evaluate XPath //DIV[@class=\"main\"]", e);
+			return null;
+		}
+	}
+
+	/** Appends the parts to the debug dump file; a no-op when the file could not be opened. */
+	private void dump(String... parts) {
+		if (fw == null)
+			return;
+		try {
+			for (String part : parts) {
+				fw.write(part);
+			}
+			fw.flush();
+		} catch (IOException e) {
+			LOG.warn("cannot write debug dump", e);
+		}
+	}
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugin/parse-mpiwg/src/java/de/mpiwg/itgroup/mpiwg/parse/MPIWGFilter.java	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,19 @@
+package de.mpiwg.itgroup.mpiwg.parse;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class MPIWGFilter { // one <filter> entry of the mpiwg-parser configuration, filled by MPIWGParser.setConf
+
+	public String name; // text of <name>; key of the filter in MPIWGParser's filter map
+	public String searchPattern=""; // regular expression; MPIWGParser compiles it with Pattern.DOTALL
+	public List<String> groupNames; // metadata names for the capture groups, in group order
+	public String lineIdentification; // text a line must contain before the pattern is tried on it
+	public int mutiline; // upper bound on the following lines appended before matching (name is sic)
+
+	// starts with an empty group-name list; all other fields keep their defaults
+	public MPIWGFilter(){
+		groupNames= new ArrayList<String>();
+	}
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugin/parse-mpiwg/src/java/de/mpiwg/itgroup/mpiwg/parse/MPIWGParser.java	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,225 @@
+package de.mpiwg.itgroup.mpiwg.parse;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+import org.apache.nutch.parse.HTMLMetaTags;
+import org.apache.nutch.parse.HtmlParseFilter;
+import org.apache.nutch.parse.Parse;
+import org.apache.nutch.parse.ParseResult;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.metadata.Metadata;
+
+import org.apache.nutch.protocol.Content;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.w3c.dom.Document;
+import org.w3c.dom.DocumentFragment;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+import org.w3c.dom.Text;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+import java.io.Reader;
+
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+public class MPIWGParser implements HtmlParseFilter {
+
+	public static final Logger LOG = LoggerFactory.getLogger(MPIWGParser.class);
+
+	public static final String TAG_KEY = "uploader";
+
+	private FileWriter fw;
+
+	/** Opens the debug dump file /tmp/out, truncating any previous content. */
+	public MPIWGParser(){
+		try {
+			fw = new FileWriter("/tmp/out");
+		} catch (IOException e) {
+			LOG.warn("cannot open debug dump /tmp/out", e);
+		}
+	}
+	// private static final Pattern selectInfoPattern =
+	// Pattern.compile("<span class=\"mpiwg-first_name\">(.*?)</span><span class=\"mpiwg-last_name\">(.*?)</span>");
+	// private Pattern selectInfoPattern = null;
+	// private String[] groupNames = null;
+	// private String lineIdentification=null;
+
+	private Map<String,MPIWGFilter> filters = new HashMap<String,MPIWGFilter>();
+	private Configuration conf;
+
+	public void setConf(Configuration conf) {
+		this.conf = conf;
+		if (conf == null)
+			return;
+		// the default constructor was called
+
+		String confName = getConf().get("urlmeta.mpiwg-parser");
+		Reader reader = getConf().getConfResourceAsReader(confName);
+
+		// borrowed heavily from code in Configuration.java
+		Document doc;
+		try {
+			doc = DocumentBuilderFactory.newInstance().newDocumentBuilder()
+					.parse(new InputSource(reader));
+
+			Element root = doc.getDocumentElement();
+			if ((!"mpiwg-parser".equals(root.getTagName()))
+					&& (LOG.isErrorEnabled())) {
+				LOG.error("bad conf file: top-level element not <mpiwg-parser>");
+			}
+
+			// finde all filter
+			NodeList filters = root.getChildNodes();
+			for (int i = 0; i < filters.getLength(); i++) {
+				Node filterNode = filters.item(i);
+				if (!(filterNode instanceof Element))
+					continue;
+				Element filter = (Element) filterNode;
+				if ((!"filter".equals(filter.getTagName()))
+						&& (LOG.isWarnEnabled())) {
+					LOG.warn("bad conf file: element not <filter>");
+				}
+
+				MPIWGFilter currentFilter = new MPIWGFilter();
+				// gehe jetzt durch die filter
+				NodeList fields = filter.getChildNodes();
+				currentFilter.mutiline=-1;
+				for (int j = 0; j < fields.getLength(); j++) {
+					Node fieldNode = fields.item(j);
+					if (!(fieldNode instanceof Element))
+						continue;
+					Element field = (Element) fieldNode;
+					if ("name".equals(field.getTagName())
+							&& field.hasChildNodes())
+						currentFilter.name = ((Text) field.getFirstChild())
+								.getData();
+
+					if ("searchPattern".equals(field.getTagName())
+							&& field.hasChildNodes())
+						currentFilter.searchPattern = ((Text) field
+								.getFirstChild()).getData();
+
+					if ("line-identification".equals(field.getTagName())
+							&& field.hasChildNodes())
+						currentFilter.lineIdentification = ((Text) field
+								.getFirstChild()).getData();
+
+					if ("multiline".equals(field.getTagName())
+							&& field.hasChildNodes())
+						currentFilter.mutiline = Integer.valueOf(((Text) field
+								.getFirstChild()).getData());
+
+					if ("group-name".equals(field.getTagName())
+							&& field.hasChildNodes())
+						currentFilter.groupNames.add(((Text) field
+								.getFirstChild()).getData());
+
+				}
+				this.filters.put(currentFilter.name,currentFilter);
+			}
+		} catch (Exception e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+		}
+
+	}
+
+	public Configuration getConf() { // the configuration last given to setConf
+		return conf;
+	}
+
+	/**
+	 * Applies every configured regex filter to the raw content (read line by line
+	 * as UTF-8): a line containing the filter's lineIdentification is extended by
+	 * up to "multiline" following lines and searched with the filter's pattern;
+	 * group i+1 of a hit is stored trimmed under the i-th group name in the parse
+	 * metadata. Always returns the parseResult passed in.
+	 */
+	public ParseResult filter(Content content, ParseResult parseResult,
+			HTMLMetaTags metaTags, DocumentFragment doc) {
+		// NOTE(review): this re-reads the filter configuration on every call
+		if (conf != null)
+			this.setConf(conf);
+
+		Parse parse = parseResult.get(content.getUrl());
+		if (parse == null) // no parse for this URL, nothing to annotate
+			return parseResult;
+		Metadata metadata = parse.getData().getParseMeta();
+
+		for (String currentFilterName : filters.keySet()) {
+			MPIWGFilter currentFilter = filters.get(currentFilterName);
+			// an incomplete filter is skipped; it must not stop the other filters
+			if (currentFilter.searchPattern == null || currentFilter.lineIdentification == null)
+				continue;
+			Pattern pattern;
+			try {
+				pattern = Pattern.compile(currentFilter.searchPattern,Pattern.DOTALL);
+			} catch (PatternSyntaxException e) {
+				LOG.warn("bad search pattern in filter " + currentFilterName, e);
+				continue;
+			}
+			Map<String, String> tags = new HashMap<String, String>();
+			try {
+				BufferedReader reader = new BufferedReader(new InputStreamReader(
+						new ByteArrayInputStream(content.getContent()),"utf-8"));
+				String line;
+				while ((line = reader.readLine()) != null) {
+					if (!line.contains(currentFilter.lineIdentification))
+						continue;
+					// multiline matching: first collect the following lines; the count is
+					// tested first so that no line is read only to be thrown away
+					int count = 0;
+					String line2;
+					while (count<currentFilter.mutiline && (line2 = reader.readLine()) != null) {
+						count++;
+						line+=line2;
+					}
+					Matcher m = pattern.matcher(line);
+					if (m.find()) {
+						// assign the group names to the groups, as far as both exist
+						for (int i = 0; i < currentFilter.groupNames.size() && i < m.groupCount(); i++) {
+							String group = m.group(i + 1);
+							if (group != null)
+								tags.put(currentFilter.groupNames.get(i), group.trim());
+						}
+					}
+				}
+				reader.close();
+			} catch (IOException e) {
+				LOG.warn("IOException encountered parsing file:", e);
+			}
+			for (String tag : tags.keySet()) {
+				if (fw != null) { // the debug dump is optional
+					try {
+						fw.write(String.format("%s - %s", tag, tags.get(tag)));
+						fw.flush();
+					} catch (IOException e) {
+						LOG.warn("cannot write debug dump", e);
+					}
+				}
+				metadata.add(tag, tags.get(tag));
+			}
+		}
+		return parseResult;
+	}
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugin/parse-mpiwg/src/test/org/apache/nutch/parse/html/TestMetatagParser.java	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.parse.html;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Text;
+import org.apache.nutch.crawl.CrawlDatum;
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.parse.Parse;
+import org.apache.nutch.parse.ParseUtil;
+import org.apache.nutch.protocol.Content;
+import org.apache.nutch.protocol.Protocol;
+import org.apache.nutch.protocol.ProtocolFactory;
+import org.apache.nutch.util.NutchConfiguration;
+
+public class TestMetatagParser extends TestCase {
+
+  private String fileSeparator = System.getProperty("file.separator");
+  private String sampleDir = System.getProperty("test.data", ".");
+  private String sampleFile = "testMetatags.html";
+  private String description = "This is a test of description";
+  private String keywords = "This is a test of keywords";
+
+  public TestMetatagParser(String name) {
+    super(name);
+  }
+
+  /** Fetches the sample page through its file: URL, parses it and checks the two metatag values. */
+  public void testIt() {
+    Configuration conf = NutchConfiguration.create();
+    String sampleUrl = "file:" + sampleDir + fileSeparator + sampleFile;
+
+    try {
+      Protocol fileProtocol = new ProtocolFactory(conf).getProtocol(sampleUrl);
+      Text urlText = new Text(sampleUrl);
+      Content fetched = fileProtocol.getProtocolOutput(urlText, new CrawlDatum()).getContent();
+
+      ParseUtil parser = new ParseUtil(conf);
+      Parse parsed = parser.parse(fetched).get(fetched.getUrl());
+      Metadata parseMeta = parsed.getData().getParseMeta();
+
+      // check that we get the same values
+      assertEquals(description, parseMeta.get("metatag.description"));
+      assertEquals(keywords, parseMeta.get("metatag.keywords"));
+    } catch (Exception e) {
+      e.printStackTrace();
+      fail(e.toString());
+    }
+  }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugin/plugin.dtd	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,206 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one or more
+ ! contributor license agreements.  See the NOTICE file distributed with
+ ! this work for additional information regarding copyright ownership.
+ ! The ASF licenses this file to You under the Apache License, Version 2.0
+ ! (the "License"); you may not use this file except in compliance with
+ ! the License.  You may obtain a copy of the License at
+ !
+ !     http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing, software
+ ! distributed under the License is distributed on an "AS IS" BASIS,
+ ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ! See the License for the specific language governing permissions and
+ ! limitations under the License.
+ !
+ !
+ !  Document   : plugin.dtd
+ !  Created on : 14 avril 2006, 22:14
+ !  Author     : Chris Mattmann, Jerome Charron
+ !  Description: Nutch plug-in manifest DTD
+ !
+ !  PUBLIC ID  : -//Apache Software Fundation//DTD Nutch Plugin Manifest 1.0//EN
+ !  SYSTEM ID  : http://lucene.apache.org/nutch/plugin.dtd
+-->
+
+
+
+<!--
+ ! The <plugin> element defines the body of the manifest.
+ ! It optionally contains definitions for the plug-in runtime,
+ ! definitions of other plug-ins required by this one,
+ ! declarations of any new extension points being introduced by the plug-in,
+ ! as well as configuration of functional extensions
+ ! (configured into extension points defined by other plug-ins,
+ ! or introduced by this plug-in).
+ !-->
+<!ELEMENT plugin (runtime?, requires?, extension-point*, extension*)>
+
+<!-- A user displayable name for the plug-in -->
+<!ATTLIST plugin name CDATA #REQUIRED>
+
+<!-- 
+ ! A unique identifier for the plug-in.
+ ! To minimize potential for naming collisions,
+ ! the identifier should be derived from the internet domain id
+ ! of the supplying provider (reversing the domain name tokens and
+ ! appending additional name tokens separated by dot [.]).
+ ! For example, provider nutch.org could define plug-in identifier
+ ! org.nutch.myplugin
+ !-->
+<!ATTLIST plugin id CDATA #REQUIRED>
+
+<!--
+ ! The plug-in version number.
+ ! NOTE : Version numbers compatibility are not yet implemented.
+ !-->
+<!ATTLIST plugin version CDATA #REQUIRED>
+
+<!-- The user-displayable name of the provider supplying the plug-in. -->
+<!ATTLIST plugin provider-name CDATA #IMPLIED>
+
+<!--
+ ! The name of the plug-in class for this plug-in.
+ ! The class must be a subclass of org.apache.nutch.plugin.Plugin
+ !-->
+<!ATTLIST plugin class CDATA #IMPLIED>
+
+
+<!-- 
+ ! The <requires> section of the manifest declares
+ ! any dependencies on other plug-ins.
+ !-->
+<!ELEMENT requires (import+)>
+
+
+<!-- Each dependency is specified using an <import> element. -->
+<!ELEMENT import EMPTY>
+
+<!-- The identifier of the required plug-in. -->
+<!ATTLIST import plugin CDATA #REQUIRED>
+
+
+<!--
+ ! The <runtime> section of the manifest contains a definition of one or more
+ ! libraries that make up the plug-in runtime.
+ ! The referenced libraries are used by the plugin execution mechanisms
+ ! (the plug-in class loader) to load and execute the correct code required by
+ ! the plug-in.
+ !-->
+<!ELEMENT runtime (library+)>
+
+
+<!--
+ !The <library> elements collectively define the plug-in runtime.
+ ! At least one <library> must be specified.
+ !-->
+<!ELEMENT library (export*)>
+
+<!--
+ ! A string reference to a library file or directory containing classes
+ ! (relative to the plug-in install directory).
+ ! Directory references must contain trailing file separator.
+ !-->
+<!ATTLIST library name CDATA #REQUIRED>
+
+
+<!--
+ ! Each <library> element can specify which portion
+ ! of the library should be exported.
+ ! The export rules are specified as a set of export masks.
+ ! By default (no export rules specified),
+ ! the library is considered to be private.
+ ! Each export mask is specified using the name attribute.
+ !-->
+<!ELEMENT export EMPTY>
+
+<!--
+ ! The export mask can have the following values:
+ !   * - indicates all contents of library are exported (public)
+ !   package.name.* - indicates all classes in the specified package
+ !                    are exported. The matching rules are the same as in the
+ !                    Java import statement.
+ !   package.name.ClassName - fully qualified java class name
+ !
+ ! NOTE : export mask is not yet implemented in Nutch.
+ !-->
+<!ATTLIST export name CDATA #REQUIRED>
+
+
+<!--
+ ! Nutch's architecture is based on the notion of configurable extension points.
+ ! Nutch itself predefines a set of extension points that cover the task of
+ ! extending it (for example, adding parser, indexing filter, ...).
+ ! In addition to the predefined extension points, each supplied plug-in can
+ ! declare additional extension points. By declaring an extension point the
+ ! plug-in is essentially advertising the ability to configure the plug-in
+ ! function with externally supplied extensions.
+ !-->
+<!ELEMENT extension-point EMPTY>
+
+<!-- A user-displayable name for the extension point. -->
+<!ATTLIST extension-point name CDATA #REQUIRED>
+
+<!-- A simple id, unique within this plug-in -->
+<!ATTLIST extension-point id CDATA #REQUIRED>
+
+
+<!--
+ ! Actual extensions are configured into extension points
+ ! (predefined, or newly declared in this plug-in) in the <extension> section.
+ !
+ ! The configuration information is specified by at least one implementation
+ ! with some parameters.
+ !-->
+<!ELEMENT extension (implementation+)>
+
+<!-- 
+ ! A reference to an extension point being configured.
+ ! The extension point can be one defined in this plug-in or another plug-in.
+ !-->
+<!ATTLIST extension point CDATA #REQUIRED>
+
+<!--
+ ! Optional identifier for this extension point configuration instance.
+ ! This is used by extension points that need to uniquely identify
+ ! (rather than just enumerate) the specific configured extensions.
+ ! The identifier is specified as a simple token unique within the definition
+ ! of the declaring plug-in. When used globally, the extension identifier
+ ! is qualified by the plug-in identifier.
+ ! FIXME : Seems it is never read in the code.
+ !-->
+<!ATTLIST extension id CDATA #IMPLIED>
+
+<!--
+ ! A user-displayable name for the extension.
+ ! FIXME : Seems it is never read in the code.
+ !-->
+<!ATTLIST extension name CDATA #IMPLIED>
+
+
+<!--
+ ! Defines a specific implementation for the extension.
+ ! This implementation can define some special name/value parameters
+ ! used at runtime.
+ !-->
+<!ELEMENT implementation (parameter*)>
+
+<!-- A unique identifier for this implementation -->
+<!ATTLIST implementation id CDATA #REQUIRED>
+
+<!-- The fully-qualified Java Class that implements this extension-point -->
+<!ATTLIST implementation class CDATA #REQUIRED>
+
+
+<!-- Defines a name/value parameter -->
+<!ELEMENT parameter EMPTY>
+
+<!-- The parameter's name (should be unique for an extension) -->
+<!ATTLIST parameter name CDATA #REQUIRED>
+
+<!-- The parameter's value -->
+<!ATTLIST parameter value CDATA #REQUIRED> 
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/urls/seed.txt	Tue Feb 26 15:50:30 2013 +0100
@@ -0,0 +1,1 @@
+http://127.0.0.1:18080/www_neu/en/staff/members/vagnew/index_html
\ No newline at end of file