# HG changeset patch # User dwinter # Date 1288783996 -3600 # Node ID dc7622afcfea84c87397fa1208e327119453d32f initial diff -r 000000000000 -r dc7622afcfea .classpath --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.classpath Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff -r 000000000000 -r dc7622afcfea .project --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.project Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,36 @@ + + + fulltextIndexer + + + + + + org.eclipse.wst.common.project.facet.core.builder + + + + + org.eclipse.jdt.core.javabuilder + + + + + edu.mit.csail.relo.jdt.builder.ReloJDTBuilder + + + + + org.eclipse.wst.validation.validationbuilder + + + + + + org.eclipse.jdt.core.javanature + edu.mit.csail.relo.jdt.builder.ReloJDTBuilderProjectNature + org.eclipse.wst.common.project.facet.core.nature + org.eclipse.wst.common.modulecore.ModuleCoreNature + org.eclipse.jem.workbench.JavaEMFNature + + diff -r 000000000000 -r dc7622afcfea .settings/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.settings/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,23 @@ +K 25 +svn:wc:ra_dav:version-url +V 50 +/svn/!svn/ver/2621/fulltextIndexer/trunk/.settings +END +org.eclipse.wst.common.project.facet.core.xml +K 25 +svn:wc:ra_dav:version-url +V 96 +/svn/!svn/ver/2873/fulltextIndexer/trunk/.settings/org.eclipse.wst.common.project.facet.core.xml +END +org.eclipse.wst.common.component +K 25 +svn:wc:ra_dav:version-url +V 83 +/svn/!svn/ver/2873/fulltextIndexer/trunk/.settings/org.eclipse.wst.common.component +END +org.eclipse.jdt.core.prefs +K 25 +svn:wc:ra_dav:version-url +V 77 +/svn/!svn/ver/2621/fulltextIndexer/trunk/.settings/org.eclipse.jdt.core.prefs +END diff -r 000000000000 -r dc7622afcfea .settings/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.settings/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,137 @@ +10 + +dir +2621 
+https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/.settings +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-16T19:13:23.990503Z +2621 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +org.eclipse.jdt.core.prefs +file + + + + +2010-06-16T08:04:56.000000Z +a055e9715f4064c5f73ae4384e1c83ff +2010-06-16T19:13:23.990503Z +2621 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +618 + +org.eclipse.wst.common.component +file +2873 + + + +2010-07-12T19:40:15.000000Z +5c43faf0bf17bc53f6f3481304cb7cd9 +2010-11-03T11:29:50.531613Z +2873 +dwinter + + + + + + + + + + + + + + + + + + + + + +239 + +org.eclipse.wst.common.project.facet.core.xml +file +2873 + + + +2010-07-12T19:40:15.000000Z +c96151f54bf7ac99289140db4afa9b41 +2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +230 + diff -r 000000000000 -r dc7622afcfea .settings/.svn/prop-base/org.eclipse.jdt.core.prefs.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.settings/.svn/prop-base/org.eclipse.jdt.core.prefs.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea .settings/.svn/prop-base/org.eclipse.wst.common.project.facet.core.xml.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.settings/.svn/prop-base/org.eclipse.wst.common.project.facet.core.xml.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea .settings/.svn/text-base/org.eclipse.jdt.core.prefs.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.settings/.svn/text-base/org.eclipse.jdt.core.prefs.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,12 @@ +#Wed Jun 16 10:04:56 CEST 2010 +eclipse.preferences.version=1 +org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled 
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6 +org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve +org.eclipse.jdt.core.compiler.compliance=1.6 +org.eclipse.jdt.core.compiler.debug.lineNumber=generate +org.eclipse.jdt.core.compiler.debug.localVariable=generate +org.eclipse.jdt.core.compiler.debug.sourceFile=generate +org.eclipse.jdt.core.compiler.problem.assertIdentifier=error +org.eclipse.jdt.core.compiler.problem.enumIdentifier=error +org.eclipse.jdt.core.compiler.source=1.6 diff -r 000000000000 -r dc7622afcfea .settings/.svn/text-base/org.eclipse.wst.common.component.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.settings/.svn/text-base/org.eclipse.wst.common.component.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,6 @@ + + + + + + diff -r 000000000000 -r dc7622afcfea .settings/.svn/text-base/org.eclipse.wst.common.project.facet.core.xml.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.settings/.svn/text-base/org.eclipse.wst.common.project.facet.core.xml.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,7 @@ + + + + + + + diff -r 000000000000 -r dc7622afcfea .settings/org.eclipse.jdt.core.prefs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.settings/org.eclipse.jdt.core.prefs Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,12 @@ +#Wed Jun 16 10:04:56 CEST 2010 +eclipse.preferences.version=1 +org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled +org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6 +org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve +org.eclipse.jdt.core.compiler.compliance=1.6 +org.eclipse.jdt.core.compiler.debug.lineNumber=generate +org.eclipse.jdt.core.compiler.debug.localVariable=generate +org.eclipse.jdt.core.compiler.debug.sourceFile=generate +org.eclipse.jdt.core.compiler.problem.assertIdentifier=error +org.eclipse.jdt.core.compiler.problem.enumIdentifier=error +org.eclipse.jdt.core.compiler.source=1.6 diff -r 000000000000 -r dc7622afcfea 
.settings/org.eclipse.wst.common.component --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.settings/org.eclipse.wst.common.component Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,6 @@ + + + + + + diff -r 000000000000 -r dc7622afcfea .settings/org.eclipse.wst.common.project.facet.core.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.settings/org.eclipse.wst.common.project.facet.core.xml Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,7 @@ + + + + + + + diff -r 000000000000 -r dc7622afcfea .svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,35 @@ +K 25 +svn:wc:ra_dav:version-url +V 40 +/svn/!svn/ver/2620/fulltextIndexer/trunk +END +catalog.xml +K 25 +svn:wc:ra_dav:version-url +V 52 +/svn/!svn/ver/2873/fulltextIndexer/trunk/catalog.xml +END +.project +K 25 +svn:wc:ra_dav:version-url +V 49 +/svn/!svn/ver/2873/fulltextIndexer/trunk/.project +END +.classpath +K 25 +svn:wc:ra_dav:version-url +V 51 +/svn/!svn/ver/2873/fulltextIndexer/trunk/.classpath +END +model.uml +K 25 +svn:wc:ra_dav:version-url +V 50 +/svn/!svn/ver/2621/fulltextIndexer/trunk/model.uml +END +model.umldi +K 25 +svn:wc:ra_dav:version-url +V 52 +/svn/!svn/ver/2621/fulltextIndexer/trunk/model.umldi +END diff -r 000000000000 -r dc7622afcfea .svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,498 @@ +10 + +dir +2620 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-16T19:12:45.887295Z +2620 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + +.classpath +file +2873 + + + +2010-07-12T19:48:13.000000Z +d33a9b722757a0fd7a9aaba93d54e716 +2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +947 + +.project +file +2873 + + + +2010-07-12T19:40:15.000000Z +dce2778d271533ad76e780a5f120b0d1 
+2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +1036 + +.settings +dir + +catalog.xml +file +2873 + + + +2010-06-16T18:30:41.000000Z +defcc90d6d4ae4f1992eb6e072597d86 +2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +480 + +doc +dir + +jaxen-1.1.1.jar +file +2873 + + + + + + + + + + + + + + + + + + + +deleted + +jdom-1.0.jar +file +2873 + + + + + + + + + + + + + + + + + + + +deleted + +libs +dir + +model.uml +file +2621 + + + +2010-06-16T12:28:49.000000Z +1f34cd7cc662d2f08ab9f9eecc98cdf5 +2010-06-16T19:13:23.990503Z +2621 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +71194 + +model.umldi +file +2621 + + + +2010-06-16T12:50:26.000000Z +f762ff855d639e5308d4e9b3b17b65a6 +2010-06-16T19:13:23.990503Z +2621 +dwinter + + + + + + + + + + + + + + + + + + + + + +710 + +src +dir + +ws-commons-util-1.0.2.jar +file +2873 + + + + + + + + + + + + + + + + + + + +deleted + +xmlrpc-client-3.1-javadoc.jar +file +2873 + + + + + + + + + + + + + + + + + + + +deleted + +xmlrpc-client-3.1-sources.jar +file +2873 + + + + + + + + + + + + + + + + + + + +deleted + +xmlrpc-client-3.1.jar +file +2873 + + + + + + + + + + + + + + + + + + + +deleted + +xmlrpc-common-3.1-javadoc.jar +file +2873 + + + + + + + + + + + + + + + + + + + +deleted + +xmlrpc-common-3.1-sources.jar +file +2873 + + + + + + + + + + + + + + + + + + + +deleted + +xmlrpc-common-3.1.jar +file +2873 + + + + + + + + + + + + + + + + + + + +deleted + +xmlrpc-server-3.1-javadoc.jar +file +2873 + + + + + + + + + + + + + + + + + + + +deleted + +xmlrpc-server-3.1-sources.jar +file +2873 + + + + + + + + + + + + + + + + + + + +deleted + +xmlrpc-server-3.1.jar +file +2873 + + + + + + + + + + + + + + + + + + + +deleted + diff -r 000000000000 -r dc7622afcfea .svn/prop-base/.classpath.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.svn/prop-base/.classpath.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 
+svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea .svn/prop-base/.project.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.svn/prop-base/.project.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea .svn/prop-base/catalog.xml.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.svn/prop-base/catalog.xml.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea .svn/prop-base/model.uml.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.svn/prop-base/model.uml.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea .svn/text-base/.classpath.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.svn/text-base/.classpath.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff -r 000000000000 -r dc7622afcfea .svn/text-base/.project.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.svn/text-base/.project.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,36 @@ + + + fulltextIndexer + + + + + + org.eclipse.wst.common.project.facet.core.builder + + + + + org.eclipse.jdt.core.javabuilder + + + + + edu.mit.csail.relo.jdt.builder.ReloJDTBuilder + + + + + org.eclipse.wst.validation.validationbuilder + + + + + + org.eclipse.jdt.core.javanature + edu.mit.csail.relo.jdt.builder.ReloJDTBuilderProjectNature + org.eclipse.wst.common.project.facet.core.nature + org.eclipse.wst.common.modulecore.ModuleCoreNature + org.eclipse.jem.workbench.JavaEMFNature + + diff -r 000000000000 -r dc7622afcfea .svn/text-base/catalog.xml.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.svn/text-base/catalog.xml.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,16 @@ + + + + + + + + + + + \ No newline at end of file diff -r 000000000000 -r 
dc7622afcfea .svn/text-base/model.uml.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.svn/text-base/model.uml.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,479 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + +
+ + + + + + + +
+ + + + + + + +
+ + + + + + +
+ + + + + +
+ + + + + + + +
+ + + + + +
+ + + + + +
+ + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + +
+ + + + + + + +
+ + + + + + + +
+ + + + + + +
+ + + + + +
+ + + + + + + +
+ + + + + +
+ + + + + +
+ + + + + + + + +
+ + + + + + +
+ + + + + + + + + + + +
+ + + + +
+ + + + +
+ + + + + + + +
+ + + + + + + +
+ + + + + + +
+ + + + + +
+ + + + + + + +
+ + + + + +
+ + + + + +
+ + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r dc7622afcfea .svn/text-base/model.umldi.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.svn/text-base/model.umldi.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,9 @@ + + + + + + + + + diff -r 000000000000 -r dc7622afcfea catalog.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/catalog.xml Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,16 @@ + + + + + + + + + + + \ No newline at end of file diff -r 000000000000 -r dc7622afcfea doc/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,77 @@ +K 25 +svn:wc:ra_dav:version-url +V 44 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc +END +overview-frame.html +K 25 +svn:wc:ra_dav:version-url +V 64 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/overview-frame.html +END +deprecated-list.html +K 25 +svn:wc:ra_dav:version-url +V 65 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/deprecated-list.html +END +stylesheet.css +K 25 +svn:wc:ra_dav:version-url +V 59 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/stylesheet.css +END +allclasses-frame.html +K 25 +svn:wc:ra_dav:version-url +V 66 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/allclasses-frame.html +END +allclasses-noframe.html +K 25 +svn:wc:ra_dav:version-url +V 68 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/allclasses-noframe.html +END +package-list +K 25 +svn:wc:ra_dav:version-url +V 57 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/package-list +END +index.html +K 25 +svn:wc:ra_dav:version-url +V 55 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/index.html +END +overview-summary.html +K 25 +svn:wc:ra_dav:version-url +V 66 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/overview-summary.html +END +overview-tree.html +K 25 +svn:wc:ra_dav:version-url 
+V 63 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/overview-tree.html +END +serialized-form.html +K 25 +svn:wc:ra_dav:version-url +V 65 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/serialized-form.html +END +help-doc.html +K 25 +svn:wc:ra_dav:version-url +V 58 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/help-doc.html +END +constant-values.html +K 25 +svn:wc:ra_dav:version-url +V 65 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/constant-values.html +END diff -r 000000000000 -r dc7622afcfea doc/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,452 @@ +10 + +dir +2662 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/doc +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-28T15:47:27.673424Z +2662 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +allclasses-frame.html +file + + + + +2010-06-28T09:04:53.000000Z +0c636f51a17ff496c1d97568db0bf538 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +5190 + +allclasses-noframe.html +file + + + + +2010-06-28T09:04:53.000000Z +2dd3b821274cd56afe2d776c6011b842 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +4690 + +constant-values.html +file + + + + +2010-06-28T09:04:53.000000Z +714ab500abcd7805ff5187760cfb81e2 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +13218 + +de +dir + +deprecated-list.html +file + + + + +2010-06-28T09:04:53.000000Z +144335951603b201554203b9624f0312 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +5095 + +help-doc.html +file + + + + +2010-06-28T09:04:53.000000Z +dbb5eaa9e6841b48b6e61299c02f0200 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +9735 + +index-files +dir + +index.html +file + + + + 
+2010-06-28T09:04:53.000000Z +b29583865bd31d184b5dcf0dfb12dfb5 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +1416 + +overview-frame.html +file + + + + +2010-06-28T09:04:53.000000Z +95625e60968c2fefcd7358e446abd907 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +2458 + +overview-summary.html +file + + + + +2010-06-28T09:04:53.000000Z +94a11afaa5269a687d4d8348e82776aa +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +7138 + +overview-tree.html +file + + + + +2010-06-28T09:04:53.000000Z +4ee927836bd3f811a07fb6d57a3c4bbf +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +13235 + +package-list +file + + + + +2010-06-28T09:04:52.000000Z +abc8718accd94ec75803bcba68c0e415 +2010-06-28T15:47:27.673424Z +2662 +dwinter + + + + + + + + + + + + + + + + + + + + + +389 + +resources +dir + +serialized-form.html +file + + + + +2010-06-28T09:04:53.000000Z +bd9f1a0076df7af0d8ca0aeec55c16ac +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6108 + +stylesheet.css +file + + + + +2010-06-28T09:04:53.000000Z +6ea3088c740a86ddc743f62573f18a42 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +1391 + diff -r 000000000000 -r dc7622afcfea doc/.svn/prop-base/allclasses-frame.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/.svn/prop-base/allclasses-frame.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/.svn/prop-base/allclasses-noframe.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/.svn/prop-base/allclasses-noframe.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea 
doc/.svn/prop-base/constant-values.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/.svn/prop-base/constant-values.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/.svn/prop-base/deprecated-list.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/.svn/prop-base/deprecated-list.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/.svn/prop-base/help-doc.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/.svn/prop-base/help-doc.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/.svn/prop-base/index.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/.svn/prop-base/index.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/.svn/prop-base/overview-frame.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/.svn/prop-base/overview-frame.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/.svn/prop-base/overview-summary.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/.svn/prop-base/overview-summary.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/.svn/prop-base/overview-tree.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/.svn/prop-base/overview-tree.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/.svn/prop-base/serialized-form.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/doc/.svn/prop-base/serialized-form.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/.svn/prop-base/stylesheet.css.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/.svn/prop-base/stylesheet.css.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/.svn/text-base/allclasses-frame.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/.svn/text-base/allclasses-frame.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,79 @@ + + + + + + +All Classes + + + + + + + + + + + +All Classes +
+ + + + + +
DocHarvesterCLIMD +
+DocHarvesterCLIRDFMD +
+DocHarvesterThreaded +
+DonatusAnalyzer +
+FileDocument +
+HarvesterCLI +
+HarvesterCLIMD +
+HarvesterCLIRDFMD +
+HarvesterThreaded +
+LanguageAnalyzer +
+LanguageAnalyzers +
+LineCollector +
+MorphDocument +
+OCRDocument +
+OCRHarvesterCLI +
+OCRHarvesterCLIMD +
+OCRHarvesterCLIRDFMD +
+OCRHarvesterThreaded +
+OcropusLineDocument +
+OCRProcessFileThread +
+ParseIndexMeta +
+ParseOcrDocument +
+ParseOcrDocument +
+ProcessFileThread +
+XMLFilteredReader +
+
+ + + diff -r 000000000000 -r dc7622afcfea doc/.svn/text-base/allclasses-noframe.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/.svn/text-base/allclasses-noframe.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,79 @@ + + + + + + +All Classes + + + + + + + + + + + +All Classes +
+ + + + + +
DocHarvesterCLIMD +
+DocHarvesterCLIRDFMD +
+DocHarvesterThreaded +
+DonatusAnalyzer +
+FileDocument +
+HarvesterCLI +
+HarvesterCLIMD +
+HarvesterCLIRDFMD +
+HarvesterThreaded +
+LanguageAnalyzer +
+LanguageAnalyzers +
+LineCollector +
+MorphDocument +
+OCRDocument +
+OCRHarvesterCLI +
+OCRHarvesterCLIMD +
+OCRHarvesterCLIRDFMD +
+OCRHarvesterThreaded +
+OcropusLineDocument +
+OCRProcessFileThread +
+ParseIndexMeta +
+ParseOcrDocument +
+ParseOcrDocument +
+ProcessFileThread +
+XMLFilteredReader +
+
+ + + diff -r 000000000000 -r dc7622afcfea doc/.svn/text-base/constant-values.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/.svn/text-base/constant-values.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,288 @@ + + + + + + +Constant Field Values + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Constant Field Values

+
+
+Contents + + + + + + +
+de.mpiwg.*
+ +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded
+private static final java.lang.StringCOMPOSEDFN"doc.xml"
+private static final booleanCREATE_NEWfalse
+private static final booleanDEBUGfalse
+private static final intMAXFILES3
+private static final java.lang.StringPREFIX"/tmp/composed/files"
+ +

+ +

+ + + + + + + + + + + + + + + + + +
de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded
+private static final booleanDEBUGfalse
+private static final intMAXFILES100
+ +

+ +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread
+private static final intDELETED_OLD_VERSION2
+private static final intDELETED_WRONG_LANGUAGE1
+private static final intFILE_EXISTS-1
+private static final intNEW_FILE0
+private static final java.lang.StringTEXTIDFROMPATH_REGEXP".*(/(permanent|experimental)/.*)"
+ +

+ +

+ + + + + + + + + + + + + + + + + +
de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers
+private static final intMAX_HITS_PER_PAGE10
+private static final longserialVersionUID2L
+ +

+ +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/.svn/text-base/deprecated-list.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/.svn/text-base/deprecated-list.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,146 @@ + + + + + + +Deprecated List + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Deprecated API

+
+
+Contents
    +
+ +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/.svn/text-base/help-doc.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/.svn/text-base/help-doc.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,223 @@ + + + + + + +API Help + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+How This API Document Is Organized

+
+This API (Application Programming Interface) document has pages corresponding to the items in the navigation bar, described as follows.

+Overview

+
+ +

+The Overview page is the front page of this API document and provides a list of all packages with a summary for each. This page can also contain an overall description of the set of packages.

+

+Package

+
+ +

+Each package has a page that contains a list of its classes and interfaces, with a summary for each. This page can contain four categories:

    +
  • Interfaces (italic)
  • Classes
  • Enums
  • Exceptions
  • Errors
  • Annotation Types
+
+

+Class/Interface

+
+ +

+Each class, interface, nested class and nested interface has its own separate page. Each of these pages has three sections consisting of a class/interface description, summary tables, and detailed member descriptions:

    +
  • Class inheritance diagram
  • Direct Subclasses
  • All Known Subinterfaces
  • All Known Implementing Classes
  • Class/interface declaration
  • Class/interface description +

    +

  • Nested Class Summary
  • Field Summary
  • Constructor Summary
  • Method Summary +

    +

  • Field Detail
  • Constructor Detail
  • Method Detail
+Each summary entry contains the first sentence from the detailed description for that item. The summary entries are alphabetical, while the detailed descriptions are in the order they appear in the source code. This preserves the logical groupings established by the programmer.
+ +

+Annotation Type

+
+ +

+Each annotation type has its own separate page with the following sections:

    +
  • Annotation Type declaration
  • Annotation Type description
  • Required Element Summary
  • Optional Element Summary
  • Element Detail
+
+ +

+Enum

+
+ +

+Each enum has its own separate page with the following sections:

    +
  • Enum declaration
  • Enum description
  • Enum Constant Summary
  • Enum Constant Detail
+
+

+Use

+
+Each documented package, class and interface has its own Use page. This page describes what packages, classes, methods, constructors and fields use any part of the given class or package. Given a class or interface A, its Use page includes subclasses of A, fields declared as A, methods that return A, and methods and constructors with parameters of type A. You can access this page by first going to the package, class or interface, then clicking on the "Use" link in the navigation bar.
+

+Tree (Class Hierarchy)

+
+There is a Class Hierarchy page for all packages, plus a hierarchy for each package. Each hierarchy page contains a list of classes and a list of interfaces. The classes are organized by inheritance structure starting with java.lang.Object. The interfaces do not inherit from java.lang.Object.
    +
  • When viewing the Overview page, clicking on "Tree" displays the hierarchy for all packages.
  • When viewing a particular package, class or interface page, clicking "Tree" displays the hierarchy for only that package.
+
+

+Deprecated API

+
+The Deprecated API page lists all of the API that have been deprecated. A deprecated API is not recommended for use, generally due to improvements, and a replacement API is usually given. Deprecated APIs may be removed in future implementations.
+

+Index

+
+The Index contains an alphabetic list of all classes, interfaces, constructors, methods, and fields.
+

+Prev/Next

+These links take you to the next or previous class, interface, package, or related page.

+Frames/No Frames

+These links show and hide the HTML frames. All pages are available with or without frames. +

+

+Serialized Form

+Each serializable or externalizable class has a description of its serialization fields and methods. This information is of interest to re-implementors, not to developers using the API. While there is no link in the navigation bar, you can get to this information by going to any serialized class and clicking "Serialized Form" in the "See also" section of the class description. +

+

+Constant Field Values

+The Constant Field Values page lists the static final fields and their values. +

+ + +This help file applies to API documentation generated using the standard doclet. + +
+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/.svn/text-base/index.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/.svn/text-base/index.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,39 @@ + + + + + + +Generated Documentation (Untitled) + + + + + + + + + + + +<H2> +Frame Alert</H2> + +<P> +This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. +<BR> +Link to<A HREF="overview-summary.html">Non-frame version.</A> + + + diff -r 000000000000 -r dc7622afcfea doc/.svn/text-base/overview-frame.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/.svn/text-base/overview-frame.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,58 @@ + + + + + + +Overview List + + + + + + + + + + + + + + + +
+
+ + + + + +
All Classes +

+ +Packages +
+de.mpiwg.dwinter.fulltextIndexer.harvester +
+de.mpiwg.dwinter.fulltextIndexer.harvester.CLI +
+de.mpiwg.dwinter.fulltextIndexer.harvester.processors +
+de.mpiwg.dwinter.fulltextIndexer.OCRutils +
+de.mpiwg.dwinter.fulltextIndexer.utils +
+de.mpiwg.dwinter.lucencetools +
+de.mpiwg.dwinter.lucencetools.analyzer +
+de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer +
+de.mpiwg.dwinter.lucencetools.documents +
+

+ +

+  + + diff -r 000000000000 -r dc7622afcfea doc/.svn/text-base/overview-summary.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/.svn/text-base/overview-summary.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,183 @@ + + + + + + +Overview + + + + + + + + + + + + +


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Packages
de.mpiwg.dwinter.fulltextIndexer.harvester 
de.mpiwg.dwinter.fulltextIndexer.harvester.CLI 
de.mpiwg.dwinter.fulltextIndexer.harvester.processors 
de.mpiwg.dwinter.fulltextIndexer.OCRutils 
de.mpiwg.dwinter.fulltextIndexer.utils 
de.mpiwg.dwinter.lucencetools 
de.mpiwg.dwinter.lucencetools.analyzer 
de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer 
de.mpiwg.dwinter.lucencetools.documents 
+ +


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/.svn/text-base/overview-tree.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/.svn/text-base/overview-tree.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,188 @@ + + + + + + +Class Hierarchy + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For All Packages

+
+
+
Package Hierarchies:
de.mpiwg.dwinter.fulltextIndexer.harvester, de.mpiwg.dwinter.fulltextIndexer.harvester.CLI, de.mpiwg.dwinter.fulltextIndexer.harvester.processors, de.mpiwg.dwinter.fulltextIndexer.OCRutils, de.mpiwg.dwinter.fulltextIndexer.utils, de.mpiwg.dwinter.lucencetools, de.mpiwg.dwinter.lucencetools.analyzer, de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer, de.mpiwg.dwinter.lucencetools.documents
+
+

+Class Hierarchy +

+
    +
  • java.lang.Object
      +
    • java.util.AbstractCollection<E> (implements java.util.Collection<E>) +
        +
      • java.util.AbstractList<E> (implements java.util.List<E>) +
          +
        • java.util.ArrayList<E> (implements java.lang.Cloneable, java.util.List<E>, java.util.RandomAccess, java.io.Serializable) + +
        +
      +
    • org.apache.lucene.search.Collector
        +
      • org.apache.lucene.search.TopDocsCollector<T>
          +
        • org.apache.lucene.search.TopScoreDocCollector +
        +
      +
    • org.xml.sax.helpers.DefaultHandler (implements org.xml.sax.ContentHandler, org.xml.sax.DTDHandler, org.xml.sax.EntityResolver, org.xml.sax.ErrorHandler) + +
    • de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIMD
    • de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIRDFMD
    • de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded
    • de.mpiwg.dwinter.lucencetools.documents.FileDocument
    • de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLI
    • de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIMD
    • de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIRDFMD
    • de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
    • de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer
    • de.mpiwg.dwinter.lucencetools.documents.MorphDocument
    • de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument
    • de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument.OCRLine
    • de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLI
    • de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIMD
    • de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIRDFMD
    • de.mpiwg.dwinter.lucencetools.documents.OcropusLineDocument
    • de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta (implements org.xml.sax.ContentHandler) +
    • java.io.Reader (implements java.io.Closeable, java.lang.Readable) +
        +
      • java.io.InputStreamReader +
      +
    • java.lang.Thread (implements java.lang.Runnable) + +
    +
+

+Interface Hierarchy +

+ +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/.svn/text-base/package-list.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/.svn/text-base/package-list.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,9 @@ +de.mpiwg.dwinter.fulltextIndexer.OCRutils +de.mpiwg.dwinter.fulltextIndexer.harvester +de.mpiwg.dwinter.fulltextIndexer.harvester.CLI +de.mpiwg.dwinter.fulltextIndexer.harvester.processors +de.mpiwg.dwinter.fulltextIndexer.utils +de.mpiwg.dwinter.lucencetools +de.mpiwg.dwinter.lucencetools.analyzer +de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer +de.mpiwg.dwinter.lucencetools.documents diff -r 000000000000 -r dc7622afcfea doc/.svn/text-base/serialized-form.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/.svn/text-base/serialized-form.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,166 @@ + + + + + + +Serialized Form + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Serialized Form

+
+
+ + + + + +
+Package de.mpiwg.dwinter.lucencetools.analyzer
+ +

+ + + + + +
+Class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers extends java.util.ArrayList<LanguageAnalyzer> implements Serializable
+ +

+serialVersionUID: 2L + +

+ +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/.svn/text-base/stylesheet.css.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/.svn/text-base/stylesheet.css.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,29 @@ +/* Javadoc style sheet */ + +/* Define colors, fonts and other style attributes here to override the defaults */ + +/* Page background color */ +body { background-color: #FFFFFF; color:#000000 } + +/* Headings */ +h1 { font-size: 145% } + +/* Table colors */ +.TableHeadingColor { background: #CCCCFF; color:#000000 } /* Dark mauve */ +.TableSubHeadingColor { background: #EEEEFF; color:#000000 } /* Light mauve */ +.TableRowColor { background: #FFFFFF; color:#000000 } /* White */ + +/* Font used in left-hand frame lists */ +.FrameTitleFont { font-size: 100%; font-family: Helvetica, Arial, sans-serif; color:#000000 } +.FrameHeadingFont { font-size: 90%; font-family: Helvetica, Arial, sans-serif; color:#000000 } +.FrameItemFont { font-size: 90%; font-family: Helvetica, Arial, sans-serif; color:#000000 } + +/* Navigation bar fonts and colors */ +.NavBarCell1 { background-color:#EEEEFF; color:#000000} /* Light mauve */ +.NavBarCell1Rev { background-color:#00008B; color:#FFFFFF} /* Dark Blue */ +.NavBarFont1 { font-family: Arial, Helvetica, sans-serif; color:#000000;color:#000000;} +.NavBarFont1Rev { font-family: Arial, Helvetica, sans-serif; color:#FFFFFF;color:#FFFFFF;} + +.NavBarCell2 { font-family: Arial, Helvetica, sans-serif; background-color:#FFFFFF; color:#000000} +.NavBarCell3 { font-family: Arial, Helvetica, sans-serif; background-color:#FFFFFF; color:#000000} + diff -r 000000000000 -r dc7622afcfea doc/allclasses-frame.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/allclasses-frame.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,79 @@ + + + + + + +All Classes + + + + + + + + + + + +All Classes +
+ + + + + +
DocHarvesterCLIMD +
+DocHarvesterCLIRDFMD +
+DocHarvesterThreaded +
+DonatusAnalyzer +
+FileDocument +
+HarvesterCLI +
+HarvesterCLIMD +
+HarvesterCLIRDFMD +
+HarvesterThreaded +
+LanguageAnalyzer +
+LanguageAnalyzers +
+LineCollector +
+MorphDocument +
+OCRDocument +
+OCRHarvesterCLI +
+OCRHarvesterCLIMD +
+OCRHarvesterCLIRDFMD +
+OCRHarvesterThreaded +
+OcropusLineDocument +
+OCRProcessFileThread +
+ParseIndexMeta +
+ParseOcrDocument +
+ParseOcrDocument +
+ProcessFileThread +
+XMLFilteredReader +
+
+ + + diff -r 000000000000 -r dc7622afcfea doc/allclasses-noframe.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/allclasses-noframe.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,79 @@ + + + + + + +All Classes + + + + + + + + + + + +All Classes +
+ + + + + +
DocHarvesterCLIMD +
+DocHarvesterCLIRDFMD +
+DocHarvesterThreaded +
+DonatusAnalyzer +
+FileDocument +
+HarvesterCLI +
+HarvesterCLIMD +
+HarvesterCLIRDFMD +
+HarvesterThreaded +
+LanguageAnalyzer +
+LanguageAnalyzers +
+LineCollector +
+MorphDocument +
+OCRDocument +
+OCRHarvesterCLI +
+OCRHarvesterCLIMD +
+OCRHarvesterCLIRDFMD +
+OCRHarvesterThreaded +
+OcropusLineDocument +
+OCRProcessFileThread +
+ParseIndexMeta +
+ParseOcrDocument +
+ParseOcrDocument +
+ProcessFileThread +
+XMLFilteredReader +
+
+ + + diff -r 000000000000 -r dc7622afcfea doc/constant-values.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/constant-values.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,288 @@ + + + + + + +Constant Field Values + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Constant Field Values

+
+
+Contents + + + + + + +
+de.mpiwg.*
+ +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded
+private static final java.lang.StringCOMPOSEDFN"doc.xml"
+private static final booleanCREATE_NEWfalse
+private static final booleanDEBUGfalse
+private static final intMAXFILES3
+private static final java.lang.StringPREFIX"/tmp/composed/files"
+ +

+ +

+ + + + + + + + + + + + + + + + + +
de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded
+private static final booleanDEBUGfalse
+private static final intMAXFILES100
+ +

+ +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread
+private static final intDELETED_OLD_VERSION2
+private static final intDELETED_WRONG_LANGUAGE1
+private static final intFILE_EXISTS-1
+private static final intNEW_FILE0
+private static final java.lang.StringTEXTIDFROMPATH_REGEXP".*(/(permanent|experimental)/.*)"
+ +

+ +

+ + + + + + + + + + + + + + + + + +
de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers
+private static final intMAX_HITS_PER_PAGE10
+private static final longserialVersionUID2L
+ +

+ +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 25 +svn:wc:ra_dav:version-url +V 47 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de +END diff -r 000000000000 -r dc7622afcfea doc/de/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,38 @@ +10 + +dir +2662 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/doc/de +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-28T15:47:27.673424Z +2662 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +mpiwg +dir + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 25 +svn:wc:ra_dav:version-url +V 53 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,38 @@ +10 + +dir +2662 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/doc/de/mpiwg +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-28T15:47:27.673424Z +2662 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +dwinter +dir + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 25 +svn:wc:ra_dav:version-url +V 61 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/.svn/entries --- /dev/null Thu Jan 01 00:00:00 
1970 +0000 +++ b/doc/de/mpiwg/dwinter/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,41 @@ +10 + +dir +2662 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/doc/de/mpiwg/dwinter +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-28T15:47:27.673424Z +2662 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +fulltextIndexer +dir + +lucencetools +dir + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 25 +svn:wc:ra_dav:version-url +V 77 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,44 @@ +10 + +dir +2662 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-28T15:47:27.673424Z +2662 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +OCRutils +dir + +harvester +dir + +utils +dir + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,47 @@ +K 25 +svn:wc:ra_dav:version-url +V 86 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils +END +OCRDocument.html +K 25 +svn:wc:ra_dav:version-url +V 103 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/OCRDocument.html +END 
+ParseOcrDocument.html +K 25 +svn:wc:ra_dav:version-url +V 108 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/ParseOcrDocument.html +END +package-frame.html +K 25 +svn:wc:ra_dav:version-url +V 105 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/package-frame.html +END +package-tree.html +K 25 +svn:wc:ra_dav:version-url +V 104 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/package-tree.html +END +package-use.html +K 25 +svn:wc:ra_dav:version-url +V 103 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/package-use.html +END +package-summary.html +K 25 +svn:wc:ra_dav:version-url +V 107 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/package-summary.html +END +OCRDocument.OCRLine.html +K 25 +svn:wc:ra_dav:version-url +V 111 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/OCRDocument.OCRLine.html +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,276 @@ +10 + +dir +2662 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-28T15:47:27.673424Z +2662 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +OCRDocument.OCRLine.html +file + + + + +2010-06-28T09:04:52.000000Z +988cdb271456f2f373c6f611e5d8087a +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +12616 + +OCRDocument.html +file + + + + +2010-06-28T09:04:52.000000Z +a1c932067b913b579518911e19863db1 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + 
+ + + + + + + + + + + + + + + + + + + +13482 + +ParseOcrDocument.html +file + + + + +2010-06-28T09:04:52.000000Z +dde7556c600d2f8383b5b164136fda43 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +26191 + +class-use +dir + +package-frame.html +file + + + + +2010-06-28T09:04:53.000000Z +fcb51606d5c5b1a5b13ac75959a47b99 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +1100 + +package-summary.html +file + + + + +2010-06-28T09:04:53.000000Z +9b5d77ed1f04cf1ab983112f44e29447 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6777 + +package-tree.html +file + + + + +2010-06-28T09:04:53.000000Z +56e312e3dd013137d60e079a415e6061 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +7037 + +package-use.html +file + + + + +2010-06-28T09:04:53.000000Z +5c5ff13198f3667972285dd203e3b8f8 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +8399 + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/prop-base/OCRDocument.OCRLine.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/prop-base/OCRDocument.OCRLine.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/prop-base/OCRDocument.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/prop-base/OCRDocument.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/prop-base/ParseOcrDocument.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/prop-base/ParseOcrDocument.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/prop-base/package-frame.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/prop-base/package-frame.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/prop-base/package-summary.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/prop-base/package-summary.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/prop-base/package-tree.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/prop-base/package-tree.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/prop-base/package-use.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/prop-base/package-use.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/text-base/OCRDocument.OCRLine.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/text-base/OCRDocument.OCRLine.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,317 @@ + + + + + + +OCRDocument.OCRLine + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.OCRutils +
+Class OCRDocument.OCRLine

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument.OCRLine
+
+
+
Enclosing class:
OCRDocument
+
+
+
+
public class OCRDocument.OCRLine
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + + + + + + + + + +
+Field Summary
+ java.lang.Stringbbox + +
+           
+ java.lang.Stringcontent + +
+           
+ java.lang.StringlineNumber + +
+           
+  + + + + + + + + + + + + + +
+Constructor Summary
OCRDocument.OCRLine() + +
+           
OCRDocument.OCRLine(java.lang.String lineNumber, + java.lang.String bbox, + java.lang.String content) + +
+           
+  + + + + + + + +
+Method Summary
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+lineNumber

+
+public java.lang.String lineNumber
+
+
+
+
+
+ +

+bbox

+
+public java.lang.String bbox
+
+
+
+
+
+ +

+content

+
+public java.lang.String content
+
+
+
+
+ + + + + + + + +
+Constructor Detail
+ +

+OCRDocument.OCRLine

+
+public OCRDocument.OCRLine(java.lang.String lineNumber,
+                           java.lang.String bbox,
+                           java.lang.String content)
+
+
+
+ +

+OCRDocument.OCRLine

+
+public OCRDocument.OCRLine()
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/text-base/OCRDocument.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/text-base/OCRDocument.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,331 @@ + + + + + + +OCRDocument + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.OCRutils +
+Class OCRDocument

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument
+
+
+
+
public class OCRDocument
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + +
+Nested Class Summary
+ classOCRDocument.OCRLine + +
+           
+ + + + + + + + + + + + + + + + + + +
+Field Summary
+ java.lang.Stringfilename + +
+           
+ java.util.ArrayList<OCRDocument.OCRLine>OCRLines + +
+           
+ java.lang.StringpageDimension + +
+           
+  + + + + + + + + + + + + + +
+Constructor Summary
OCRDocument() + +
+           
OCRDocument(java.lang.String filename, + java.lang.String pageDimension, + java.util.ArrayList<OCRDocument.OCRLine> lines) + +
+           
+  + + + + + + + +
+Method Summary
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+filename

+
+public java.lang.String filename
+
+
+
+
+
+ +

+pageDimension

+
+public java.lang.String pageDimension
+
+
+
+
+
+ +

+OCRLines

+
+public java.util.ArrayList<OCRDocument.OCRLine> OCRLines
+
+
+
+
+ + + + + + + + +
+Constructor Detail
+ +

+OCRDocument

+
+public OCRDocument(java.lang.String filename,
+                   java.lang.String pageDimension,
+                   java.util.ArrayList<OCRDocument.OCRLine> lines)
+
+
+
+ +

+OCRDocument

+
+public OCRDocument()
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/text-base/ParseOcrDocument.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/text-base/ParseOcrDocument.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,668 @@ + + + + + + +ParseOcrDocument + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.OCRutils +
+Class ParseOcrDocument

+
+java.lang.Object
+  extended by org.xml.sax.helpers.DefaultHandler
+      extended by de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument
+
+
+
All Implemented Interfaces:
org.xml.sax.ContentHandler, org.xml.sax.DTDHandler, org.xml.sax.EntityResolver, org.xml.sax.ErrorHandler
+
+
+
+
public class ParseOcrDocument
extends org.xml.sax.helpers.DefaultHandler
+ + +

+


+ +

+ + + + + + + + + + + + + + + + + + + + + + + +
+Field Summary
+private  OCRDocument.OCRLinecurrentLine + +
+           
+private  booleaninLine + +
+           
+private  intlineCounter + +
+           
+ OCRDocumentocrDocument + +
+           
+  + + + + + + + + + + +
+Constructor Summary
ParseOcrDocument() + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ voidcharacters(char[] c, + int start, + int length) + +
+           
+private  voiddoLine(org.xml.sax.Attributes attrs) + +
+           
+private  voiddoPage(org.xml.sax.Attributes attrs) + +
+           
+ voidendDocument() + +
+           
+ voidendElement(java.lang.String uri, + java.lang.String localName, + java.lang.String name) + +
+           
+ voidendPrefixMapping(java.lang.String prefix) + +
+           
+ voidignorableWhitespace(char[] c, + int start, + int length) + +
+           
+ voidprocessingInstruction(java.lang.String target, + java.lang.String data) + +
+           
+ voidsetDocumentLocator(org.xml.sax.Locator arg1) + +
+           
+ voidskippedEntity(java.lang.String name) + +
+           
+ voidstartDocument() + +
+           
+ voidstartElement(java.lang.String uri, + java.lang.String localName, + java.lang.String name, + org.xml.sax.Attributes attrs) + +
+           
+ voidstartPrefixMapping(java.lang.String prefix, + java.lang.String uri) + +
+           
+ + + + + + + +
Methods inherited from class org.xml.sax.helpers.DefaultHandler
error, fatalError, notationDecl, resolveEntity, unparsedEntityDecl, warning
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+ocrDocument

+
+public OCRDocument ocrDocument
+
+
+
+
+
+ +

+lineCounter

+
+private int lineCounter
+
+
+
+
+
+ +

+currentLine

+
+private OCRDocument.OCRLine currentLine
+
+
+
+
+
+ +

+inLine

+
+private boolean inLine
+
+
+
+
+ + + + + + + + +
+Constructor Detail
+ +

+ParseOcrDocument

+
+public ParseOcrDocument()
+
+
+ + + + + + + + +
+Method Detail
+ +

+startDocument

+
+public void startDocument()
+                   throws org.xml.sax.SAXException
+
+
+
Specified by:
startDocument in interface org.xml.sax.ContentHandler
Overrides:
startDocument in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+endDocument

+
+public void endDocument()
+                 throws org.xml.sax.SAXException
+
+
+
Specified by:
endDocument in interface org.xml.sax.ContentHandler
Overrides:
endDocument in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+characters

+
+public void characters(char[] c,
+                       int start,
+                       int length)
+                throws org.xml.sax.SAXException
+
+
+
Specified by:
characters in interface org.xml.sax.ContentHandler
Overrides:
characters in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+ignorableWhitespace

+
+public void ignorableWhitespace(char[] c,
+                                int start,
+                                int length)
+                         throws org.xml.sax.SAXException
+
+
+
Specified by:
ignorableWhitespace in interface org.xml.sax.ContentHandler
Overrides:
ignorableWhitespace in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+processingInstruction

+
+public void processingInstruction(java.lang.String target,
+                                  java.lang.String data)
+                           throws org.xml.sax.SAXException
+
+
+
Specified by:
processingInstruction in interface org.xml.sax.ContentHandler
Overrides:
processingInstruction in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+setDocumentLocator

+
+public void setDocumentLocator(org.xml.sax.Locator arg1)
+
+
+
Specified by:
setDocumentLocator in interface org.xml.sax.ContentHandler
Overrides:
setDocumentLocator in class org.xml.sax.helpers.DefaultHandler
+
+
+
+
+
+
+ +

+endElement

+
+public void endElement(java.lang.String uri,
+                       java.lang.String localName,
+                       java.lang.String name)
+                throws org.xml.sax.SAXException
+
+
+
Specified by:
endElement in interface org.xml.sax.ContentHandler
Overrides:
endElement in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+endPrefixMapping

+
+public void endPrefixMapping(java.lang.String prefix)
+                      throws org.xml.sax.SAXException
+
+
+
Specified by:
endPrefixMapping in interface org.xml.sax.ContentHandler
Overrides:
endPrefixMapping in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+skippedEntity

+
+public void skippedEntity(java.lang.String name)
+                   throws org.xml.sax.SAXException
+
+
+
Specified by:
skippedEntity in interface org.xml.sax.ContentHandler
Overrides:
skippedEntity in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+startElement

+
+public void startElement(java.lang.String uri,
+                         java.lang.String localName,
+                         java.lang.String name,
+                         org.xml.sax.Attributes attrs)
+                  throws org.xml.sax.SAXException
+
+
+
Specified by:
startElement in interface org.xml.sax.ContentHandler
Overrides:
startElement in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+doPage

+
+private void doPage(org.xml.sax.Attributes attrs)
+
+
+
+
+
+
+ +

+doLine

+
+private void doLine(org.xml.sax.Attributes attrs)
+
+
+
+
+
+
+ +

+startPrefixMapping

+
+public void startPrefixMapping(java.lang.String prefix,
+                               java.lang.String uri)
+                        throws org.xml.sax.SAXException
+
+
+
Specified by:
startPrefixMapping in interface org.xml.sax.ContentHandler
Overrides:
startPrefixMapping in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/text-base/package-frame.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/text-base/package-frame.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,34 @@ + + + + + + +de.mpiwg.dwinter.fulltextIndexer.OCRutils + + + + + + + + + + + +de.mpiwg.dwinter.fulltextIndexer.OCRutils + + + + +
+Classes  + +
+OCRDocument +
+ParseOcrDocument
+ + + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/text-base/package-summary.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/text-base/package-summary.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,161 @@ + + + + + + +de.mpiwg.dwinter.fulltextIndexer.OCRutils + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package de.mpiwg.dwinter.fulltextIndexer.OCRutils +

+ + + + + + + + + + + + + +
+Class Summary
OCRDocument 
ParseOcrDocument 
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/text-base/package-tree.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/text-base/package-tree.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,156 @@ + + + + + + +de.mpiwg.dwinter.fulltextIndexer.OCRutils Class Hierarchy + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package de.mpiwg.dwinter.fulltextIndexer.OCRutils +

+
+
+
Package Hierarchies:
All Packages
+
+

+Class Hierarchy +

+
    +
  • java.lang.Object
      +
    • org.xml.sax.helpers.DefaultHandler (implements org.xml.sax.ContentHandler, org.xml.sax.DTDHandler, org.xml.sax.EntityResolver, org.xml.sax.ErrorHandler) + +
    • de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument
    • de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument.OCRLine
    +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/text-base/package-use.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/text-base/package-use.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,195 @@ + + + + + + +Uses of Package de.mpiwg.dwinter.fulltextIndexer.OCRutils + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Package
de.mpiwg.dwinter.fulltextIndexer.OCRutils

+
+ + + + + + + + + + + + + +
+Packages that use de.mpiwg.dwinter.fulltextIndexer.OCRutils
de.mpiwg.dwinter.fulltextIndexer.OCRutils  
de.mpiwg.dwinter.lucencetools.documents  
+  +

+ + + + + + + + + + + +
+Classes in de.mpiwg.dwinter.fulltextIndexer.OCRutils used by de.mpiwg.dwinter.fulltextIndexer.OCRutils
OCRDocument + +
+           
OCRDocument.OCRLine + +
+           
+  +

+ + + + + + + + +
+Classes in de.mpiwg.dwinter.fulltextIndexer.OCRutils used by de.mpiwg.dwinter.lucencetools.documents
OCRDocument.OCRLine + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/OCRDocument.OCRLine.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/OCRDocument.OCRLine.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,317 @@ + + + + + + +OCRDocument.OCRLine + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.OCRutils +
+Class OCRDocument.OCRLine

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument.OCRLine
+
+
+
Enclosing class:
OCRDocument
+
+
+
+
public class OCRDocument.OCRLine
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + + + + + + + + + +
+Field Summary
+ java.lang.Stringbbox + +
+           
+ java.lang.Stringcontent + +
+           
+ java.lang.StringlineNumber + +
+           
+  + + + + + + + + + + + + + +
+Constructor Summary
OCRDocument.OCRLine() + +
+           
OCRDocument.OCRLine(java.lang.String lineNumber, + java.lang.String bbox, + java.lang.String content) + +
+           
+  + + + + + + + +
+Method Summary
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+lineNumber

+
+public java.lang.String lineNumber
+
+
+
+
+
+ +

+bbox

+
+public java.lang.String bbox
+
+
+
+
+
+ +

+content

+
+public java.lang.String content
+
+
+
+
+ + + + + + + + +
+Constructor Detail
+ +

+OCRDocument.OCRLine

+
+public OCRDocument.OCRLine(java.lang.String lineNumber,
+                           java.lang.String bbox,
+                           java.lang.String content)
+
+
+
+ +

+OCRDocument.OCRLine

+
+public OCRDocument.OCRLine()
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/OCRDocument.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/OCRDocument.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,331 @@ + + + + + + +OCRDocument + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.OCRutils +
+Class OCRDocument

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument
+
+
+
+
public class OCRDocument
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + +
+Nested Class Summary
+ classOCRDocument.OCRLine + +
+           
+ + + + + + + + + + + + + + + + + + +
+Field Summary
+ java.lang.Stringfilename + +
+           
+ java.util.ArrayList<OCRDocument.OCRLine>OCRLines + +
+           
+ java.lang.StringpageDimension + +
+           
+  + + + + + + + + + + + + + +
+Constructor Summary
OCRDocument() + +
+           
OCRDocument(java.lang.String filename, + java.lang.String pageDimension, + java.util.ArrayList<OCRDocument.OCRLine> lines) + +
+           
+  + + + + + + + +
+Method Summary
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+filename

+
+public java.lang.String filename
+
+
+
+
+
+ +

+pageDimension

+
+public java.lang.String pageDimension
+
+
+
+
+
+ +

+OCRLines

+
+public java.util.ArrayList<OCRDocument.OCRLine> OCRLines
+
+
+
+
+ + + + + + + + +
+Constructor Detail
+ +

+OCRDocument

+
+public OCRDocument(java.lang.String filename,
+                   java.lang.String pageDimension,
+                   java.util.ArrayList<OCRDocument.OCRLine> lines)
+
+
+
+ +

+OCRDocument

+
+public OCRDocument()
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/ParseOcrDocument.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/ParseOcrDocument.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,668 @@ + + + + + + +ParseOcrDocument + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.OCRutils +
+Class ParseOcrDocument

+
+java.lang.Object
+  extended by org.xml.sax.helpers.DefaultHandler
+      extended by de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument
+
+
+
All Implemented Interfaces:
org.xml.sax.ContentHandler, org.xml.sax.DTDHandler, org.xml.sax.EntityResolver, org.xml.sax.ErrorHandler
+
+
+
+
public class ParseOcrDocument
extends org.xml.sax.helpers.DefaultHandler
+ + +

+


+ +

+ + + + + + + + + + + + + + + + + + + + + + + +
+Field Summary
+private  OCRDocument.OCRLinecurrentLine + +
+           
+private  booleaninLine + +
+           
+private  intlineCounter + +
+           
+ OCRDocumentocrDocument + +
+           
+  + + + + + + + + + + +
+Constructor Summary
ParseOcrDocument() + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ voidcharacters(char[] c, + int start, + int length) + +
+           
+private  voiddoLine(org.xml.sax.Attributes attrs) + +
+           
+private  voiddoPage(org.xml.sax.Attributes attrs) + +
+           
+ voidendDocument() + +
+           
+ voidendElement(java.lang.String uri, + java.lang.String localName, + java.lang.String name) + +
+           
+ voidendPrefixMapping(java.lang.String prefix) + +
+           
+ voidignorableWhitespace(char[] c, + int start, + int length) + +
+           
+ voidprocessingInstruction(java.lang.String target, + java.lang.String data) + +
+           
+ voidsetDocumentLocator(org.xml.sax.Locator arg1) + +
+           
+ voidskippedEntity(java.lang.String name) + +
+           
+ voidstartDocument() + +
+           
+ voidstartElement(java.lang.String uri, + java.lang.String localName, + java.lang.String name, + org.xml.sax.Attributes attrs) + +
+           
+ voidstartPrefixMapping(java.lang.String prefix, + java.lang.String uri) + +
+           
+ + + + + + + +
Methods inherited from class org.xml.sax.helpers.DefaultHandler
error, fatalError, notationDecl, resolveEntity, unparsedEntityDecl, warning
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+ocrDocument

+
+public OCRDocument ocrDocument
+
+
+
+
+
+ +

+lineCounter

+
+private int lineCounter
+
+
+
+
+
+ +

+currentLine

+
+private OCRDocument.OCRLine currentLine
+
+
+
+
+
+ +

+inLine

+
+private boolean inLine
+
+
+
+
+ + + + + + + + +
+Constructor Detail
+ +

+ParseOcrDocument

+
+public ParseOcrDocument()
+
+
+ + + + + + + + +
+Method Detail
+ +

+startDocument

+
+public void startDocument()
+                   throws org.xml.sax.SAXException
+
+
+
Specified by:
startDocument in interface org.xml.sax.ContentHandler
Overrides:
startDocument in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+endDocument

+
+public void endDocument()
+                 throws org.xml.sax.SAXException
+
+
+
Specified by:
endDocument in interface org.xml.sax.ContentHandler
Overrides:
endDocument in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+characters

+
+public void characters(char[] c,
+                       int start,
+                       int length)
+                throws org.xml.sax.SAXException
+
+
+
Specified by:
characters in interface org.xml.sax.ContentHandler
Overrides:
characters in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+ignorableWhitespace

+
+public void ignorableWhitespace(char[] c,
+                                int start,
+                                int length)
+                         throws org.xml.sax.SAXException
+
+
+
Specified by:
ignorableWhitespace in interface org.xml.sax.ContentHandler
Overrides:
ignorableWhitespace in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+processingInstruction

+
+public void processingInstruction(java.lang.String target,
+                                  java.lang.String data)
+                           throws org.xml.sax.SAXException
+
+
+
Specified by:
processingInstruction in interface org.xml.sax.ContentHandler
Overrides:
processingInstruction in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+setDocumentLocator

+
+public void setDocumentLocator(org.xml.sax.Locator arg1)
+
+
+
Specified by:
setDocumentLocator in interface org.xml.sax.ContentHandler
Overrides:
setDocumentLocator in class org.xml.sax.helpers.DefaultHandler
+
+
+
+
+
+
+ +

+endElement

+
+public void endElement(java.lang.String uri,
+                       java.lang.String localName,
+                       java.lang.String name)
+                throws org.xml.sax.SAXException
+
+
+
Specified by:
endElement in interface org.xml.sax.ContentHandler
Overrides:
endElement in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+endPrefixMapping

+
+public void endPrefixMapping(java.lang.String prefix)
+                      throws org.xml.sax.SAXException
+
+
+
Specified by:
endPrefixMapping in interface org.xml.sax.ContentHandler
Overrides:
endPrefixMapping in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+skippedEntity

+
+public void skippedEntity(java.lang.String name)
+                   throws org.xml.sax.SAXException
+
+
+
Specified by:
skippedEntity in interface org.xml.sax.ContentHandler
Overrides:
skippedEntity in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+startElement

+
+public void startElement(java.lang.String uri,
+                         java.lang.String localName,
+                         java.lang.String name,
+                         org.xml.sax.Attributes attrs)
+                  throws org.xml.sax.SAXException
+
+
+
Specified by:
startElement in interface org.xml.sax.ContentHandler
Overrides:
startElement in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+doPage

+
+private void doPage(org.xml.sax.Attributes attrs)
+
+
+
+
+
+
+ +

+doLine

+
+private void doLine(org.xml.sax.Attributes attrs)
+
+
+
+
+
+
+ +

+startPrefixMapping

+
+public void startPrefixMapping(java.lang.String prefix,
+                               java.lang.String uri)
+                        throws org.xml.sax.SAXException
+
+
+
Specified by:
startPrefixMapping in interface org.xml.sax.ContentHandler
Overrides:
startPrefixMapping in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/class-use/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/class-use/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,23 @@ +K 25 +svn:wc:ra_dav:version-url +V 96 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/class-use +END +OCRDocument.html +K 25 +svn:wc:ra_dav:version-url +V 113 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/class-use/OCRDocument.html +END +ParseOcrDocument.html +K 25 +svn:wc:ra_dav:version-url +V 118 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/class-use/ParseOcrDocument.html +END +OCRDocument.OCRLine.html +K 25 +svn:wc:ra_dav:version-url +V 121 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/class-use/OCRDocument.OCRLine.html +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/class-use/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/class-use/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,137 @@ +10 + +dir +2662 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/class-use +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-28T15:47:27.673424Z +2662 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +OCRDocument.OCRLine.html +file + + + + +2010-06-28T09:04:53.000000Z +0be964a6898d79ca6a657e702b6b162c +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +15790 + +OCRDocument.html +file + + + + +2010-06-28T09:04:53.000000Z +a33d12c541f6723667644d3b0d01f509 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + 
+ + + + + +10085 + +ParseOcrDocument.html +file + + + + +2010-06-28T09:04:53.000000Z +7a2907541058211dbc68703fe7e059e0 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6176 + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/class-use/.svn/prop-base/OCRDocument.OCRLine.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/class-use/.svn/prop-base/OCRDocument.OCRLine.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/class-use/.svn/prop-base/OCRDocument.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/class-use/.svn/prop-base/OCRDocument.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/class-use/.svn/prop-base/ParseOcrDocument.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/class-use/.svn/prop-base/ParseOcrDocument.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/class-use/.svn/text-base/OCRDocument.OCRLine.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/class-use/.svn/text-base/OCRDocument.OCRLine.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,287 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument.OCRLine + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument.OCRLine

+
+ + + + + + + + + + + + + + + + + +
+Packages that use OCRDocument.OCRLine
de.mpiwg.dwinter.fulltextIndexer.OCRutils  
de.mpiwg.dwinter.fulltextIndexer.utils  
de.mpiwg.dwinter.lucencetools.documents  
+  +

+ + + + + +
+Uses of OCRDocument.OCRLine in de.mpiwg.dwinter.fulltextIndexer.OCRutils
+  +

+ + + + + + + + + +
Fields in de.mpiwg.dwinter.fulltextIndexer.OCRutils declared as OCRDocument.OCRLine
+private  OCRDocument.OCRLineParseOcrDocument.currentLine + +
+           
+  +

+ + + + + + + + + +
Fields in de.mpiwg.dwinter.fulltextIndexer.OCRutils with type parameters of type OCRDocument.OCRLine
+ java.util.ArrayList<OCRDocument.OCRLine>OCRDocument.OCRLines + +
+           
+  +

+ + + + + + + + +
Constructor parameters in de.mpiwg.dwinter.fulltextIndexer.OCRutils with type arguments of type OCRDocument.OCRLine
OCRDocument(java.lang.String filename, + java.lang.String pageDimension, + java.util.ArrayList<OCRDocument.OCRLine> lines) + +
+           
+  +

+ + + + + +
+Uses of OCRDocument.OCRLine in de.mpiwg.dwinter.fulltextIndexer.utils
+  +

+ + + + + + + + + +
Fields in de.mpiwg.dwinter.fulltextIndexer.utils declared as OCRDocument.OCRLine
+private  OCRDocument.OCRLineParseOcrDocument.currentLine + +
+           
+  +

+ + + + + +
+Uses of OCRDocument.OCRLine in de.mpiwg.dwinter.lucencetools.documents
+  +

+ + + + + + + + + + + + + +
Methods in de.mpiwg.dwinter.lucencetools.documents with parameters of type OCRDocument.OCRLine
+static org.apache.lucene.document.DocumentOcropusLineDocument.Document(java.io.File f, + java.lang.String language, + OCRDocument.OCRLine ocrline, + java.lang.String pageDimension, + java.lang.String textId) + +
+           
+static org.apache.lucene.document.DocumentOcropusLineDocument.Document(java.io.File f, + java.lang.String language, + OCRDocument.OCRLine ocrline, + java.lang.String pageDimension, + java.lang.String dcMetaData, + java.lang.String textId) + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/class-use/.svn/text-base/OCRDocument.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/class-use/.svn/text-base/OCRDocument.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,209 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument

+
+ + + + + + + + + + + + + +
+Packages that use OCRDocument
de.mpiwg.dwinter.fulltextIndexer.OCRutils  
de.mpiwg.dwinter.fulltextIndexer.utils  
+  +

+ + + + + +
+Uses of OCRDocument in de.mpiwg.dwinter.fulltextIndexer.OCRutils
+  +

+ + + + + + + + + +
Fields in de.mpiwg.dwinter.fulltextIndexer.OCRutils declared as OCRDocument
+ OCRDocumentParseOcrDocument.ocrDocument + +
+           
+  +

+ + + + + +
+Uses of OCRDocument in de.mpiwg.dwinter.fulltextIndexer.utils
+  +

+ + + + + + + + + +
Fields in de.mpiwg.dwinter.fulltextIndexer.utils declared as OCRDocument
+ OCRDocumentParseOcrDocument.ocrDocument + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/class-use/.svn/text-base/ParseOcrDocument.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/class-use/.svn/text-base/ParseOcrDocument.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument

+
+No usage of de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/class-use/OCRDocument.OCRLine.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/class-use/OCRDocument.OCRLine.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,287 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument.OCRLine + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument.OCRLine

+
+ + + + + + + + + + + + + + + + + +
+Packages that use OCRDocument.OCRLine
de.mpiwg.dwinter.fulltextIndexer.OCRutils  
de.mpiwg.dwinter.fulltextIndexer.utils  
de.mpiwg.dwinter.lucencetools.documents  
+  +

+ + + + + +
+Uses of OCRDocument.OCRLine in de.mpiwg.dwinter.fulltextIndexer.OCRutils
+  +

+ + + + + + + + + +
Fields in de.mpiwg.dwinter.fulltextIndexer.OCRutils declared as OCRDocument.OCRLine
+private  OCRDocument.OCRLineParseOcrDocument.currentLine + +
+           
+  +

+ + + + + + + + + +
Fields in de.mpiwg.dwinter.fulltextIndexer.OCRutils with type parameters of type OCRDocument.OCRLine
+ java.util.ArrayList<OCRDocument.OCRLine>OCRDocument.OCRLines + +
+           
+  +

+ + + + + + + + +
Constructor parameters in de.mpiwg.dwinter.fulltextIndexer.OCRutils with type arguments of type OCRDocument.OCRLine
OCRDocument(java.lang.String filename, + java.lang.String pageDimension, + java.util.ArrayList<OCRDocument.OCRLine> lines) + +
+           
+  +

+ + + + + +
+Uses of OCRDocument.OCRLine in de.mpiwg.dwinter.fulltextIndexer.utils
+  +

+ + + + + + + + + +
Fields in de.mpiwg.dwinter.fulltextIndexer.utils declared as OCRDocument.OCRLine
+private  OCRDocument.OCRLineParseOcrDocument.currentLine + +
+           
+  +

+ + + + + +
+Uses of OCRDocument.OCRLine in de.mpiwg.dwinter.lucencetools.documents
+  +

+ + + + + + + + + + + + + +
Methods in de.mpiwg.dwinter.lucencetools.documents with parameters of type OCRDocument.OCRLine
+static org.apache.lucene.document.DocumentOcropusLineDocument.Document(java.io.File f, + java.lang.String language, + OCRDocument.OCRLine ocrline, + java.lang.String pageDimension, + java.lang.String textId) + +
+           
+static org.apache.lucene.document.DocumentOcropusLineDocument.Document(java.io.File f, + java.lang.String language, + OCRDocument.OCRLine ocrline, + java.lang.String pageDimension, + java.lang.String dcMetaData, + java.lang.String textId) + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/class-use/OCRDocument.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/class-use/OCRDocument.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,209 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument

+
+ + + + + + + + + + + + + +
+Packages that use OCRDocument
de.mpiwg.dwinter.fulltextIndexer.OCRutils  
de.mpiwg.dwinter.fulltextIndexer.utils  
+  +

+ + + + + +
+Uses of OCRDocument in de.mpiwg.dwinter.fulltextIndexer.OCRutils
+  +

+ + + + + + + + + +
Fields in de.mpiwg.dwinter.fulltextIndexer.OCRutils declared as OCRDocument
+ OCRDocumentParseOcrDocument.ocrDocument + +
+           
+  +

+ + + + + +
+Uses of OCRDocument in de.mpiwg.dwinter.fulltextIndexer.utils
+  +

+ + + + + + + + + +
Fields in de.mpiwg.dwinter.fulltextIndexer.utils declared as OCRDocument
+ OCRDocumentParseOcrDocument.ocrDocument + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/class-use/ParseOcrDocument.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/class-use/ParseOcrDocument.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument

+
+No usage of de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/package-frame.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/package-frame.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,34 @@ + + + + + + +de.mpiwg.dwinter.fulltextIndexer.OCRutils + + + + + + + + + + + +de.mpiwg.dwinter.fulltextIndexer.OCRutils + + + + +
+Classes  + +
+OCRDocument +
+ParseOcrDocument
+ + + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/package-summary.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/package-summary.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,161 @@ + + + + + + +de.mpiwg.dwinter.fulltextIndexer.OCRutils + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package de.mpiwg.dwinter.fulltextIndexer.OCRutils +

+ + + + + + + + + + + + + +
+Class Summary
OCRDocument 
ParseOcrDocument 
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/package-tree.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/package-tree.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,156 @@ + + + + + + +de.mpiwg.dwinter.fulltextIndexer.OCRutils Class Hierarchy + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package de.mpiwg.dwinter.fulltextIndexer.OCRutils +

+
+
+
Package Hierarchies:
All Packages
+
+

+Class Hierarchy +

+
    +
  • java.lang.Object
      +
    • org.xml.sax.helpers.DefaultHandler (implements org.xml.sax.ContentHandler, org.xml.sax.DTDHandler, org.xml.sax.EntityResolver, org.xml.sax.ErrorHandler) + +
    • de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument
    • de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument.OCRLine
    +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/package-use.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/OCRutils/package-use.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,195 @@ + + + + + + +Uses of Package de.mpiwg.dwinter.fulltextIndexer.OCRutils + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Package
de.mpiwg.dwinter.fulltextIndexer.OCRutils

+
+ + + + + + + + + + + + + +
+Packages that use de.mpiwg.dwinter.fulltextIndexer.OCRutils
de.mpiwg.dwinter.fulltextIndexer.OCRutils  
de.mpiwg.dwinter.lucencetools.documents  
+  +

+ + + + + + + + + + + +
+Classes in de.mpiwg.dwinter.fulltextIndexer.OCRutils used by de.mpiwg.dwinter.fulltextIndexer.OCRutils
OCRDocument + +
+           
OCRDocument.OCRLine + +
+           
+  +

+ + + + + + + + +
+Classes in de.mpiwg.dwinter.fulltextIndexer.OCRutils used by de.mpiwg.dwinter.lucencetools.documents
OCRDocument.OCRLine + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,47 @@ +K 25 +svn:wc:ra_dav:version-url +V 87 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester +END +DocHarvesterThreaded.html +K 25 +svn:wc:ra_dav:version-url +V 113 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/DocHarvesterThreaded.html +END +package-frame.html +K 25 +svn:wc:ra_dav:version-url +V 106 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/package-frame.html +END +package-tree.html +K 25 +svn:wc:ra_dav:version-url +V 105 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/package-tree.html +END +OCRHarvesterThreaded.html +K 25 +svn:wc:ra_dav:version-url +V 113 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/OCRHarvesterThreaded.html +END +package-use.html +K 25 +svn:wc:ra_dav:version-url +V 104 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/package-use.html +END +HarvesterThreaded.html +K 25 +svn:wc:ra_dav:version-url +V 110 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/HarvesterThreaded.html +END +package-summary.html +K 25 +svn:wc:ra_dav:version-url +V 108 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/package-summary.html +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,282 @@ +10 + +dir +2662 
+https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-28T15:47:27.673424Z +2662 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +CLI +dir + +DocHarvesterThreaded.html +file + + + + +2010-06-28T09:04:52.000000Z +96fde2e859bc70f67e0738a843ac1ac5 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +37576 + +HarvesterThreaded.html +file + + + + +2010-06-28T09:04:52.000000Z +7dffffa9a1c8a4d8fdb83d300b25eea2 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +32083 + +OCRHarvesterThreaded.html +file + + + + +2010-06-28T09:04:52.000000Z +32b7ce6d92d37dc6da1a0056b84ec505 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +19389 + +class-use +dir + +package-frame.html +file + + + + +2010-06-28T09:04:53.000000Z +da561592e3bbfae18368116bcc259d8c +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +1271 + +package-summary.html +file + + + + +2010-06-28T09:04:53.000000Z +4f6b9589c916ea0249418115bbcbd450 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6872 + +package-tree.html +file + + + + +2010-06-28T09:04:53.000000Z +cbbd878b364a114969c4da239bee7245 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6702 + +package-use.html +file + + + + +2010-06-28T09:04:53.000000Z +f74dae30441d1cc34ae65d1650d46a5a +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +7067 + +processors +dir + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/prop-base/DocHarvesterThreaded.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/prop-base/DocHarvesterThreaded.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/prop-base/HarvesterThreaded.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/prop-base/HarvesterThreaded.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/prop-base/OCRHarvesterThreaded.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/prop-base/OCRHarvesterThreaded.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/prop-base/package-frame.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/prop-base/package-frame.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/prop-base/package-summary.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/prop-base/package-summary.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/prop-base/package-tree.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/prop-base/package-tree.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 
+text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/prop-base/package-use.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/prop-base/package-use.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/text-base/DocHarvesterThreaded.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/text-base/DocHarvesterThreaded.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,1041 @@ + + + + + + +DocHarvesterThreaded + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.harvester +
+Class DocHarvesterThreaded

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded
+
+
+
+
public class DocHarvesterThreaded
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Field Summary
+private static java.lang.StringCOMPOSEDFN + +
+           
+private  intcounter + +
+           
+private static booleanCREATE_NEW + +
+           
+private static booleanDEBUG + +
+           
+protected  java.io.FiledocDir + +
+           
+protected static java.util.ArrayList<java.lang.String>excludeFolders + +
+           
+private  intfilecount + +
+           
+protected static java.util.ArrayList<java.lang.String>fileTypesToIndex + +
+           
+protected  java.io.Fileindex_dir + +
+           
+protected static booleanindexMetaPriority + +
+           
+protected  LanguageAnalyzerslanguageAnalyzers + +
+           
+protected  java.lang.StringlanguageFileName + +
+           
+protected  java.util.HashMap<java.lang.String,java.lang.String>languageToISO + +
+           
+private static intMAXFILES + +
+           
+protected static intmaxThread + +
+           
+protected  java.lang.StringmdProviderUrl + +
+           
+protected  ProcessFileThread[]mythreads + +
+           
+private  java.lang.StringpreferedLanguage + +
+           
+private static java.lang.StringPREFIX + +
+           
+private static java.lang.StringspecialMode + +
+           
+protected  java.util.HashMap<java.lang.String,java.lang.String>supportedLanguageFolder + +
+           
+protected  java.util.HashMap<java.lang.String,java.lang.String>textLanguage + +
+           
+  + + + + + + + + + + + + + + + + +
+Constructor Summary
DocHarvesterThreaded() + +
+           
DocHarvesterThreaded(java.io.File docDir, + java.io.File index_dir, + java.lang.String mdProviderUrl) + +
+           
DocHarvesterThreaded(java.io.File docDir, + java.io.File index_dir, + java.lang.String languageFileName, + java.lang.String mdProviderUrl, + java.lang.String lang) + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+private  booleancheckFileExists(java.io.File folder) + +
+           
+private  voidclearFile(java.io.File folder) + +
+           
+private  voidcompose(java.io.File folder, + java.io.File file) + +
+           
+private  voidfinishFile(java.io.File folder) + +
+           
+private  java.io.FilegetComposedFile(java.io.File folder) + +
+           
+private  java.util.ArrayList<java.lang.String>getFileListFromRDF(java.lang.String rdffilepath) + +
+           
+ voidharvestFolder() + +
+           
+ voidharvestFromRDF(java.lang.String rdffilepath) + +
+           
+private  voidindexDocs(java.util.ArrayList<java.lang.String> files) + +
+           
+(package private)  voidindexDocs(java.io.File file) + +
+           
+private  voidinit_languages() + +
+           
+private  booleanisTextFile(java.io.File file) + +
+           
+protected  java.util.HashMap<java.lang.String,java.lang.String>loadLanguages() + +
+           
+private  voidprocessCompleteFile(java.io.File folder) + +
+           
+protected  voidprocessFile(java.io.File file) + +
+           
+protected  intwaitForFreeThread() + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+DEBUG

+
+private static final boolean DEBUG
+
+
+
See Also:
Constant Field Values
+
+
+ +

+MAXFILES

+
+private static final int MAXFILES
+
+
+
See Also:
Constant Field Values
+
+
+ +

+PREFIX

+
+private static final java.lang.String PREFIX
+
+
+
See Also:
Constant Field Values
+
+
+ +

+COMPOSEDFN

+
+private static final java.lang.String COMPOSEDFN
+
+
+
See Also:
Constant Field Values
+
+
+ +

+CREATE_NEW

+
+private static final boolean CREATE_NEW
+
+
+
See Also:
Constant Field Values
+
+
+ +

+fileTypesToIndex

+
+protected static java.util.ArrayList<java.lang.String> fileTypesToIndex
+
+
+
+
+
+ +

+excludeFolders

+
+protected static java.util.ArrayList<java.lang.String> excludeFolders
+
+
+
+
+
+ +

+indexMetaPriority

+
+protected static boolean indexMetaPriority
+
+
+
+
+
+ +

+specialMode

+
+private static java.lang.String specialMode
+
+
+
+
+
+ +

+maxThread

+
+protected static int maxThread
+
+
+
+
+
+ +

+docDir

+
+protected java.io.File docDir
+
+
+
+
+
+ +

+index_dir

+
+protected java.io.File index_dir
+
+
+
+
+
+ +

+textLanguage

+
+protected java.util.HashMap<java.lang.String,java.lang.String> textLanguage
+
+
+
+
+
+ +

+languageToISO

+
+protected java.util.HashMap<java.lang.String,java.lang.String> languageToISO
+
+
+
+
+
+ +

+languageAnalyzers

+
+protected LanguageAnalyzers languageAnalyzers
+
+
+
+
+
+ +

+counter

+
+private int counter
+
+
+
+
+
+ +

+languageFileName

+
+protected java.lang.String languageFileName
+
+
+
+
+
+ +

+mythreads

+
+protected ProcessFileThread[] mythreads
+
+
+
+
+
+ +

+filecount

+
+private int filecount
+
+
+
+
+
+ +

+mdProviderUrl

+
+protected java.lang.String mdProviderUrl
+
+
+
+
+
+ +

+preferedLanguage

+
+private java.lang.String preferedLanguage
+
+
+
+
+
+ +

+supportedLanguageFolder

+
+protected java.util.HashMap<java.lang.String,java.lang.String> supportedLanguageFolder
+
+
+
+
+ + + + + + + + +
+Constructor Detail
+ +

+DocHarvesterThreaded

+
+public DocHarvesterThreaded()
+
+
+
+ +

+DocHarvesterThreaded

+
+public DocHarvesterThreaded(java.io.File docDir,
+                            java.io.File index_dir,
+                            java.lang.String languageFileName,
+                            java.lang.String mdProviderUrl,
+                            java.lang.String lang)
+                     throws org.apache.lucene.index.CorruptIndexException,
+                            org.apache.lucene.store.LockObtainFailedException,
+                            java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException
+
+
+ +

+DocHarvesterThreaded

+
+public DocHarvesterThreaded(java.io.File docDir,
+                            java.io.File index_dir,
+                            java.lang.String mdProviderUrl)
+                     throws org.apache.lucene.index.CorruptIndexException,
+                            org.apache.lucene.store.LockObtainFailedException,
+                            java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException
+
+ + + + + + + + +
+Method Detail
+ +

+init_languages

+
+private void init_languages()
+
+
+
+
+
+
+ +

+loadLanguages

+
+protected java.util.HashMap<java.lang.String,java.lang.String> loadLanguages()
+
+
+
+
+
+
+ +

+harvestFromRDF

+
+public void harvestFromRDF(java.lang.String rdffilepath)
+                    throws java.lang.InterruptedException,
+                           org.jdom.JDOMException
+
+
+ +
Throws: +
java.lang.InterruptedException +
org.jdom.JDOMException
+
+
+
+ +

+getFileListFromRDF

+
+private java.util.ArrayList<java.lang.String> getFileListFromRDF(java.lang.String rdffilepath)
+                                                          throws org.jdom.JDOMException,
+                                                                 java.io.IOException
+
+
+ +
Throws: +
org.jdom.JDOMException +
java.io.IOException
+
+
+
+ +

+harvestFolder

+
+public void harvestFolder()
+                   throws java.lang.InterruptedException
+
+
+ +
Throws: +
java.lang.InterruptedException
+
+
+
+ +

+indexDocs

+
+private void indexDocs(java.util.ArrayList<java.lang.String> files)
+                throws java.io.IOException,
+                       java.lang.InterruptedException
+
+
+ +
Throws: +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+indexDocs

+
+void indexDocs(java.io.File file)
+         throws java.io.IOException,
+                java.lang.InterruptedException
+
+
+ +
Throws: +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+finishFile

+
+private void finishFile(java.io.File folder)
+
+
+
+
+
+
+ +

+processCompleteFile

+
+private void processCompleteFile(java.io.File folder)
+
+
+
+
+
+
+ +

+checkFileExists

+
+private boolean checkFileExists(java.io.File folder)
+
+
+
+
+
+
+ +

+clearFile

+
+private void clearFile(java.io.File folder)
+
+
+
+
+
+
+ +

+compose

+
+private void compose(java.io.File folder,
+                     java.io.File file)
+
+
+
+
+
+
+ +

+getComposedFile

+
+private java.io.File getComposedFile(java.io.File folder)
+
+
+
+
+
+
+ +

+processFile

+
+protected void processFile(java.io.File file)
+                    throws org.apache.lucene.index.CorruptIndexException,
+                           org.apache.lucene.store.LockObtainFailedException,
+                           java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException
+
+
+
+ +

+waitForFreeThread

+
+protected int waitForFreeThread()
+
+
+
+
+
+
+ +

+isTextFile

+
+private boolean isTextFile(java.io.File file)
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/text-base/HarvesterThreaded.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/text-base/HarvesterThreaded.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,874 @@ + + + + + + +HarvesterThreaded + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.harvester +
+Class HarvesterThreaded

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded
+
+
+
Direct Known Subclasses:
OCRHarvesterThreaded
+
+
+
+
public class HarvesterThreaded
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Field Summary
+private  intcounter + +
+           
+private static booleanDEBUG + +
+           
+protected  java.io.FiledocDir + +
+           
+protected static java.util.ArrayList<java.lang.String>excludeFolders + +
+           
+private  intfilecount + +
+           
+protected static java.util.ArrayList<java.lang.String>fileTypesToIndex + +
+           
+protected  java.io.Fileindex_dir + +
+           
+protected static booleanindexMetaPriority + +
+           
+protected  LanguageAnalyzerslanguageAnalyzers + +
+           
+protected  java.lang.StringlanguageFileName + +
+           
+protected  java.util.HashMap<java.lang.String,java.lang.String>languageToISO + +
+           
+private static intMAXFILES + +
+           
+protected static intmaxThread + +
+           
+protected  java.lang.StringmdProviderUrl + +
+           
+protected  ProcessFileThread[]mythreads + +
+           
+private  java.lang.StringpreferedLanguage + +
+           
+private static java.lang.StringspecialMode + +
+           
+protected  java.util.HashMap<java.lang.String,java.lang.String>supportedLanguageFolder + +
+           
+protected  java.util.HashMap<java.lang.String,java.lang.String>textLanguage + +
+           
+  + + + + + + + + + + + + + + + + +
+Constructor Summary
HarvesterThreaded() + +
+           
HarvesterThreaded(java.io.File docDir, + java.io.File index_dir, + java.lang.String mdProviderUrl) + +
+           
HarvesterThreaded(java.io.File docDir, + java.io.File index_dir, + java.lang.String languageFileName, + java.lang.String mdProviderUrl, + java.lang.String lang) + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+private  java.util.ArrayList<java.lang.String>getFileListFromRDF(java.lang.String rdffilepath) + +
+           
+ voidharvestFolder() + +
+           
+ voidharvestFromRDF(java.lang.String rdffilepath) + +
+           
+private  voidindexDocs(java.util.ArrayList<java.lang.String> files) + +
+           
+(package private)  voidindexDocs(java.io.File file) + +
+           
+private  voidinit_languages() + +
+           
+private  booleanisTextFile(java.io.File file) + +
+           
+protected  java.util.HashMap<java.lang.String,java.lang.String>loadLanguages() + +
+           
+protected  voidprocessFile(java.io.File file) + +
+           
+protected  intwaitForFreeThread() + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+DEBUG

+
+private static final boolean DEBUG
+
+
+
See Also:
Constant Field Values
+
+
+ +

+MAXFILES

+
+private static final int MAXFILES
+
+
+
See Also:
Constant Field Values
+
+
+ +

+fileTypesToIndex

+
+protected static java.util.ArrayList<java.lang.String> fileTypesToIndex
+
+
+
+
+
+ +

+excludeFolders

+
+protected static java.util.ArrayList<java.lang.String> excludeFolders
+
+
+
+
+
+ +

+indexMetaPriority

+
+protected static boolean indexMetaPriority
+
+
+
+
+
+ +

+specialMode

+
+private static java.lang.String specialMode
+
+
+
+
+
+ +

+maxThread

+
+protected static int maxThread
+
+
+
+
+
+ +

+docDir

+
+protected java.io.File docDir
+
+
+
+
+
+ +

+index_dir

+
+protected java.io.File index_dir
+
+
+
+
+
+ +

+textLanguage

+
+protected java.util.HashMap<java.lang.String,java.lang.String> textLanguage
+
+
+
+
+
+ +

+languageToISO

+
+protected java.util.HashMap<java.lang.String,java.lang.String> languageToISO
+
+
+
+
+
+ +

+languageAnalyzers

+
+protected LanguageAnalyzers languageAnalyzers
+
+
+
+
+
+ +

+counter

+
+private int counter
+
+
+
+
+
+ +

+languageFileName

+
+protected java.lang.String languageFileName
+
+
+
+
+
+ +

+mythreads

+
+protected ProcessFileThread[] mythreads
+
+
+
+
+
+ +

+filecount

+
+private int filecount
+
+
+
+
+
+ +

+mdProviderUrl

+
+protected java.lang.String mdProviderUrl
+
+
+
+
+
+ +

+preferedLanguage

+
+private java.lang.String preferedLanguage
+
+
+
+
+
+ +

+supportedLanguageFolder

+
+protected java.util.HashMap<java.lang.String,java.lang.String> supportedLanguageFolder
+
+
+
+
+ + + + + + + + +
+Constructor Detail
+ +

+HarvesterThreaded

+
+public HarvesterThreaded()
+
+
+
+ +

+HarvesterThreaded

+
+public HarvesterThreaded(java.io.File docDir,
+                         java.io.File index_dir,
+                         java.lang.String languageFileName,
+                         java.lang.String mdProviderUrl,
+                         java.lang.String lang)
+                  throws org.apache.lucene.index.CorruptIndexException,
+                         org.apache.lucene.store.LockObtainFailedException,
+                         java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException
+
+
+ +

+HarvesterThreaded

+
+public HarvesterThreaded(java.io.File docDir,
+                         java.io.File index_dir,
+                         java.lang.String mdProviderUrl)
+                  throws org.apache.lucene.index.CorruptIndexException,
+                         org.apache.lucene.store.LockObtainFailedException,
+                         java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException
+
+ + + + + + + + +
+Method Detail
+ +

+init_languages

+
+private void init_languages()
+
+
+
+
+
+
+ +

+loadLanguages

+
+protected java.util.HashMap<java.lang.String,java.lang.String> loadLanguages()
+
+
+
+
+
+
+ +

+harvestFromRDF

+
+public void harvestFromRDF(java.lang.String rdffilepath)
+                    throws java.lang.InterruptedException,
+                           org.jdom.JDOMException
+
+
+ +
Throws: +
java.lang.InterruptedException +
org.jdom.JDOMException
+
+
+
+ +

+getFileListFromRDF

+
+private java.util.ArrayList<java.lang.String> getFileListFromRDF(java.lang.String rdffilepath)
+                                                          throws org.jdom.JDOMException,
+                                                                 java.io.IOException
+
+
+ +
Throws: +
org.jdom.JDOMException +
java.io.IOException
+
+
+
+ +

+harvestFolder

+
+public void harvestFolder()
+                   throws java.lang.InterruptedException
+
+
+ +
Throws: +
java.lang.InterruptedException
+
+
+
+ +

+indexDocs

+
+private void indexDocs(java.util.ArrayList<java.lang.String> files)
+                throws java.io.IOException,
+                       java.lang.InterruptedException
+
+
+ +
Throws: +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+indexDocs

+
+void indexDocs(java.io.File file)
+         throws java.io.IOException,
+                java.lang.InterruptedException
+
+
+ +
Throws: +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+processFile

+
+protected void processFile(java.io.File file)
+                    throws org.apache.lucene.index.CorruptIndexException,
+                           org.apache.lucene.store.LockObtainFailedException,
+                           java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException
+
+
+
+ +

+waitForFreeThread

+
+protected int waitForFreeThread()
+
+
+
+
+
+
+ +

+isTextFile

+
+private boolean isTextFile(java.io.File file)
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/text-base/OCRHarvesterThreaded.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/text-base/OCRHarvesterThreaded.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,396 @@ + + + + + + +OCRHarvesterThreaded + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.harvester +
+Class OCRHarvesterThreaded

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded
+      extended by de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded
+
+
+
+
public class OCRHarvesterThreaded
extends HarvesterThreaded
+ + +

+


+ +

+ + + + + + + + + + + +
+Field Summary
+private  java.lang.StringpreferedLanguage + +
+           
+ + + + + + + +
Fields inherited from class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded
docDir, excludeFolders, fileTypesToIndex, index_dir, indexMetaPriority, languageAnalyzers, languageFileName, languageToISO, maxThread, mdProviderUrl, mythreads, supportedLanguageFolder, textLanguage
+  + + + + + + + + + + + + + + + + +
+Constructor Summary
OCRHarvesterThreaded() + +
+           
OCRHarvesterThreaded(java.io.File docDir, + java.io.File index_dir, + java.lang.String mdProviderUrl, + java.lang.String preferedLanguage) + +
+           
OCRHarvesterThreaded(java.io.File docDir, + java.io.File index_dir, + java.lang.String languageFileName, + java.lang.String mdProviderUrl, + java.lang.String lang) + +
+           
+  + + + + + + + + + + + + + + + +
+Method Summary
+private  voidinit_languages() + +
+           
+protected  voidprocessFile(java.io.File file) + +
+           
+ + + + + + + +
Methods inherited from class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded
harvestFolder, harvestFromRDF, indexDocs, loadLanguages, waitForFreeThread
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+preferedLanguage

+
+private java.lang.String preferedLanguage
+
+
+
+
+ + + + + + + + +
+Constructor Detail
+ +

+OCRHarvesterThreaded

+
+public OCRHarvesterThreaded()
+
+
+
+ +

+OCRHarvesterThreaded

+
+public OCRHarvesterThreaded(java.io.File docDir,
+                            java.io.File index_dir,
+                            java.lang.String languageFileName,
+                            java.lang.String mdProviderUrl,
+                            java.lang.String lang)
+                     throws org.apache.lucene.index.CorruptIndexException,
+                            org.apache.lucene.store.LockObtainFailedException,
+                            java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException
+
+
+ +

+OCRHarvesterThreaded

+
+public OCRHarvesterThreaded(java.io.File docDir,
+                            java.io.File index_dir,
+                            java.lang.String mdProviderUrl,
+                            java.lang.String preferedLanguage)
+                     throws org.apache.lucene.index.CorruptIndexException,
+                            org.apache.lucene.store.LockObtainFailedException,
+                            java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException
+
+ + + + + + + + +
+Method Detail
+ +

+init_languages

+
+private void init_languages()
+
+
+
+
+
+
+ +

+processFile

+
+protected void processFile(java.io.File file)
+                    throws org.apache.lucene.index.CorruptIndexException,
+                           org.apache.lucene.store.LockObtainFailedException,
+                           java.io.IOException
+
+
+
Overrides:
processFile in class HarvesterThreaded
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/text-base/package-frame.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/text-base/package-frame.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,36 @@ + + + + + + +de.mpiwg.dwinter.fulltextIndexer.harvester + + + + + + + + + + + +de.mpiwg.dwinter.fulltextIndexer.harvester + + + + +
+Classes  + +
+DocHarvesterThreaded +
+HarvesterThreaded +
+OCRHarvesterThreaded
+ + + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/text-base/package-summary.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/text-base/package-summary.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,165 @@ + + + + + + +de.mpiwg.dwinter.fulltextIndexer.harvester + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package de.mpiwg.dwinter.fulltextIndexer.harvester +

+ + + + + + + + + + + + + + + + + +
+Class Summary
DocHarvesterThreaded 
HarvesterThreaded 
OCRHarvesterThreaded 
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/text-base/package-tree.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/text-base/package-tree.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,155 @@ + + + + + + +de.mpiwg.dwinter.fulltextIndexer.harvester Class Hierarchy + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package de.mpiwg.dwinter.fulltextIndexer.harvester +

+
+
+
Package Hierarchies:
All Packages
+
+

+Class Hierarchy +

+ +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/text-base/package-use.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/text-base/package-use.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,170 @@ + + + + + + +Uses of Package de.mpiwg.dwinter.fulltextIndexer.harvester + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Package
de.mpiwg.dwinter.fulltextIndexer.harvester

+
+ + + + + + + + + +
+Packages that use de.mpiwg.dwinter.fulltextIndexer.harvester
de.mpiwg.dwinter.fulltextIndexer.harvester  
+  +

+ + + + + + + + +
+Classes in de.mpiwg.dwinter.fulltextIndexer.harvester used by de.mpiwg.dwinter.fulltextIndexer.harvester
HarvesterThreaded + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,77 @@ +K 25 +svn:wc:ra_dav:version-url +V 91 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI +END +DocHarvesterCLIMD.html +K 25 +svn:wc:ra_dav:version-url +V 114 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/DocHarvesterCLIMD.html +END +DocHarvesterCLIRDFMD.html +K 25 +svn:wc:ra_dav:version-url +V 117 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/DocHarvesterCLIRDFMD.html +END +OCRHarvesterCLI.html +K 25 +svn:wc:ra_dav:version-url +V 112 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/OCRHarvesterCLI.html +END +package-frame.html +K 25 +svn:wc:ra_dav:version-url +V 110 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/package-frame.html +END +package-tree.html +K 25 +svn:wc:ra_dav:version-url +V 109 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/package-tree.html +END +OCRHarvesterCLIRDFMD.html +K 25 +svn:wc:ra_dav:version-url +V 117 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/OCRHarvesterCLIRDFMD.html +END +HarvesterCLIMD.html +K 25 +svn:wc:ra_dav:version-url +V 111 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/HarvesterCLIMD.html +END +HarvesterCLIRDFMD.html +K 25 +svn:wc:ra_dav:version-url +V 114 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/HarvesterCLIRDFMD.html +END +package-use.html +K 25 +svn:wc:ra_dav:version-url +V 108 
+/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/package-use.html +END +OCRHarvesterCLIMD.html +K 25 +svn:wc:ra_dav:version-url +V 114 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/OCRHarvesterCLIMD.html +END +package-summary.html +K 25 +svn:wc:ra_dav:version-url +V 112 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/package-summary.html +END +HarvesterCLI.html +K 25 +svn:wc:ra_dav:version-url +V 109 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/HarvesterCLI.html +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,446 @@ +10 + +dir +2662 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-28T15:47:27.673424Z +2662 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +DocHarvesterCLIMD.html +file + + + + +2010-06-28T09:04:52.000000Z +0fe7f872d33779173f683a7217a72cd8 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +13807 + +DocHarvesterCLIRDFMD.html +file + + + + +2010-06-28T09:04:52.000000Z +cb771fc8e54eb40f5d0fef136e39af14 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +14514 + +HarvesterCLI.html +file + + + + +2010-06-28T09:04:52.000000Z +35f069638c744c62c093ae4c12f306f5 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +13715 + +HarvesterCLIMD.html +file + + + + +2010-06-28T09:04:52.000000Z +881f0ad9badb279ef7cdedd4d0cc06d7 
+2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +14077 + +HarvesterCLIRDFMD.html +file + + + + +2010-06-28T09:04:52.000000Z +2009fc73ab02e0b21eb1397bd17ee8be +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +14454 + +OCRHarvesterCLI.html +file + + + + +2010-06-28T09:04:52.000000Z +77d8efeb90ca0eb23cb759657dc9e004 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +13775 + +OCRHarvesterCLIMD.html +file + + + + +2010-06-28T09:04:52.000000Z +de583dadeb3b4ba1a778e60cc0e63916 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +14149 + +OCRHarvesterCLIRDFMD.html +file + + + + +2010-06-28T09:04:52.000000Z +08dfcbb7ef828837959ffedf3832be0a +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +14178 + +class-use +dir + +package-frame.html +file + + + + +2010-06-28T09:04:53.000000Z +46d134c0f79b6d9a8593b04075b0708a +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +2001 + +package-summary.html +file + + + + +2010-06-28T09:04:53.000000Z +92b5f62c8b3dfb44672e808abb349cd9 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +8561 + +package-tree.html +file + + + + +2010-06-28T09:04:53.000000Z +a24cd2cd7f8c393828ade1d103c19f73 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +8279 + +package-use.html +file + + + + +2010-06-28T09:04:53.000000Z +5eff9db98c001aea095682c63bd100d9 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +5768 + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/DocHarvesterCLIMD.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/DocHarvesterCLIMD.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/DocHarvesterCLIRDFMD.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/DocHarvesterCLIRDFMD.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/HarvesterCLI.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/HarvesterCLI.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/HarvesterCLIMD.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/HarvesterCLIMD.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/HarvesterCLIRDFMD.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/HarvesterCLIRDFMD.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/OCRHarvesterCLI.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/OCRHarvesterCLI.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 
+1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/OCRHarvesterCLIMD.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/OCRHarvesterCLIMD.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/OCRHarvesterCLIRDFMD.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/OCRHarvesterCLIRDFMD.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/package-frame.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/package-frame.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/package-summary.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/package-summary.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/package-tree.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/package-tree.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea 
doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/package-use.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/package-use.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/DocHarvesterCLIMD.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/DocHarvesterCLIMD.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,329 @@ + + + + + + +DocHarvesterCLIMD + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.harvester.CLI +
+Class DocHarvesterCLIMD

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIMD
+
+
+
+
public class DocHarvesterCLIMD
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
DocHarvesterCLIMD() + +
+           
+  + + + + + + + + + + + + + + + + + + + +
+Method Summary
+protected static voiddoTheHarvest(java.io.File docDir, + java.io.File index_dir, + java.lang.String mdProviderUrl, + java.lang.String languageFile) + +
+           
+protected static voiddoTheHarvestLanguage(java.io.File docDir, + java.io.File index_dir, + java.lang.String mdProviderUrl, + java.lang.String lang) + +
+           
+static voidmain(java.lang.String[] args) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+DocHarvesterCLIMD

+
+public DocHarvesterCLIMD()
+
+
+ + + + + + + + +
+Method Detail
+ +

+main

+
+public static void main(java.lang.String[] args)
+                 throws org.apache.lucene.index.CorruptIndexException,
+                        org.apache.lucene.store.LockObtainFailedException,
+                        java.io.IOException,
+                        java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+doTheHarvestLanguage

+
+protected static void doTheHarvestLanguage(java.io.File docDir,
+                                           java.io.File index_dir,
+                                           java.lang.String mdProviderUrl,
+                                           java.lang.String lang)
+                                    throws org.apache.lucene.index.CorruptIndexException,
+                                           org.apache.lucene.store.LockObtainFailedException,
+                                           java.io.IOException,
+                                           java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+doTheHarvest

+
+protected static void doTheHarvest(java.io.File docDir,
+                                   java.io.File index_dir,
+                                   java.lang.String mdProviderUrl,
+                                   java.lang.String languageFile)
+                            throws org.apache.lucene.index.CorruptIndexException,
+                                   org.apache.lucene.store.LockObtainFailedException,
+                                   java.io.IOException,
+                                   java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/DocHarvesterCLIRDFMD.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/DocHarvesterCLIRDFMD.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,335 @@ + + + + + + +DocHarvesterCLIRDFMD + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.harvester.CLI +
+Class DocHarvesterCLIRDFMD

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIRDFMD
+
+
+
+
public class DocHarvesterCLIRDFMD
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
DocHarvesterCLIRDFMD() + +
+           
+  + + + + + + + + + + + + + + + + + + + +
+Method Summary
+private static voiddoTheHarvest(java.io.File rdfFile, + java.io.File docDir, + java.io.File index_dir, + java.lang.String mdProviderUrl, + java.lang.String languageFile) + +
+           
+private static voiddoTheHarvestLanguage(java.io.File rdfFile, + java.io.File docDir, + java.io.File index_dir, + java.lang.String mdProviderUrl, + java.lang.String lang) + +
+           
+static voidmain(java.lang.String[] args) + +
+          Index all text files under a directory.
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+DocHarvesterCLIRDFMD

+
+public DocHarvesterCLIRDFMD()
+
+
+ + + + + + + + +
+Method Detail
+ +

+main

+
+public static void main(java.lang.String[] args)
+                 throws org.apache.lucene.index.CorruptIndexException,
+                        org.apache.lucene.store.LockObtainFailedException,
+                        java.io.IOException,
+                        java.lang.InterruptedException
+
+
Index all text files under a directory. +

+

+ +
Throws: +
java.io.IOException +
org.apache.lucene.store.LockObtainFailedException +
org.apache.lucene.index.CorruptIndexException +
java.lang.InterruptedException
+
+
+
+ +

+doTheHarvest

+
+private static void doTheHarvest(java.io.File rdfFile,
+                                 java.io.File docDir,
+                                 java.io.File index_dir,
+                                 java.lang.String mdProviderUrl,
+                                 java.lang.String languageFile)
+                          throws org.apache.lucene.index.CorruptIndexException,
+                                 org.apache.lucene.store.LockObtainFailedException,
+                                 java.io.IOException,
+                                 java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+doTheHarvestLanguage

+
+private static void doTheHarvestLanguage(java.io.File rdfFile,
+                                         java.io.File docDir,
+                                         java.io.File index_dir,
+                                         java.lang.String mdProviderUrl,
+                                         java.lang.String lang)
+                                  throws org.apache.lucene.index.CorruptIndexException,
+                                         org.apache.lucene.store.LockObtainFailedException,
+                                         java.io.IOException,
+                                         java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/HarvesterCLI.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/HarvesterCLI.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,325 @@ + + + + + + +HarvesterCLI + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.harvester.CLI +
+Class HarvesterCLI

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLI
+
+
+
+
public class HarvesterCLI
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
HarvesterCLI() + +
+           
+  + + + + + + + + + + + + + + + + + + + +
+Method Summary
+protected static voiddoTheHarvest(java.io.File docDir, + java.io.File index_dir, + java.lang.String languageFile) + +
+           
+protected static voiddoTheHarvestLanguage(java.io.File docDir, + java.io.File index_dir, + java.lang.String lang) + +
+           
+static voidmain(java.lang.String[] args) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+HarvesterCLI

+
+public HarvesterCLI()
+
+
+ + + + + + + + +
+Method Detail
+ +

+main

+
+public static void main(java.lang.String[] args)
+                 throws org.apache.lucene.index.CorruptIndexException,
+                        org.apache.lucene.store.LockObtainFailedException,
+                        java.io.IOException,
+                        java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+doTheHarvestLanguage

+
+protected static void doTheHarvestLanguage(java.io.File docDir,
+                                           java.io.File index_dir,
+                                           java.lang.String lang)
+                                    throws org.apache.lucene.index.CorruptIndexException,
+                                           org.apache.lucene.store.LockObtainFailedException,
+                                           java.io.IOException,
+                                           java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+doTheHarvest

+
+protected static void doTheHarvest(java.io.File docDir,
+                                   java.io.File index_dir,
+                                   java.lang.String languageFile)
+                            throws org.apache.lucene.index.CorruptIndexException,
+                                   org.apache.lucene.store.LockObtainFailedException,
+                                   java.io.IOException,
+                                   java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/HarvesterCLIMD.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/HarvesterCLIMD.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,329 @@ + + + + + + +HarvesterCLIMD + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.harvester.CLI +
+Class HarvesterCLIMD

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIMD
+
+
+
+
public class HarvesterCLIMD
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
HarvesterCLIMD() + +
+           
+  + + + + + + + + + + + + + + + + + + + +
+Method Summary
+protected static voiddoTheHarvest(java.io.File docDir, + java.io.File index_dir, + java.lang.String mdProviderUrl, + java.lang.String languageFile) + +
+           
+protected static voiddoTheHarvestLanguage(java.io.File docDir, + java.io.File index_dir, + java.lang.String mdProviderUrl, + java.lang.String lang) + +
+           
+static voidmain(java.lang.String[] args) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+HarvesterCLIMD

+
+public HarvesterCLIMD()
+
+
+ + + + + + + + +
+Method Detail
+ +

+main

+
+public static void main(java.lang.String[] args)
+                 throws org.apache.lucene.index.CorruptIndexException,
+                        org.apache.lucene.store.LockObtainFailedException,
+                        java.io.IOException,
+                        java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+doTheHarvestLanguage

+
+protected static void doTheHarvestLanguage(java.io.File docDir,
+                                           java.io.File index_dir,
+                                           java.lang.String mdProviderUrl,
+                                           java.lang.String lang)
+                                    throws org.apache.lucene.index.CorruptIndexException,
+                                           org.apache.lucene.store.LockObtainFailedException,
+                                           java.io.IOException,
+                                           java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+doTheHarvest

+
+protected static void doTheHarvest(java.io.File docDir,
+                                   java.io.File index_dir,
+                                   java.lang.String mdProviderUrl,
+                                   java.lang.String languageFile)
+                            throws org.apache.lucene.index.CorruptIndexException,
+                                   org.apache.lucene.store.LockObtainFailedException,
+                                   java.io.IOException,
+                                   java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/HarvesterCLIRDFMD.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/HarvesterCLIRDFMD.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,335 @@ + + + + + + +HarvesterCLIRDFMD + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.harvester.CLI +
+Class HarvesterCLIRDFMD

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIRDFMD
+
+
+
+
public class HarvesterCLIRDFMD
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
HarvesterCLIRDFMD() + +
+           
+  + + + + + + + + + + + + + + + + + + + +
+Method Summary
+private static voiddoTheHarvest(java.io.File rdfFile, + java.io.File docDir, + java.io.File index_dir, + java.lang.String mdProviderUrl, + java.lang.String languageFile) + +
+           
+private static voiddoTheHarvestLanguage(java.io.File rdfFile, + java.io.File docDir, + java.io.File index_dir, + java.lang.String mdProviderUrl, + java.lang.String lang) + +
+           
+static voidmain(java.lang.String[] args) + +
+          Index all text files under a directory.
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+HarvesterCLIRDFMD

+
+public HarvesterCLIRDFMD()
+
+
+ + + + + + + + +
+Method Detail
+ +

+main

+
+public static void main(java.lang.String[] args)
+                 throws org.apache.lucene.index.CorruptIndexException,
+                        org.apache.lucene.store.LockObtainFailedException,
+                        java.io.IOException,
+                        java.lang.InterruptedException
+
+
Index all text files under a directory. +

+

+ +
Throws: +
java.io.IOException +
org.apache.lucene.store.LockObtainFailedException +
org.apache.lucene.index.CorruptIndexException +
java.lang.InterruptedException
+
+
+
+ +

+doTheHarvest

+
+private static void doTheHarvest(java.io.File rdfFile,
+                                 java.io.File docDir,
+                                 java.io.File index_dir,
+                                 java.lang.String mdProviderUrl,
+                                 java.lang.String languageFile)
+                          throws org.apache.lucene.index.CorruptIndexException,
+                                 org.apache.lucene.store.LockObtainFailedException,
+                                 java.io.IOException,
+                                 java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+doTheHarvestLanguage

+
+private static void doTheHarvestLanguage(java.io.File rdfFile,
+                                         java.io.File docDir,
+                                         java.io.File index_dir,
+                                         java.lang.String mdProviderUrl,
+                                         java.lang.String lang)
+                                  throws org.apache.lucene.index.CorruptIndexException,
+                                         org.apache.lucene.store.LockObtainFailedException,
+                                         java.io.IOException,
+                                         java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/OCRHarvesterCLI.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/OCRHarvesterCLI.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,325 @@ + + + + + + +OCRHarvesterCLI + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.harvester.CLI +
+Class OCRHarvesterCLI

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLI
+
+
+
+
public class OCRHarvesterCLI
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
OCRHarvesterCLI() + +
+           
+  + + + + + + + + + + + + + + + + + + + +
+Method Summary
+protected static voiddoTheHarvest(java.io.File docDir, + java.io.File index_dir, + java.lang.String languageFile) + +
+           
+protected static voiddoTheHarvestLanguage(java.io.File docDir, + java.io.File index_dir, + java.lang.String lang) + +
+           
+static voidmain(java.lang.String[] args) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+OCRHarvesterCLI

+
+public OCRHarvesterCLI()
+
+
+ + + + + + + + +
+Method Detail
+ +

+main

+
+public static void main(java.lang.String[] args)
+                 throws org.apache.lucene.index.CorruptIndexException,
+                        org.apache.lucene.store.LockObtainFailedException,
+                        java.io.IOException,
+                        java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+doTheHarvestLanguage

+
+protected static void doTheHarvestLanguage(java.io.File docDir,
+                                           java.io.File index_dir,
+                                           java.lang.String lang)
+                                    throws org.apache.lucene.index.CorruptIndexException,
+                                           org.apache.lucene.store.LockObtainFailedException,
+                                           java.io.IOException,
+                                           java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+doTheHarvest

+
+protected static void doTheHarvest(java.io.File docDir,
+                                   java.io.File index_dir,
+                                   java.lang.String languageFile)
+                            throws org.apache.lucene.index.CorruptIndexException,
+                                   org.apache.lucene.store.LockObtainFailedException,
+                                   java.io.IOException,
+                                   java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/OCRHarvesterCLIMD.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/OCRHarvesterCLIMD.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,329 @@ + + + + + + +OCRHarvesterCLIMD + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.harvester.CLI +
+Class OCRHarvesterCLIMD

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIMD
+
+
+
+
public class OCRHarvesterCLIMD
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
OCRHarvesterCLIMD() + +
+           
+  + + + + + + + + + + + + + + + + + + + +
+Method Summary
+protected static voiddoTheHarvest(java.io.File docDir, + java.io.File index_dir, + java.lang.String mdProviderUrl, + java.lang.String languageFile) + +
+           
+protected static voiddoTheHarvestLanguage(java.io.File docDir, + java.io.File index_dir, + java.lang.String mdProviderUrl, + java.lang.String lang) + +
+           
+static voidmain(java.lang.String[] args) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+OCRHarvesterCLIMD

+
+public OCRHarvesterCLIMD()
+
+
+ + + + + + + + +
+Method Detail
+ +

+main

+
+public static void main(java.lang.String[] args)
+                 throws org.apache.lucene.index.CorruptIndexException,
+                        org.apache.lucene.store.LockObtainFailedException,
+                        java.io.IOException,
+                        java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+doTheHarvestLanguage

+
+protected static void doTheHarvestLanguage(java.io.File docDir,
+                                           java.io.File index_dir,
+                                           java.lang.String mdProviderUrl,
+                                           java.lang.String lang)
+                                    throws org.apache.lucene.index.CorruptIndexException,
+                                           org.apache.lucene.store.LockObtainFailedException,
+                                           java.io.IOException,
+                                           java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+doTheHarvest

+
+protected static void doTheHarvest(java.io.File docDir,
+                                   java.io.File index_dir,
+                                   java.lang.String mdProviderUrl,
+                                   java.lang.String languageFile)
+                            throws org.apache.lucene.index.CorruptIndexException,
+                                   org.apache.lucene.store.LockObtainFailedException,
+                                   java.io.IOException,
+                                   java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/OCRHarvesterCLIRDFMD.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/OCRHarvesterCLIRDFMD.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,335 @@ + + + + + + +OCRHarvesterCLIRDFMD + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.harvester.CLI +
+Class OCRHarvesterCLIRDFMD

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIRDFMD
+
+
+
+
public class OCRHarvesterCLIRDFMD
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
OCRHarvesterCLIRDFMD() + +
+           
+  + + + + + + + + + + + + + + + + + + + +
+Method Summary
+private static voiddoTheHarvest(java.io.File rdfFile, + java.io.File docDir, + java.io.File index_dir, + java.lang.String mdProviderUrl, + java.lang.String languageFile) + +
+           
+private static voiddoTheHarvestLanguage(java.io.File rdfFile, + java.io.File docDir, + java.io.File index_dir, + java.lang.String mdProviderUrl, + java.lang.String lang) + +
+           
+static voidmain(java.lang.String[] args) + +
+          Index all text files under a directory.
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+OCRHarvesterCLIRDFMD

+
+public OCRHarvesterCLIRDFMD()
+
+
+ + + + + + + + +
+Method Detail
+ +

+main

+
+public static void main(java.lang.String[] args)
+                 throws org.apache.lucene.index.CorruptIndexException,
+                        org.apache.lucene.store.LockObtainFailedException,
+                        java.io.IOException,
+                        java.lang.InterruptedException
+
+
Index all text files under a directory. +

+

+ +
Throws: +
java.io.IOException +
org.apache.lucene.store.LockObtainFailedException +
org.apache.lucene.index.CorruptIndexException +
java.lang.InterruptedException
+
+
+
+ +

+doTheHarvest

+
+private static void doTheHarvest(java.io.File rdfFile,
+                                 java.io.File docDir,
+                                 java.io.File index_dir,
+                                 java.lang.String mdProviderUrl,
+                                 java.lang.String languageFile)
+                          throws org.apache.lucene.index.CorruptIndexException,
+                                 org.apache.lucene.store.LockObtainFailedException,
+                                 java.io.IOException,
+                                 java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+doTheHarvestLanguage

+
+private static void doTheHarvestLanguage(java.io.File rdfFile,
+                                         java.io.File docDir,
+                                         java.io.File index_dir,
+                                         java.lang.String mdProviderUrl,
+                                         java.lang.String lang)
+                                  throws org.apache.lucene.index.CorruptIndexException,
+                                         org.apache.lucene.store.LockObtainFailedException,
+                                         java.io.IOException,
+                                         java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/package-frame.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/package-frame.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,46 @@ + + + + + + +de.mpiwg.dwinter.fulltextIndexer.harvester.CLI + + + + + + + + + + + +de.mpiwg.dwinter.fulltextIndexer.harvester.CLI + + + + +
+Classes  + +
+DocHarvesterCLIMD +
+DocHarvesterCLIRDFMD +
+HarvesterCLI +
+HarvesterCLIMD +
+HarvesterCLIRDFMD +
+OCRHarvesterCLI +
+OCRHarvesterCLIMD +
+OCRHarvesterCLIRDFMD
+ + + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/package-summary.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/package-summary.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,185 @@ + + + + + + +de.mpiwg.dwinter.fulltextIndexer.harvester.CLI + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Class Summary
DocHarvesterCLIMD 
DocHarvesterCLIRDFMD 
HarvesterCLI 
HarvesterCLIMD 
HarvesterCLIRDFMD 
OCRHarvesterCLI 
OCRHarvesterCLIMD 
OCRHarvesterCLIRDFMD 
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/package-tree.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/package-tree.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,153 @@ + + + + + + +de.mpiwg.dwinter.fulltextIndexer.harvester.CLI Class Hierarchy + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI +

+
+
+
Package Hierarchies:
All Packages
+
+

+Class Hierarchy +

+ +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/package-use.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/package-use.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Package
de.mpiwg.dwinter.fulltextIndexer.harvester.CLI

+
+No usage of de.mpiwg.dwinter.fulltextIndexer.harvester.CLI +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/DocHarvesterCLIMD.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/DocHarvesterCLIMD.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,329 @@ + + + + + + +DocHarvesterCLIMD + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.harvester.CLI +
+Class DocHarvesterCLIMD

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIMD
+
+
+
+
public class DocHarvesterCLIMD
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
DocHarvesterCLIMD() + +
+           
+  + + + + + + + + + + + + + + + + + + + +
+Method Summary
+protected static voiddoTheHarvest(java.io.File docDir, + java.io.File index_dir, + java.lang.String mdProviderUrl, + java.lang.String languageFile) + +
+           
+protected static voiddoTheHarvestLanguage(java.io.File docDir, + java.io.File index_dir, + java.lang.String mdProviderUrl, + java.lang.String lang) + +
+           
+static voidmain(java.lang.String[] args) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+DocHarvesterCLIMD

+
+public DocHarvesterCLIMD()
+
+
+ + + + + + + + +
+Method Detail
+ +

+main

+
+public static void main(java.lang.String[] args)
+                 throws org.apache.lucene.index.CorruptIndexException,
+                        org.apache.lucene.store.LockObtainFailedException,
+                        java.io.IOException,
+                        java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+doTheHarvestLanguage

+
+protected static void doTheHarvestLanguage(java.io.File docDir,
+                                           java.io.File index_dir,
+                                           java.lang.String mdProviderUrl,
+                                           java.lang.String lang)
+                                    throws org.apache.lucene.index.CorruptIndexException,
+                                           org.apache.lucene.store.LockObtainFailedException,
+                                           java.io.IOException,
+                                           java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+doTheHarvest

+
+protected static void doTheHarvest(java.io.File docDir,
+                                   java.io.File index_dir,
+                                   java.lang.String mdProviderUrl,
+                                   java.lang.String languageFile)
+                            throws org.apache.lucene.index.CorruptIndexException,
+                                   org.apache.lucene.store.LockObtainFailedException,
+                                   java.io.IOException,
+                                   java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/DocHarvesterCLIRDFMD.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/DocHarvesterCLIRDFMD.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,335 @@ + + + + + + +DocHarvesterCLIRDFMD + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.harvester.CLI +
+Class DocHarvesterCLIRDFMD

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIRDFMD
+
+
+
+
public class DocHarvesterCLIRDFMD
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
DocHarvesterCLIRDFMD() + +
+           
+  + + + + + + + + + + + + + + + + + + + +
+Method Summary
+private static voiddoTheHarvest(java.io.File rdfFile, + java.io.File docDir, + java.io.File index_dir, + java.lang.String mdProviderUrl, + java.lang.String languageFile) + +
+           
+private static voiddoTheHarvestLanguage(java.io.File rdfFile, + java.io.File docDir, + java.io.File index_dir, + java.lang.String mdProviderUrl, + java.lang.String lang) + +
+           
+static voidmain(java.lang.String[] args) + +
+          Index all text files under a directory.
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+DocHarvesterCLIRDFMD

+
+public DocHarvesterCLIRDFMD()
+
+
+ + + + + + + + +
+Method Detail
+ +

+main

+
+public static void main(java.lang.String[] args)
+                 throws org.apache.lucene.index.CorruptIndexException,
+                        org.apache.lucene.store.LockObtainFailedException,
+                        java.io.IOException,
+                        java.lang.InterruptedException
+
+
Index all text files under a directory. +

+

+ +
Throws: +
java.io.IOException +
org.apache.lucene.store.LockObtainFailedException +
org.apache.lucene.index.CorruptIndexException +
java.lang.InterruptedException
+
+
+
+ +

+doTheHarvest

+
+private static void doTheHarvest(java.io.File rdfFile,
+                                 java.io.File docDir,
+                                 java.io.File index_dir,
+                                 java.lang.String mdProviderUrl,
+                                 java.lang.String languageFile)
+                          throws org.apache.lucene.index.CorruptIndexException,
+                                 org.apache.lucene.store.LockObtainFailedException,
+                                 java.io.IOException,
+                                 java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+doTheHarvestLanguage

+
+private static void doTheHarvestLanguage(java.io.File rdfFile,
+                                         java.io.File docDir,
+                                         java.io.File index_dir,
+                                         java.lang.String mdProviderUrl,
+                                         java.lang.String lang)
+                                  throws org.apache.lucene.index.CorruptIndexException,
+                                         org.apache.lucene.store.LockObtainFailedException,
+                                         java.io.IOException,
+                                         java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/HarvesterCLI.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/HarvesterCLI.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,325 @@ + + + + + + +HarvesterCLI + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.harvester.CLI +
+Class HarvesterCLI

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLI
+
+
+
+
public class HarvesterCLI
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
HarvesterCLI() + +
+           
+  + + + + + + + + + + + + + + + + + + + +
+Method Summary
+protected static voiddoTheHarvest(java.io.File docDir, + java.io.File index_dir, + java.lang.String languageFile) + +
+           
+protected static voiddoTheHarvestLanguage(java.io.File docDir, + java.io.File index_dir, + java.lang.String lang) + +
+           
+static voidmain(java.lang.String[] args) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+HarvesterCLI

+
+public HarvesterCLI()
+
+
+ + + + + + + + +
+Method Detail
+ +

+main

+
+public static void main(java.lang.String[] args)
+                 throws org.apache.lucene.index.CorruptIndexException,
+                        org.apache.lucene.store.LockObtainFailedException,
+                        java.io.IOException,
+                        java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+doTheHarvestLanguage

+
+protected static void doTheHarvestLanguage(java.io.File docDir,
+                                           java.io.File index_dir,
+                                           java.lang.String lang)
+                                    throws org.apache.lucene.index.CorruptIndexException,
+                                           org.apache.lucene.store.LockObtainFailedException,
+                                           java.io.IOException,
+                                           java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+doTheHarvest

+
+protected static void doTheHarvest(java.io.File docDir,
+                                   java.io.File index_dir,
+                                   java.lang.String languageFile)
+                            throws org.apache.lucene.index.CorruptIndexException,
+                                   org.apache.lucene.store.LockObtainFailedException,
+                                   java.io.IOException,
+                                   java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/HarvesterCLIMD.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/HarvesterCLIMD.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,329 @@ + + + + + + +HarvesterCLIMD + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.harvester.CLI +
+Class HarvesterCLIMD

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIMD
+
+
+
+
public class HarvesterCLIMD
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
HarvesterCLIMD() + +
+           
+  + + + + + + + + + + + + + + + + + + + +
+Method Summary
+protected static voiddoTheHarvest(java.io.File docDir, + java.io.File index_dir, + java.lang.String mdProviderUrl, + java.lang.String languageFile) + +
+           
+protected static voiddoTheHarvestLanguage(java.io.File docDir, + java.io.File index_dir, + java.lang.String mdProviderUrl, + java.lang.String lang) + +
+           
+static voidmain(java.lang.String[] args) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+HarvesterCLIMD

+
+public HarvesterCLIMD()
+
+
+ + + + + + + + +
+Method Detail
+ +

+main

+
+public static void main(java.lang.String[] args)
+                 throws org.apache.lucene.index.CorruptIndexException,
+                        org.apache.lucene.store.LockObtainFailedException,
+                        java.io.IOException,
+                        java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+doTheHarvestLanguage

+
+protected static void doTheHarvestLanguage(java.io.File docDir,
+                                           java.io.File index_dir,
+                                           java.lang.String mdProviderUrl,
+                                           java.lang.String lang)
+                                    throws org.apache.lucene.index.CorruptIndexException,
+                                           org.apache.lucene.store.LockObtainFailedException,
+                                           java.io.IOException,
+                                           java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+doTheHarvest

+
+protected static void doTheHarvest(java.io.File docDir,
+                                   java.io.File index_dir,
+                                   java.lang.String mdProviderUrl,
+                                   java.lang.String languageFile)
+                            throws org.apache.lucene.index.CorruptIndexException,
+                                   org.apache.lucene.store.LockObtainFailedException,
+                                   java.io.IOException,
+                                   java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/HarvesterCLIRDFMD.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/HarvesterCLIRDFMD.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,335 @@ + + + + + + +HarvesterCLIRDFMD + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.harvester.CLI +
+Class HarvesterCLIRDFMD

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIRDFMD
+
+
+
+
public class HarvesterCLIRDFMD
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
HarvesterCLIRDFMD() + +
+           
+  + + + + + + + + + + + + + + + + + + + +
+Method Summary
+private static voiddoTheHarvest(java.io.File rdfFile, + java.io.File docDir, + java.io.File index_dir, + java.lang.String mdProviderUrl, + java.lang.String languageFile) + +
+           
+private static voiddoTheHarvestLanguage(java.io.File rdfFile, + java.io.File docDir, + java.io.File index_dir, + java.lang.String mdProviderUrl, + java.lang.String lang) + +
+           
+static voidmain(java.lang.String[] args) + +
+          Index all text files under a directory.
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+HarvesterCLIRDFMD

+
+public HarvesterCLIRDFMD()
+
+
+ + + + + + + + +
+Method Detail
+ +

+main

+
+public static void main(java.lang.String[] args)
+                 throws org.apache.lucene.index.CorruptIndexException,
+                        org.apache.lucene.store.LockObtainFailedException,
+                        java.io.IOException,
+                        java.lang.InterruptedException
+
+
Index all text files under a directory. +

+

+ +
Throws: +
java.io.IOException +
org.apache.lucene.store.LockObtainFailedException +
org.apache.lucene.index.CorruptIndexException +
java.lang.InterruptedException
+
+
+
+ +

+doTheHarvest

+
+private static void doTheHarvest(java.io.File rdfFile,
+                                 java.io.File docDir,
+                                 java.io.File index_dir,
+                                 java.lang.String mdProviderUrl,
+                                 java.lang.String languageFile)
+                          throws org.apache.lucene.index.CorruptIndexException,
+                                 org.apache.lucene.store.LockObtainFailedException,
+                                 java.io.IOException,
+                                 java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+doTheHarvestLanguage

+
+private static void doTheHarvestLanguage(java.io.File rdfFile,
+                                         java.io.File docDir,
+                                         java.io.File index_dir,
+                                         java.lang.String mdProviderUrl,
+                                         java.lang.String lang)
+                                  throws org.apache.lucene.index.CorruptIndexException,
+                                         org.apache.lucene.store.LockObtainFailedException,
+                                         java.io.IOException,
+                                         java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/OCRHarvesterCLI.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/OCRHarvesterCLI.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,325 @@ + + + + + + +OCRHarvesterCLI + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.harvester.CLI +
+Class OCRHarvesterCLI

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLI
+
+
+
+
public class OCRHarvesterCLI
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
OCRHarvesterCLI() + +
+           
+  + + + + + + + + + + + + + + + + + + + +
+Method Summary
+protected static voiddoTheHarvest(java.io.File docDir, + java.io.File index_dir, + java.lang.String languageFile) + +
+           
+protected static voiddoTheHarvestLanguage(java.io.File docDir, + java.io.File index_dir, + java.lang.String lang) + +
+           
+static voidmain(java.lang.String[] args) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+OCRHarvesterCLI

+
+public OCRHarvesterCLI()
+
+
+ + + + + + + + +
+Method Detail
+ +

+main

+
+public static void main(java.lang.String[] args)
+                 throws org.apache.lucene.index.CorruptIndexException,
+                        org.apache.lucene.store.LockObtainFailedException,
+                        java.io.IOException,
+                        java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+doTheHarvestLanguage

+
+protected static void doTheHarvestLanguage(java.io.File docDir,
+                                           java.io.File index_dir,
+                                           java.lang.String lang)
+                                    throws org.apache.lucene.index.CorruptIndexException,
+                                           org.apache.lucene.store.LockObtainFailedException,
+                                           java.io.IOException,
+                                           java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+doTheHarvest

+
+protected static void doTheHarvest(java.io.File docDir,
+                                   java.io.File index_dir,
+                                   java.lang.String languageFile)
+                            throws org.apache.lucene.index.CorruptIndexException,
+                                   org.apache.lucene.store.LockObtainFailedException,
+                                   java.io.IOException,
+                                   java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/OCRHarvesterCLIMD.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/OCRHarvesterCLIMD.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,329 @@ + + + + + + +OCRHarvesterCLIMD + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.harvester.CLI +
+Class OCRHarvesterCLIMD

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIMD
+
+
+
+
public class OCRHarvesterCLIMD
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
OCRHarvesterCLIMD() + +
+           
+  + + + + + + + + + + + + + + + + + + + +
+Method Summary
+protected static voiddoTheHarvest(java.io.File docDir, + java.io.File index_dir, + java.lang.String mdProviderUrl, + java.lang.String languageFile) + +
+           
+protected static voiddoTheHarvestLanguage(java.io.File docDir, + java.io.File index_dir, + java.lang.String mdProviderUrl, + java.lang.String lang) + +
+           
+static voidmain(java.lang.String[] args) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+OCRHarvesterCLIMD

+
+public OCRHarvesterCLIMD()
+
+
+ + + + + + + + +
+Method Detail
+ +

+main

+
+public static void main(java.lang.String[] args)
+                 throws org.apache.lucene.index.CorruptIndexException,
+                        org.apache.lucene.store.LockObtainFailedException,
+                        java.io.IOException,
+                        java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+doTheHarvestLanguage

+
+protected static void doTheHarvestLanguage(java.io.File docDir,
+                                           java.io.File index_dir,
+                                           java.lang.String mdProviderUrl,
+                                           java.lang.String lang)
+                                    throws org.apache.lucene.index.CorruptIndexException,
+                                           org.apache.lucene.store.LockObtainFailedException,
+                                           java.io.IOException,
+                                           java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+doTheHarvest

+
+protected static void doTheHarvest(java.io.File docDir,
+                                   java.io.File index_dir,
+                                   java.lang.String mdProviderUrl,
+                                   java.lang.String languageFile)
+                            throws org.apache.lucene.index.CorruptIndexException,
+                                   org.apache.lucene.store.LockObtainFailedException,
+                                   java.io.IOException,
+                                   java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/OCRHarvesterCLIRDFMD.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/OCRHarvesterCLIRDFMD.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,335 @@ + + + + + + +OCRHarvesterCLIRDFMD + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.harvester.CLI +
+Class OCRHarvesterCLIRDFMD

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIRDFMD
+
+
+
+
public class OCRHarvesterCLIRDFMD
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
OCRHarvesterCLIRDFMD() + +
+           
+  + + + + + + + + + + + + + + + + + + + +
+Method Summary
+private static voiddoTheHarvest(java.io.File rdfFile, + java.io.File docDir, + java.io.File index_dir, + java.lang.String mdProviderUrl, + java.lang.String languageFile) + +
+           
+private static voiddoTheHarvestLanguage(java.io.File rdfFile, + java.io.File docDir, + java.io.File index_dir, + java.lang.String mdProviderUrl, + java.lang.String lang) + +
+           
+static voidmain(java.lang.String[] args) + +
+          Index all text files under a directory.
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+OCRHarvesterCLIRDFMD

+
+public OCRHarvesterCLIRDFMD()
+
+
+ + + + + + + + +
+Method Detail
+ +

+main

+
+public static void main(java.lang.String[] args)
+                 throws org.apache.lucene.index.CorruptIndexException,
+                        org.apache.lucene.store.LockObtainFailedException,
+                        java.io.IOException,
+                        java.lang.InterruptedException
+
+
Index all text files under a directory. +

+

+ +
Throws: +
java.io.IOException +
org.apache.lucene.store.LockObtainFailedException +
org.apache.lucene.index.CorruptIndexException +
java.lang.InterruptedException
+
+
+
+ +

+doTheHarvest

+
+private static void doTheHarvest(java.io.File rdfFile,
+                                 java.io.File docDir,
+                                 java.io.File index_dir,
+                                 java.lang.String mdProviderUrl,
+                                 java.lang.String languageFile)
+                          throws org.apache.lucene.index.CorruptIndexException,
+                                 org.apache.lucene.store.LockObtainFailedException,
+                                 java.io.IOException,
+                                 java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+doTheHarvestLanguage

+
+private static void doTheHarvestLanguage(java.io.File rdfFile,
+                                         java.io.File docDir,
+                                         java.io.File index_dir,
+                                         java.lang.String mdProviderUrl,
+                                         java.lang.String lang)
+                                  throws org.apache.lucene.index.CorruptIndexException,
+                                         org.apache.lucene.store.LockObtainFailedException,
+                                         java.io.IOException,
+                                         java.lang.InterruptedException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException +
java.lang.InterruptedException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,53 @@ +K 25 +svn:wc:ra_dav:version-url +V 101 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use +END +DocHarvesterCLIMD.html +K 25 +svn:wc:ra_dav:version-url +V 124 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/DocHarvesterCLIMD.html +END +DocHarvesterCLIRDFMD.html +K 25 +svn:wc:ra_dav:version-url +V 127 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/DocHarvesterCLIRDFMD.html +END +OCRHarvesterCLI.html +K 25 +svn:wc:ra_dav:version-url +V 122 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/OCRHarvesterCLI.html +END +OCRHarvesterCLIRDFMD.html +K 25 +svn:wc:ra_dav:version-url +V 127 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/OCRHarvesterCLIRDFMD.html +END +HarvesterCLIMD.html +K 25 +svn:wc:ra_dav:version-url +V 121 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/HarvesterCLIMD.html +END +HarvesterCLIRDFMD.html +K 25 +svn:wc:ra_dav:version-url +V 124 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/HarvesterCLIRDFMD.html +END +OCRHarvesterCLIMD.html +K 25 +svn:wc:ra_dav:version-url +V 124 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/OCRHarvesterCLIMD.html +END +HarvesterCLI.html +K 25 +svn:wc:ra_dav:version-url +V 119 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/HarvesterCLI.html +END 
diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,307 @@ +10 + +dir +2662 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-28T15:47:27.673424Z +2662 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +DocHarvesterCLIMD.html +file + + + + +2010-06-28T09:04:53.000000Z +177aeb552ec0a93f6cf3e5f98d90b5fb +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6287 + +DocHarvesterCLIRDFMD.html +file + + + + +2010-06-28T09:04:53.000000Z +2c3ec2b56b7fc17f3322b6f97007c4c2 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6317 + +HarvesterCLI.html +file + + + + +2010-06-28T09:04:53.000000Z +370c1551b3248c9780275892a52ce4ae +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6237 + +HarvesterCLIMD.html +file + + + + +2010-06-28T09:04:53.000000Z +a49bc420d8ecafae28991e53767cc85a +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6257 + +HarvesterCLIRDFMD.html +file + + + + +2010-06-28T09:04:53.000000Z +2e49605f7b06bbe701efa23e59edbf60 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6287 + +OCRHarvesterCLI.html +file + + + + +2010-06-28T09:04:53.000000Z +22c854e7454fa57ec65a5f8ed3ac5632 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6267 + +OCRHarvesterCLIMD.html +file + + + + +2010-06-28T09:04:53.000000Z +893f9ab2c0b53c1b34e740d1cbcd32ed +2010-06-28T15:47:27.673424Z +2662 +dwinter 
+has-props + + + + + + + + + + + + + + + + + + + + +6287 + +OCRHarvesterCLIRDFMD.html +file + + + + +2010-06-28T09:04:53.000000Z +5706f76d1f01daf325ad0b7c22a6f4b7 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6317 + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/prop-base/DocHarvesterCLIMD.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/prop-base/DocHarvesterCLIMD.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/prop-base/DocHarvesterCLIRDFMD.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/prop-base/DocHarvesterCLIRDFMD.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/prop-base/HarvesterCLI.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/prop-base/HarvesterCLI.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/prop-base/HarvesterCLIMD.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/prop-base/HarvesterCLIMD.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/prop-base/HarvesterCLIRDFMD.html.svn-base --- /dev/null Thu 
Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/prop-base/HarvesterCLIRDFMD.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/prop-base/OCRHarvesterCLI.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/prop-base/OCRHarvesterCLI.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/prop-base/OCRHarvesterCLIMD.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/prop-base/OCRHarvesterCLIMD.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/prop-base/OCRHarvesterCLIRDFMD.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/prop-base/OCRHarvesterCLIRDFMD.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/text-base/DocHarvesterCLIMD.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/text-base/DocHarvesterCLIMD.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIMD + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIMD

+
+No usage of de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIMD +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/text-base/DocHarvesterCLIRDFMD.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/text-base/DocHarvesterCLIRDFMD.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIRDFMD + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIRDFMD

+
+No usage of de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIRDFMD +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/text-base/HarvesterCLI.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/text-base/HarvesterCLI.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLI + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLI

+
+No usage of de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLI +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/text-base/HarvesterCLIMD.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/text-base/HarvesterCLIMD.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIMD + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIMD

+
+No usage of de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIMD +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/text-base/HarvesterCLIRDFMD.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/text-base/HarvesterCLIRDFMD.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIRDFMD + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIRDFMD

+
+No usage of de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIRDFMD +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/text-base/OCRHarvesterCLI.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/text-base/OCRHarvesterCLI.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLI + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLI

+
+No usage of de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLI +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/text-base/OCRHarvesterCLIMD.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/text-base/OCRHarvesterCLIMD.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIMD + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIMD

+
+No usage of de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIMD +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/text-base/OCRHarvesterCLIRDFMD.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/.svn/text-base/OCRHarvesterCLIRDFMD.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIRDFMD + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIRDFMD

+
+No usage of de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIRDFMD +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/DocHarvesterCLIMD.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/DocHarvesterCLIMD.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIMD + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIMD

+
+No usage of de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIMD +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/DocHarvesterCLIRDFMD.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/DocHarvesterCLIRDFMD.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIRDFMD + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIRDFMD

+
+No usage of de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIRDFMD +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/HarvesterCLI.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/HarvesterCLI.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLI + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLI

+
+No usage of de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLI +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/HarvesterCLIMD.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/HarvesterCLIMD.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIMD + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIMD

+
+No usage of de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIMD +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/HarvesterCLIRDFMD.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/HarvesterCLIRDFMD.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIRDFMD + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIRDFMD

+
+No usage of de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIRDFMD +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/OCRHarvesterCLI.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/OCRHarvesterCLI.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLI + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLI

+
+No usage of de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLI +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/OCRHarvesterCLIMD.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/OCRHarvesterCLIMD.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIMD + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIMD

+
+No usage of de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIMD +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/OCRHarvesterCLIRDFMD.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/class-use/OCRHarvesterCLIRDFMD.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIRDFMD + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIRDFMD

+
+No usage of de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIRDFMD +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/package-frame.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/package-frame.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,46 @@ + + + + + + +de.mpiwg.dwinter.fulltextIndexer.harvester.CLI + + + + + + + + + + + +de.mpiwg.dwinter.fulltextIndexer.harvester.CLI + + + + +
+Classes  + +
+DocHarvesterCLIMD +
+DocHarvesterCLIRDFMD +
+HarvesterCLI +
+HarvesterCLIMD +
+HarvesterCLIRDFMD +
+OCRHarvesterCLI +
+OCRHarvesterCLIMD +
+OCRHarvesterCLIRDFMD
+ + + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/package-summary.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/package-summary.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,185 @@ + + + + + + +de.mpiwg.dwinter.fulltextIndexer.harvester.CLI + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Class Summary
DocHarvesterCLIMD 
DocHarvesterCLIRDFMD 
HarvesterCLI 
HarvesterCLIMD 
HarvesterCLIRDFMD 
OCRHarvesterCLI 
OCRHarvesterCLIMD 
OCRHarvesterCLIRDFMD 
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/package-tree.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/package-tree.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,153 @@ + + + + + + +de.mpiwg.dwinter.fulltextIndexer.harvester.CLI Class Hierarchy + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI +

+
+
+
Package Hierarchies:
All Packages
+
+

+Class Hierarchy +

+ +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/package-use.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/package-use.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Package
de.mpiwg.dwinter.fulltextIndexer.harvester.CLI

+
+No usage of de.mpiwg.dwinter.fulltextIndexer.harvester.CLI +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/DocHarvesterThreaded.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/DocHarvesterThreaded.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,1041 @@ + + + + + + +DocHarvesterThreaded + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.harvester +
+Class DocHarvesterThreaded

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded
+
+
+
+
public class DocHarvesterThreaded
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Field Summary
+private static java.lang.StringCOMPOSEDFN + +
+           
+private  intcounter + +
+           
+private static booleanCREATE_NEW + +
+           
+private static booleanDEBUG + +
+           
+protected  java.io.FiledocDir + +
+           
+protected static java.util.ArrayList<java.lang.String>excludeFolders + +
+           
+private  intfilecount + +
+           
+protected static java.util.ArrayList<java.lang.String>fileTypesToIndex + +
+           
+protected  java.io.Fileindex_dir + +
+           
+protected static booleanindexMetaPriority + +
+           
+protected  LanguageAnalyzerslanguageAnalyzers + +
+           
+protected  java.lang.StringlanguageFileName + +
+           
+protected  java.util.HashMap<java.lang.String,java.lang.String>languageToISO + +
+           
+private static intMAXFILES + +
+           
+protected static intmaxThread + +
+           
+protected  java.lang.StringmdProviderUrl + +
+           
+protected  ProcessFileThread[]mythreads + +
+           
+private  java.lang.StringpreferedLanguage + +
+           
+private static java.lang.StringPREFIX + +
+           
+private static java.lang.StringspecialMode + +
+           
+protected  java.util.HashMap<java.lang.String,java.lang.String>supportedLanguageFolder + +
+           
+protected  java.util.HashMap<java.lang.String,java.lang.String>textLanguage + +
+           
+  + + + + + + + + + + + + + + + + +
+Constructor Summary
DocHarvesterThreaded() + +
+           
DocHarvesterThreaded(java.io.File docDir, + java.io.File index_dir, + java.lang.String mdProviderUrl) + +
+           
DocHarvesterThreaded(java.io.File docDir, + java.io.File index_dir, + java.lang.String languageFileName, + java.lang.String mdProviderUrl, + java.lang.String lang) + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+private  booleancheckFileExists(java.io.File folder) + +
+           
+private  voidclearFile(java.io.File folder) + +
+           
+private  voidcompose(java.io.File folder, + java.io.File file) + +
+           
+private  voidfinishFile(java.io.File folder) + +
+           
+private  java.io.FilegetComposedFile(java.io.File folder) + +
+           
+private  java.util.ArrayList<java.lang.String>getFileListFromRDF(java.lang.String rdffilepath) + +
+           
+ voidharvestFolder() + +
+           
+ voidharvestFromRDF(java.lang.String rdffilepath) + +
+           
+private  voidindexDocs(java.util.ArrayList<java.lang.String> files) + +
+           
+(package private)  voidindexDocs(java.io.File file) + +
+           
+private  voidinit_languages() + +
+           
+private  booleanisTextFile(java.io.File file) + +
+           
+protected  java.util.HashMap<java.lang.String,java.lang.String>loadLanguages() + +
+           
+private  voidprocessCompleteFile(java.io.File folder) + +
+           
+protected  voidprocessFile(java.io.File file) + +
+           
+protected  intwaitForFreeThread() + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+DEBUG

+
+private static final boolean DEBUG
+
+
+
See Also:
Constant Field Values
+
+
+ +

+MAXFILES

+
+private static final int MAXFILES
+
+
+
See Also:
Constant Field Values
+
+
+ +

+PREFIX

+
+private static final java.lang.String PREFIX
+
+
+
See Also:
Constant Field Values
+
+
+ +

+COMPOSEDFN

+
+private static final java.lang.String COMPOSEDFN
+
+
+
See Also:
Constant Field Values
+
+
+ +

+CREATE_NEW

+
+private static final boolean CREATE_NEW
+
+
+
See Also:
Constant Field Values
+
+
+ +

+fileTypesToIndex

+
+protected static java.util.ArrayList<java.lang.String> fileTypesToIndex
+
+
+
+
+
+ +

+excludeFolders

+
+protected static java.util.ArrayList<java.lang.String> excludeFolders
+
+
+
+
+
+ +

+indexMetaPriority

+
+protected static boolean indexMetaPriority
+
+
+
+
+
+ +

+specialMode

+
+private static java.lang.String specialMode
+
+
+
+
+
+ +

+maxThread

+
+protected static int maxThread
+
+
+
+
+
+ +

+docDir

+
+protected java.io.File docDir
+
+
+
+
+
+ +

+index_dir

+
+protected java.io.File index_dir
+
+
+
+
+
+ +

+textLanguage

+
+protected java.util.HashMap<java.lang.String,java.lang.String> textLanguage
+
+
+
+
+
+ +

+languageToISO

+
+protected java.util.HashMap<java.lang.String,java.lang.String> languageToISO
+
+
+
+
+
+ +

+languageAnalyzers

+
+protected LanguageAnalyzers languageAnalyzers
+
+
+
+
+
+ +

+counter

+
+private int counter
+
+
+
+
+
+ +

+languageFileName

+
+protected java.lang.String languageFileName
+
+
+
+
+
+ +

+mythreads

+
+protected ProcessFileThread[] mythreads
+
+
+
+
+
+ +

+filecount

+
+private int filecount
+
+
+
+
+
+ +

+mdProviderUrl

+
+protected java.lang.String mdProviderUrl
+
+
+
+
+
+ +

+preferedLanguage

+
+private java.lang.String preferedLanguage
+
+
+
+
+
+ +

+supportedLanguageFolder

+
+protected java.util.HashMap<java.lang.String,java.lang.String> supportedLanguageFolder
+
+
+
+
+ + + + + + + + +
+Constructor Detail
+ +

+DocHarvesterThreaded

+
+public DocHarvesterThreaded()
+
+
+
+ +

+DocHarvesterThreaded

+
+public DocHarvesterThreaded(java.io.File docDir,
+                            java.io.File index_dir,
+                            java.lang.String languageFileName,
+                            java.lang.String mdProviderUrl,
+                            java.lang.String lang)
+                     throws org.apache.lucene.index.CorruptIndexException,
+                            org.apache.lucene.store.LockObtainFailedException,
+                            java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException
+
+
+ +

+DocHarvesterThreaded

+
+public DocHarvesterThreaded(java.io.File docDir,
+                            java.io.File index_dir,
+                            java.lang.String mdProviderUrl)
+                     throws org.apache.lucene.index.CorruptIndexException,
+                            org.apache.lucene.store.LockObtainFailedException,
+                            java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException
+
+ + + + + + + + +
+Method Detail
+ +

+init_languages

+
+private void init_languages()
+
+
+
+
+
+
+ +

+loadLanguages

+
+protected java.util.HashMap<java.lang.String,java.lang.String> loadLanguages()
+
+
+
+
+
+
+ +

+harvestFromRDF

+
+public void harvestFromRDF(java.lang.String rdffilepath)
+                    throws java.lang.InterruptedException,
+                           org.jdom.JDOMException
+
+
+ +
Throws: +
java.lang.InterruptedException +
org.jdom.JDOMException
+
+
+
+ +

+getFileListFromRDF

+
+private java.util.ArrayList<java.lang.String> getFileListFromRDF(java.lang.String rdffilepath)
+                                                          throws org.jdom.JDOMException,
+                                                                 java.io.IOException
+
+
+ +
Throws: +
org.jdom.JDOMException +
java.io.IOException
+
+
+
+ +

+harvestFolder

+
+public void harvestFolder()
+                   throws java.lang.InterruptedException
+
+
+ +
Throws: +
java.lang.InterruptedException
+
+
+
+ +

+indexDocs

+
+private void indexDocs(java.util.ArrayList<java.lang.String> files)
+                throws java.io.IOException,
+                       java.lang.InterruptedException
+
+
+ +
Throws: +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+indexDocs

+
+void indexDocs(java.io.File file)
+         throws java.io.IOException,
+                java.lang.InterruptedException
+
+
+ +
Throws: +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+finishFile

+
+private void finishFile(java.io.File folder)
+
+
+
+
+
+
+ +

+processCompleteFile

+
+private void processCompleteFile(java.io.File folder)
+
+
+
+
+
+
+ +

+checkFileExists

+
+private boolean checkFileExists(java.io.File folder)
+
+
+
+
+
+
+ +

+clearFile

+
+private void clearFile(java.io.File folder)
+
+
+
+
+
+
+ +

+compose

+
+private void compose(java.io.File folder,
+                     java.io.File file)
+
+
+
+
+
+
+ +

+getComposedFile

+
+private java.io.File getComposedFile(java.io.File folder)
+
+
+
+
+
+
+ +

+processFile

+
+protected void processFile(java.io.File file)
+                    throws org.apache.lucene.index.CorruptIndexException,
+                           org.apache.lucene.store.LockObtainFailedException,
+                           java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException
+
+
+
+ +

+waitForFreeThread

+
+protected int waitForFreeThread()
+
+
+
+
+
+
+ +

+isTextFile

+
+private boolean isTextFile(java.io.File file)
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/HarvesterThreaded.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/HarvesterThreaded.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,874 @@ + + + + + + +HarvesterThreaded + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.harvester +
+Class HarvesterThreaded

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded
+
+
+
Direct Known Subclasses:
OCRHarvesterThreaded
+
+
+
+
public class HarvesterThreaded
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Field Summary
+private  intcounter + +
+           
+private static booleanDEBUG + +
+           
+protected  java.io.FiledocDir + +
+           
+protected static java.util.ArrayList<java.lang.String>excludeFolders + +
+           
+private  intfilecount + +
+           
+protected static java.util.ArrayList<java.lang.String>fileTypesToIndex + +
+           
+protected  java.io.Fileindex_dir + +
+           
+protected static booleanindexMetaPriority + +
+           
+protected  LanguageAnalyzerslanguageAnalyzers + +
+           
+protected  java.lang.StringlanguageFileName + +
+           
+protected  java.util.HashMap<java.lang.String,java.lang.String>languageToISO + +
+           
+private static intMAXFILES + +
+           
+protected static intmaxThread + +
+           
+protected  java.lang.StringmdProviderUrl + +
+           
+protected  ProcessFileThread[]mythreads + +
+           
+private  java.lang.StringpreferedLanguage + +
+           
+private static java.lang.StringspecialMode + +
+           
+protected  java.util.HashMap<java.lang.String,java.lang.String>supportedLanguageFolder + +
+           
+protected  java.util.HashMap<java.lang.String,java.lang.String>textLanguage + +
+           
+  + + + + + + + + + + + + + + + + +
+Constructor Summary
HarvesterThreaded() + +
+           
HarvesterThreaded(java.io.File docDir, + java.io.File index_dir, + java.lang.String mdProviderUrl) + +
+           
HarvesterThreaded(java.io.File docDir, + java.io.File index_dir, + java.lang.String languageFileName, + java.lang.String mdProviderUrl, + java.lang.String lang) + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+private  java.util.ArrayList<java.lang.String>getFileListFromRDF(java.lang.String rdffilepath) + +
+           
+ voidharvestFolder() + +
+           
+ voidharvestFromRDF(java.lang.String rdffilepath) + +
+           
+private  voidindexDocs(java.util.ArrayList<java.lang.String> files) + +
+           
+(package private)  voidindexDocs(java.io.File file) + +
+           
+private  voidinit_languages() + +
+           
+private  booleanisTextFile(java.io.File file) + +
+           
+protected  java.util.HashMap<java.lang.String,java.lang.String>loadLanguages() + +
+           
+protected  voidprocessFile(java.io.File file) + +
+           
+protected  intwaitForFreeThread() + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+DEBUG

+
+private static final boolean DEBUG
+
+
+
See Also:
Constant Field Values
+
+
+ +

+MAXFILES

+
+private static final int MAXFILES
+
+
+
See Also:
Constant Field Values
+
+
+ +

+fileTypesToIndex

+
+protected static java.util.ArrayList<java.lang.String> fileTypesToIndex
+
+
+
+
+
+ +

+excludeFolders

+
+protected static java.util.ArrayList<java.lang.String> excludeFolders
+
+
+
+
+
+ +

+indexMetaPriority

+
+protected static boolean indexMetaPriority
+
+
+
+
+
+ +

+specialMode

+
+private static java.lang.String specialMode
+
+
+
+
+
+ +

+maxThread

+
+protected static int maxThread
+
+
+
+
+
+ +

+docDir

+
+protected java.io.File docDir
+
+
+
+
+
+ +

+index_dir

+
+protected java.io.File index_dir
+
+
+
+
+
+ +

+textLanguage

+
+protected java.util.HashMap<java.lang.String,java.lang.String> textLanguage
+
+
+
+
+
+ +

+languageToISO

+
+protected java.util.HashMap<java.lang.String,java.lang.String> languageToISO
+
+
+
+
+
+ +

+languageAnalyzers

+
+protected LanguageAnalyzers languageAnalyzers
+
+
+
+
+
+ +

+counter

+
+private int counter
+
+
+
+
+
+ +

+languageFileName

+
+protected java.lang.String languageFileName
+
+
+
+
+
+ +

+mythreads

+
+protected ProcessFileThread[] mythreads
+
+
+
+
+
+ +

+filecount

+
+private int filecount
+
+
+
+
+
+ +

+mdProviderUrl

+
+protected java.lang.String mdProviderUrl
+
+
+
+
+
+ +

+preferedLanguage

+
+private java.lang.String preferedLanguage
+
+
+
+
+
+ +

+supportedLanguageFolder

+
+protected java.util.HashMap<java.lang.String,java.lang.String> supportedLanguageFolder
+
+
+
+
+ + + + + + + + +
+Constructor Detail
+ +

+HarvesterThreaded

+
+public HarvesterThreaded()
+
+
+
+ +

+HarvesterThreaded

+
+public HarvesterThreaded(java.io.File docDir,
+                         java.io.File index_dir,
+                         java.lang.String languageFileName,
+                         java.lang.String mdProviderUrl,
+                         java.lang.String lang)
+                  throws org.apache.lucene.index.CorruptIndexException,
+                         org.apache.lucene.store.LockObtainFailedException,
+                         java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException
+
+
+ +

+HarvesterThreaded

+
+public HarvesterThreaded(java.io.File docDir,
+                         java.io.File index_dir,
+                         java.lang.String mdProviderUrl)
+                  throws org.apache.lucene.index.CorruptIndexException,
+                         org.apache.lucene.store.LockObtainFailedException,
+                         java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException
+
+ + + + + + + + +
+Method Detail
+ +

+init_languages

+
+private void init_languages()
+
+
+
+
+
+
+ +

+loadLanguages

+
+protected java.util.HashMap<java.lang.String,java.lang.String> loadLanguages()
+
+
+
+
+
+
+ +

+harvestFromRDF

+
+public void harvestFromRDF(java.lang.String rdffilepath)
+                    throws java.lang.InterruptedException,
+                           org.jdom.JDOMException
+
+
+ +
Throws: +
java.lang.InterruptedException +
org.jdom.JDOMException
+
+
+
+ +

+getFileListFromRDF

+
+private java.util.ArrayList<java.lang.String> getFileListFromRDF(java.lang.String rdffilepath)
+                                                          throws org.jdom.JDOMException,
+                                                                 java.io.IOException
+
+
+ +
Throws: +
org.jdom.JDOMException +
java.io.IOException
+
+
+
+ +

+harvestFolder

+
+public void harvestFolder()
+                   throws java.lang.InterruptedException
+
+
+ +
Throws: +
java.lang.InterruptedException
+
+
+
+ +

+indexDocs

+
+private void indexDocs(java.util.ArrayList<java.lang.String> files)
+                throws java.io.IOException,
+                       java.lang.InterruptedException
+
+
+ +
Throws: +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+indexDocs

+
+void indexDocs(java.io.File file)
+         throws java.io.IOException,
+                java.lang.InterruptedException
+
+
+ +
Throws: +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+processFile

+
+protected void processFile(java.io.File file)
+                    throws org.apache.lucene.index.CorruptIndexException,
+                           org.apache.lucene.store.LockObtainFailedException,
+                           java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException
+
+
+
+ +

+waitForFreeThread

+
+protected int waitForFreeThread()
+
+
+
+
+
+
+ +

+isTextFile

+
+private boolean isTextFile(java.io.File file)
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/OCRHarvesterThreaded.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/OCRHarvesterThreaded.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,396 @@ + + + + + + +OCRHarvesterThreaded + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.harvester +
+Class OCRHarvesterThreaded

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded
+      extended by de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded
+
+
+
+
public class OCRHarvesterThreaded
extends HarvesterThreaded
+ + +

+


+ +

+ + + + + + + + + + + +
+Field Summary
+private  java.lang.StringpreferedLanguage + +
+           
+ + + + + + + +
Fields inherited from class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded
docDir, excludeFolders, fileTypesToIndex, index_dir, indexMetaPriority, languageAnalyzers, languageFileName, languageToISO, maxThread, mdProviderUrl, mythreads, supportedLanguageFolder, textLanguage
+  + + + + + + + + + + + + + + + + +
+Constructor Summary
OCRHarvesterThreaded() + +
+           
OCRHarvesterThreaded(java.io.File docDir, + java.io.File index_dir, + java.lang.String mdProviderUrl, + java.lang.String preferedLanguage) + +
+           
OCRHarvesterThreaded(java.io.File docDir, + java.io.File index_dir, + java.lang.String languageFileName, + java.lang.String mdProviderUrl, + java.lang.String lang) + +
+           
+  + + + + + + + + + + + + + + + +
+Method Summary
+private  voidinit_languages() + +
+           
+protected  voidprocessFile(java.io.File file) + +
+           
+ + + + + + + +
Methods inherited from class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded
harvestFolder, harvestFromRDF, indexDocs, loadLanguages, waitForFreeThread
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+preferedLanguage

+
+private java.lang.String preferedLanguage
+
+
+
+
+ + + + + + + + +
+Constructor Detail
+ +

+OCRHarvesterThreaded

+
+public OCRHarvesterThreaded()
+
+
+
+ +

+OCRHarvesterThreaded

+
+public OCRHarvesterThreaded(java.io.File docDir,
+                            java.io.File index_dir,
+                            java.lang.String languageFileName,
+                            java.lang.String mdProviderUrl,
+                            java.lang.String lang)
+                     throws org.apache.lucene.index.CorruptIndexException,
+                            org.apache.lucene.store.LockObtainFailedException,
+                            java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException
+
+
+ +

+OCRHarvesterThreaded

+
+public OCRHarvesterThreaded(java.io.File docDir,
+                            java.io.File index_dir,
+                            java.lang.String mdProviderUrl,
+                            java.lang.String preferedLanguage)
+                     throws org.apache.lucene.index.CorruptIndexException,
+                            org.apache.lucene.store.LockObtainFailedException,
+                            java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException
+
+ + + + + + + + +
+Method Detail
+ +

+init_languages

+
+private void init_languages()
+
+
+
+
+
+
+ +

+processFile

+
+protected void processFile(java.io.File file)
+                    throws org.apache.lucene.index.CorruptIndexException,
+                           org.apache.lucene.store.LockObtainFailedException,
+                           java.io.IOException
+
+
+
Overrides:
processFile in class HarvesterThreaded
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/class-use/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/class-use/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,23 @@ +K 25 +svn:wc:ra_dav:version-url +V 97 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/class-use +END +DocHarvesterThreaded.html +K 25 +svn:wc:ra_dav:version-url +V 123 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/class-use/DocHarvesterThreaded.html +END +OCRHarvesterThreaded.html +K 25 +svn:wc:ra_dav:version-url +V 123 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/class-use/OCRHarvesterThreaded.html +END +HarvesterThreaded.html +K 25 +svn:wc:ra_dav:version-url +V 120 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/class-use/HarvesterThreaded.html +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/class-use/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/class-use/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,137 @@ +10 + +dir +2662 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/class-use +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-28T15:47:27.673424Z +2662 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +DocHarvesterThreaded.html +file + + + + +2010-06-28T09:04:53.000000Z +a67198a48f330bd86c6e1614d2fa88b4 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6226 + +HarvesterThreaded.html +file + + + + +2010-06-28T09:04:53.000000Z +8e8e4ec9c8f8286fbc0192b22dfc1e6e +2010-06-28T15:47:27.673424Z +2662 +dwinter 
+has-props + + + + + + + + + + + + + + + + + + + + +8266 + +OCRHarvesterThreaded.html +file + + + + +2010-06-28T09:04:53.000000Z +aa2a5caa26eacfcb77b95a8ac2fb5d35 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6226 + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/class-use/.svn/prop-base/DocHarvesterThreaded.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/class-use/.svn/prop-base/DocHarvesterThreaded.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/class-use/.svn/prop-base/HarvesterThreaded.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/class-use/.svn/prop-base/HarvesterThreaded.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/class-use/.svn/prop-base/OCRHarvesterThreaded.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/class-use/.svn/prop-base/OCRHarvesterThreaded.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/class-use/.svn/text-base/DocHarvesterThreaded.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/class-use/.svn/text-base/DocHarvesterThreaded.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded

+
+No usage of de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/class-use/.svn/text-base/HarvesterThreaded.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/class-use/.svn/text-base/HarvesterThreaded.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,180 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded

+
+ + + + + + + + + +
+Packages that use HarvesterThreaded
de.mpiwg.dwinter.fulltextIndexer.harvester  
+  +

+ + + + + +
+Uses of HarvesterThreaded in de.mpiwg.dwinter.fulltextIndexer.harvester
+  +

+ + + + + + + + + +
Subclasses of HarvesterThreaded in de.mpiwg.dwinter.fulltextIndexer.harvester
+ classOCRHarvesterThreaded + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/class-use/.svn/text-base/OCRHarvesterThreaded.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/class-use/.svn/text-base/OCRHarvesterThreaded.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded

+
+No usage of de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/class-use/DocHarvesterThreaded.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/class-use/DocHarvesterThreaded.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded

+
+No usage of de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/class-use/HarvesterThreaded.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/class-use/HarvesterThreaded.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,180 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded

+
+ + + + + + + + + +
+Packages that use HarvesterThreaded
de.mpiwg.dwinter.fulltextIndexer.harvester  
+  +

+ + + + + +
+Uses of HarvesterThreaded in de.mpiwg.dwinter.fulltextIndexer.harvester
+  +

+ + + + + + + + + +
Subclasses of HarvesterThreaded in de.mpiwg.dwinter.fulltextIndexer.harvester
+ classOCRHarvesterThreaded + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/class-use/OCRHarvesterThreaded.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/class-use/OCRHarvesterThreaded.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded

+
+No usage of de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/package-frame.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/package-frame.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,36 @@ + + + + + + +de.mpiwg.dwinter.fulltextIndexer.harvester + + + + + + + + + + + +de.mpiwg.dwinter.fulltextIndexer.harvester + + + + +
+Classes  + +
+DocHarvesterThreaded +
+HarvesterThreaded +
+OCRHarvesterThreaded
+ + + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/package-summary.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/package-summary.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,165 @@ + + + + + + +de.mpiwg.dwinter.fulltextIndexer.harvester + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package de.mpiwg.dwinter.fulltextIndexer.harvester +

+ + + + + + + + + + + + + + + + + +
+Class Summary
DocHarvesterThreaded 
HarvesterThreaded 
OCRHarvesterThreaded 
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/package-tree.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/package-tree.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,155 @@ + + + + + + +de.mpiwg.dwinter.fulltextIndexer.harvester Class Hierarchy + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package de.mpiwg.dwinter.fulltextIndexer.harvester +

+
+
+
Package Hierarchies:
All Packages
+
+

+Class Hierarchy +

+ +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/package-use.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/package-use.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,170 @@ + + + + + + +Uses of Package de.mpiwg.dwinter.fulltextIndexer.harvester + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Package
de.mpiwg.dwinter.fulltextIndexer.harvester

+
+ + + + + + + + + +
+Packages that use de.mpiwg.dwinter.fulltextIndexer.harvester
de.mpiwg.dwinter.fulltextIndexer.harvester  
+  +

+ + + + + + + + +
+Classes in de.mpiwg.dwinter.fulltextIndexer.harvester used by de.mpiwg.dwinter.fulltextIndexer.harvester
HarvesterThreaded + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,41 @@ +K 25 +svn:wc:ra_dav:version-url +V 98 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors +END +OCRProcessFileThread.html +K 25 +svn:wc:ra_dav:version-url +V 124 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/OCRProcessFileThread.html +END +package-frame.html +K 25 +svn:wc:ra_dav:version-url +V 117 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/package-frame.html +END +ProcessFileThread.html +K 25 +svn:wc:ra_dav:version-url +V 121 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/ProcessFileThread.html +END +package-tree.html +K 25 +svn:wc:ra_dav:version-url +V 116 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/package-tree.html +END +package-use.html +K 25 +svn:wc:ra_dav:version-url +V 115 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/package-use.html +END +package-summary.html +K 25 +svn:wc:ra_dav:version-url +V 119 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/package-summary.html +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,242 @@ +10 + +dir +2662 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors 
+https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-28T15:47:27.673424Z +2662 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +OCRProcessFileThread.html +file + + + + +2010-06-28T09:04:51.000000Z +fcd873b3ce7e79823f757fb5a7e85455 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +20858 + +ProcessFileThread.html +file + + + + +2010-06-28T09:04:51.000000Z +a18d89912042c50b38dadfc6ae2926f8 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +45166 + +class-use +dir + +package-frame.html +file + + + + +2010-06-28T09:04:53.000000Z +68fe536000c5587e265fff33433f65f2 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +1186 + +package-summary.html +file + + + + +2010-06-28T09:04:53.000000Z +3692407e21091f53333f20bcd6c6f15d +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6960 + +package-tree.html +file + + + + +2010-06-28T09:04:53.000000Z +eea6816730c4167fbf19add3c8a3331a +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6913 + +package-use.html +file + + + + +2010-06-28T09:04:53.000000Z +aa3e8daa71143633d4382879d72d2aca +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +8406 + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/prop-base/OCRProcessFileThread.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/prop-base/OCRProcessFileThread.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/prop-base/ProcessFileThread.html.svn-base --- /dev/null Thu 
Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/prop-base/ProcessFileThread.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/prop-base/package-frame.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/prop-base/package-frame.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/prop-base/package-summary.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/prop-base/package-summary.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/prop-base/package-tree.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/prop-base/package-tree.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/prop-base/package-use.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/prop-base/package-use.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/text-base/OCRProcessFileThread.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/text-base/OCRProcessFileThread.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,357 @@ + + + + + + +OCRProcessFileThread + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.harvester.processors +
+Class OCRProcessFileThread

+
+java.lang.Object
+  extended by java.lang.Thread
+      extended by de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread
+          extended by de.mpiwg.dwinter.fulltextIndexer.harvester.processors.OCRProcessFileThread
+
+
+
All Implemented Interfaces:
java.lang.Runnable
+
+
+
+
public class OCRProcessFileThread
extends ProcessFileThread
+ + +

+


+ +

+ + + + + + + +
+Nested Class Summary
+ + + + + + + +
Nested classes/interfaces inherited from class java.lang.Thread
java.lang.Thread.State, java.lang.Thread.UncaughtExceptionHandler
+  + + + + + + + +
+Field Summary
+ + + + + + + +
Fields inherited from class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread
deduceFromFolderPriority, docDir, done, excludeFolders, fileTypesToIndex, index_dir, indexMetaPriority, languageAnalyzers, languageFileName, languageToISO, supportedLanguageFolder
+ + + + + + + +
Fields inherited from class java.lang.Thread
MAX_PRIORITY, MIN_PRIORITY, NORM_PRIORITY
+  + + + + + + + + + + +
+Constructor Summary
OCRProcessFileThread(LanguageAnalyzers languageAnalyzers2, + java.io.File file, + java.lang.String lfn, + java.util.HashMap<java.lang.String,java.lang.String> tl, + java.lang.String mdProviderUrl, + java.lang.String preferedLanguage, + java.util.HashMap<java.lang.String,java.lang.String> languageToISO, + java.util.HashMap<java.lang.String,java.lang.String> supportedLanguageFolder) + +
+           
+  + + + + + + + + + + + +
+Method Summary
+protected  java.lang.BooleanaddDocument(java.io.File file, + java.lang.String lang, + java.lang.String dcMetaData, + java.lang.String textId) + +
+           
+ + + + + + + +
Methods inherited from class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread
getIndexMetaPriority, harvestFolder, indexDocs, loadLanguages, run, setIndexMetaPriority
+ + + + + + + +
Methods inherited from class java.lang.Thread
activeCount, checkAccess, countStackFrames, currentThread, destroy, dumpStack, enumerate, getAllStackTraces, getContextClassLoader, getDefaultUncaughtExceptionHandler, getId, getName, getPriority, getStackTrace, getState, getThreadGroup, getUncaughtExceptionHandler, holdsLock, interrupt, interrupted, isAlive, isDaemon, isInterrupted, join, join, join, resume, setContextClassLoader, setDaemon, setDefaultUncaughtExceptionHandler, setName, setPriority, setUncaughtExceptionHandler, sleep, sleep, start, stop, stop, suspend, toString, yield
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+OCRProcessFileThread

+
+public OCRProcessFileThread(LanguageAnalyzers languageAnalyzers2,
+                            java.io.File file,
+                            java.lang.String lfn,
+                            java.util.HashMap<java.lang.String,java.lang.String> tl,
+                            java.lang.String mdProviderUrl,
+                            java.lang.String preferedLanguage,
+                            java.util.HashMap<java.lang.String,java.lang.String> languageToISO,
+                            java.util.HashMap<java.lang.String,java.lang.String> supportedLanguageFolder)
+
+
+ + + + + + + + +
+Method Detail
+ +

+addDocument

+
+protected java.lang.Boolean addDocument(java.io.File file,
+                                        java.lang.String lang,
+                                        java.lang.String dcMetaData,
+                                        java.lang.String textId)
+                                 throws org.apache.lucene.index.CorruptIndexException,
+                                        java.io.IOException,
+                                        java.io.FileNotFoundException,
+                                        java.io.UnsupportedEncodingException,
+                                        javax.xml.parsers.ParserConfigurationException,
+                                        org.xml.sax.SAXException
+
+
+
Overrides:
addDocument in class ProcessFileThread
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
java.io.IOException +
java.io.FileNotFoundException +
java.io.UnsupportedEncodingException +
javax.xml.parsers.ParserConfigurationException +
org.xml.sax.SAXException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/text-base/ProcessFileThread.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/text-base/ProcessFileThread.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,1146 @@ + + + + + + +ProcessFileThread + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.harvester.processors +
+Class ProcessFileThread

+
+java.lang.Object
+  extended by java.lang.Thread
+      extended by de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread
+
+
+
All Implemented Interfaces:
java.lang.Runnable
+
+
+
Direct Known Subclasses:
OCRProcessFileThread
+
+
+
+
public class ProcessFileThread
extends java.lang.Thread
+ + +

+


+ +

+ + + + + + + +
+Nested Class Summary
+ + + + + + + +
Nested classes/interfaces inherited from class java.lang.Thread
java.lang.Thread.State, java.lang.Thread.UncaughtExceptionHandler
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Field Summary
+private  intcounter + +
+           
+protected  booleandeduceFromFolderPriority + +
+           
+private static intDELETED_OLD_VERSION + +
+           
+private static intDELETED_WRONG_LANGUAGE + +
+           
+protected  java.io.FiledocDir + +
+           
+ booleandone + +
+           
+protected  java.util.ArrayList<java.lang.String>excludeFolders + +
+           
+private static intFILE_EXISTS + +
+           
+protected  java.util.ArrayList<java.lang.String>fileTypesToIndex + +
+           
+protected  java.io.Fileindex_dir + +
+           
+protected  booleanindexMetaPriority + +
+           
+protected  LanguageAnalyzerslanguageAnalyzers + +
+           
+protected  java.lang.StringlanguageFileName + +
+           
+protected  java.util.HashMap<java.lang.String,java.lang.String>languageToISO + +
+           
+private  java.lang.StringmdProviderUrl + +
+           
+private  java.lang.Stringmode + +
+           
+private static intNEW_FILE + +
+           
+private  java.lang.StringpreferedLanguage + +
+           
+private  java.io.FileprocessThisFile + +
+           
+private  java.lang.StringspecialMode + +
+           
+protected  java.util.HashMap<java.lang.String,java.lang.String>supportedLanguageFolder + +
+           
+private static java.lang.StringTEXTIDFROMPATH_REGEXP + +
+           
+private  java.util.HashMap<java.lang.String,java.lang.String>textLanguage + +
+           
+ + + + + + + +
Fields inherited from class java.lang.Thread
MAX_PRIORITY, MIN_PRIORITY, NORM_PRIORITY
+  + + + + + + + + + + + + + +
+Constructor Summary
ProcessFileThread(java.io.File docDir, + java.io.File index_dir, + java.lang.String languageFileName, + java.io.File processThisFile, + java.lang.String mdProviderUrl, + java.util.HashMap<java.lang.String,java.lang.String> languageToISO, + java.util.HashMap<java.lang.String,java.lang.String> supportedLanguageFolder) + +
+           
ProcessFileThread(LanguageAnalyzers languageAnalyzers2, + java.io.File file, + java.lang.String lfn, + java.util.HashMap<java.lang.String,java.lang.String> tl, + java.lang.String mdProviderUrl, + java.lang.String preferedLanguage, + java.util.HashMap<java.lang.String,java.lang.String> languageToISO, + java.util.HashMap<java.lang.String,java.lang.String> supportedLanguageFolder) + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+private  java.lang.StringabsPathToTextId(java.lang.String absolutePath) + +
+           
+protected  java.lang.BooleanaddDocument(java.io.File file, + java.lang.String lang, + java.lang.String dcMetaData, + java.lang.String textId) + +
+           
+private  intcheckFileAndRemoveOldFile(java.lang.String filePath, + java.lang.String lang, + boolean deleteWrongLanguage, + long fileModDate) + +
+           
+private  java.lang.StringcheckSupportedLanguages(java.lang.String lang) + +
+           
+private  java.lang.StringdeduceFromFolderName(java.io.File file) + +
+           
+private  java.lang.StringgetDCFromIndexMeta(java.lang.String textId) + +
+           
+ booleangetIndexMetaPriority() + +
+           
+private  java.lang.StringgetLanguageFromIndexMeta(java.io.File file) + +
+           
+private  java.lang.StringgetLanguageOfText(java.lang.String textId, + java.io.File file) + +
+           
+private  java.lang.StringgetTextId(java.io.File file) + +
+           
+ voidharvestFolder() + +
+           
+(package private)  voidindexDocs(java.io.File file) + +
+           
+private  booleanisTextFile(java.io.File file) + +
+           
+protected  java.util.HashMap<java.lang.String,java.lang.String>loadLanguages() + +
+           
+private  voidprocessFile(java.io.File file) + +
+           
+ voidrun() + +
+           
+ voidsetIndexMetaPriority(boolean prio) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Thread
activeCount, checkAccess, countStackFrames, currentThread, destroy, dumpStack, enumerate, getAllStackTraces, getContextClassLoader, getDefaultUncaughtExceptionHandler, getId, getName, getPriority, getStackTrace, getState, getThreadGroup, getUncaughtExceptionHandler, holdsLock, interrupt, interrupted, isAlive, isDaemon, isInterrupted, join, join, join, resume, setContextClassLoader, setDaemon, setDefaultUncaughtExceptionHandler, setName, setPriority, setUncaughtExceptionHandler, sleep, sleep, start, stop, stop, suspend, toString, yield
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+TEXTIDFROMPATH_REGEXP

+
+private static final java.lang.String TEXTIDFROMPATH_REGEXP
+
+
+
See Also:
Constant Field Values
+
+
+ +

+DELETED_WRONG_LANGUAGE

+
+private static final int DELETED_WRONG_LANGUAGE
+
+
+
See Also:
Constant Field Values
+
+
+ +

+DELETED_OLD_VERSION

+
+private static final int DELETED_OLD_VERSION
+
+
+
See Also:
Constant Field Values
+
+
+ +

+NEW_FILE

+
+private static final int NEW_FILE
+
+
+
See Also:
Constant Field Values
+
+
+ +

+FILE_EXISTS

+
+private static final int FILE_EXISTS
+
+
+
See Also:
Constant Field Values
+
+
+ +

+docDir

+
+protected java.io.File docDir
+
+
+
+
+
+ +

+index_dir

+
+protected java.io.File index_dir
+
+
+
+
+
+ +

+fileTypesToIndex

+
+protected java.util.ArrayList<java.lang.String> fileTypesToIndex
+
+
+
+
+
+ +

+excludeFolders

+
+protected java.util.ArrayList<java.lang.String> excludeFolders
+
+
+
+
+
+ +

+textLanguage

+
+private java.util.HashMap<java.lang.String,java.lang.String> textLanguage
+
+
+
+
+
+ +

+languageToISO

+
+protected java.util.HashMap<java.lang.String,java.lang.String> languageToISO
+
+
+
+
+
+ +

+languageAnalyzers

+
+protected LanguageAnalyzers languageAnalyzers
+
+
+
+
+
+ +

+supportedLanguageFolder

+
+protected java.util.HashMap<java.lang.String,java.lang.String> supportedLanguageFolder
+
+
+
+
+
+ +

+counter

+
+private int counter
+
+
+
+
+
+ +

+languageFileName

+
+protected java.lang.String languageFileName
+
+
+
+
+
+ +

+indexMetaPriority

+
+protected boolean indexMetaPriority
+
+
+
+
+
+ +

+deduceFromFolderPriority

+
+protected boolean deduceFromFolderPriority
+
+
+
+
+
+ +

+specialMode

+
+private java.lang.String specialMode
+
+
+
+
+
+ +

+done

+
+public boolean done
+
+
+
+
+
+ +

+processThisFile

+
+private java.io.File processThisFile
+
+
+
+
+
+ +

+mode

+
+private java.lang.String mode
+
+
+
+
+
+ +

+mdProviderUrl

+
+private java.lang.String mdProviderUrl
+
+
+
+
+
+ +

+preferedLanguage

+
+private java.lang.String preferedLanguage
+
+
+
+
+ + + + + + + + +
+Constructor Detail
+ +

+ProcessFileThread

+
+public ProcessFileThread(java.io.File docDir,
+                         java.io.File index_dir,
+                         java.lang.String languageFileName,
+                         java.io.File processThisFile,
+                         java.lang.String mdProviderUrl,
+                         java.util.HashMap<java.lang.String,java.lang.String> languageToISO,
+                         java.util.HashMap<java.lang.String,java.lang.String> supportedLanguageFolder)
+                  throws org.apache.lucene.index.CorruptIndexException,
+                         org.apache.lucene.store.LockObtainFailedException,
+                         java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException
+
+
+ +

+ProcessFileThread

+
+public ProcessFileThread(LanguageAnalyzers languageAnalyzers2,
+                         java.io.File file,
+                         java.lang.String lfn,
+                         java.util.HashMap<java.lang.String,java.lang.String> tl,
+                         java.lang.String mdProviderUrl,
+                         java.lang.String preferedLanguage,
+                         java.util.HashMap<java.lang.String,java.lang.String> languageToISO,
+                         java.util.HashMap<java.lang.String,java.lang.String> supportedLanguageFolder)
+
+
+ + + + + + + + +
+Method Detail
+ +

+run

+
+public void run()
+
+
+
Specified by:
run in interface java.lang.Runnable
Overrides:
run in class java.lang.Thread
+
+
+
+
+
+
+ +

+getLanguageOfText

+
+private java.lang.String getLanguageOfText(java.lang.String textId,
+                                           java.io.File file)
+                                    throws java.io.IOException
+
+
+ +
Throws: +
java.io.IOException
+
+
+
+ +

+deduceFromFolderName

+
+private java.lang.String deduceFromFolderName(java.io.File file)
+
+
+
+
+
+
+ +

+getLanguageFromIndexMeta

+
+private java.lang.String getLanguageFromIndexMeta(java.io.File file)
+                                           throws java.io.IOException
+
+
+ +
Throws: +
java.io.IOException
+
+
+
+ +

+getDCFromIndexMeta

+
+private java.lang.String getDCFromIndexMeta(java.lang.String textId)
+                                     throws java.io.IOException,
+                                            org.apache.xmlrpc.XmlRpcException
+
+
+ +
Throws: +
java.io.IOException +
org.apache.xmlrpc.XmlRpcException
+
+
+
+ +

+loadLanguages

+
+protected java.util.HashMap<java.lang.String,java.lang.String> loadLanguages()
+
+
+
+
+
+
+ +

+harvestFolder

+
+public void harvestFolder()
+                   throws java.lang.InterruptedException
+
+
+ +
Throws: +
java.lang.InterruptedException
+
+
+
+ +

+indexDocs

+
+void indexDocs(java.io.File file)
+         throws java.io.IOException,
+                java.lang.InterruptedException
+
+
+ +
Throws: +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+processFile

+
+private void processFile(java.io.File file)
+                  throws java.io.IOException,
+                         org.apache.lucene.index.CorruptIndexException,
+                         java.lang.InterruptedException,
+                         java.io.FileNotFoundException,
+                         java.io.UnsupportedEncodingException
+
+
+ +
Throws: +
java.io.IOException +
org.apache.lucene.index.CorruptIndexException +
java.lang.InterruptedException +
java.io.FileNotFoundException +
java.io.UnsupportedEncodingException
+
+
+
+ +

+addDocument

+
+protected java.lang.Boolean addDocument(java.io.File file,
+                                        java.lang.String lang,
+                                        java.lang.String dcMetaData,
+                                        java.lang.String textId)
+                                 throws org.apache.lucene.index.CorruptIndexException,
+                                        java.io.IOException,
+                                        java.io.FileNotFoundException,
+                                        java.io.UnsupportedEncodingException,
+                                        javax.xml.parsers.ParserConfigurationException,
+                                        org.xml.sax.SAXException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
java.io.IOException +
java.io.FileNotFoundException +
java.io.UnsupportedEncodingException +
javax.xml.parsers.ParserConfigurationException +
org.xml.sax.SAXException
+
+
+
+ +

+getTextId

+
+private java.lang.String getTextId(java.io.File file)
+
+
+
+
+
+
+ +

+absPathToTextId

+
+private java.lang.String absPathToTextId(java.lang.String absolutePath)
+
+
+
+
+
+
+ +

+checkFileAndRemoveOldFile

+
+private int checkFileAndRemoveOldFile(java.lang.String filePath,
+                                      java.lang.String lang,
+                                      boolean deleteWrongLanguage,
+                                      long fileModDate)
+                               throws org.apache.lucene.index.CorruptIndexException,
+                                      java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
java.io.IOException
+
+
+
+ +

+checkSupportedLanguages

+
+private java.lang.String checkSupportedLanguages(java.lang.String lang)
+
+
+
+
+
+
+ +

+setIndexMetaPriority

+
+public void setIndexMetaPriority(boolean prio)
+
+
+
+
+
+
+ +

+getIndexMetaPriority

+
+public boolean getIndexMetaPriority()
+
+
+
+
+
+
+ +

+isTextFile

+
+private boolean isTextFile(java.io.File file)
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/text-base/package-frame.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/text-base/package-frame.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,34 @@ + + + + + + +de.mpiwg.dwinter.fulltextIndexer.harvester.processors + + + + + + + + + + + +de.mpiwg.dwinter.fulltextIndexer.harvester.processors + + + + +
+Classes  + +
+OCRProcessFileThread +
+ProcessFileThread
+ + + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/text-base/package-summary.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/text-base/package-summary.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,161 @@ + + + + + + +de.mpiwg.dwinter.fulltextIndexer.harvester.processors + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package de.mpiwg.dwinter.fulltextIndexer.harvester.processors +

+ + + + + + + + + + + + + +
+Class Summary
OCRProcessFileThread 
ProcessFileThread 
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/text-base/package-tree.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/text-base/package-tree.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,158 @@ + + + + + + +de.mpiwg.dwinter.fulltextIndexer.harvester.processors Class Hierarchy + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package de.mpiwg.dwinter.fulltextIndexer.harvester.processors +

+
+
+
Package Hierarchies:
All Packages
+
+

+Class Hierarchy +

+
    +
  • java.lang.Object
      +
    • java.lang.Thread (implements java.lang.Runnable) + +
    +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/text-base/package-use.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/text-base/package-use.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,189 @@ + + + + + + +Uses of Package de.mpiwg.dwinter.fulltextIndexer.harvester.processors + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Package
de.mpiwg.dwinter.fulltextIndexer.harvester.processors

+
+ + + + + + + + + + + + + +
+Packages that use de.mpiwg.dwinter.fulltextIndexer.harvester.processors
de.mpiwg.dwinter.fulltextIndexer.harvester  
de.mpiwg.dwinter.fulltextIndexer.harvester.processors  
+  +

+ + + + + + + + +
+Classes in de.mpiwg.dwinter.fulltextIndexer.harvester.processors used by de.mpiwg.dwinter.fulltextIndexer.harvester
ProcessFileThread + +
+           
+  +

+ + + + + + + + +
+Classes in de.mpiwg.dwinter.fulltextIndexer.harvester.processors used by de.mpiwg.dwinter.fulltextIndexer.harvester.processors
ProcessFileThread + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/OCRProcessFileThread.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/OCRProcessFileThread.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,357 @@ + + + + + + +OCRProcessFileThread + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.harvester.processors +
+Class OCRProcessFileThread

+
+java.lang.Object
+  extended by java.lang.Thread
+      extended by de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread
+          extended by de.mpiwg.dwinter.fulltextIndexer.harvester.processors.OCRProcessFileThread
+
+
+
All Implemented Interfaces:
java.lang.Runnable
+
+
+
+
public class OCRProcessFileThread
extends ProcessFileThread
+ + +

+


+ +

+ + + + + + + +
+Nested Class Summary
+ + + + + + + +
Nested classes/interfaces inherited from class java.lang.Thread
java.lang.Thread.State, java.lang.Thread.UncaughtExceptionHandler
+  + + + + + + + +
+Field Summary
+ + + + + + + +
Fields inherited from class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread
deduceFromFolderPriority, docDir, done, excludeFolders, fileTypesToIndex, index_dir, indexMetaPriority, languageAnalyzers, languageFileName, languageToISO, supportedLanguageFolder
+ + + + + + + +
Fields inherited from class java.lang.Thread
MAX_PRIORITY, MIN_PRIORITY, NORM_PRIORITY
+  + + + + + + + + + + +
+Constructor Summary
OCRProcessFileThread(LanguageAnalyzers languageAnalyzers2, + java.io.File file, + java.lang.String lfn, + java.util.HashMap<java.lang.String,java.lang.String> tl, + java.lang.String mdProviderUrl, + java.lang.String preferedLanguage, + java.util.HashMap<java.lang.String,java.lang.String> languageToISO, + java.util.HashMap<java.lang.String,java.lang.String> supportedLanguageFolder) + +
+           
+  + + + + + + + + + + + +
+Method Summary
+protected  java.lang.BooleanaddDocument(java.io.File file, + java.lang.String lang, + java.lang.String dcMetaData, + java.lang.String textId) + +
+           
+ + + + + + + +
Methods inherited from class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread
getIndexMetaPriority, harvestFolder, indexDocs, loadLanguages, run, setIndexMetaPriority
+ + + + + + + +
Methods inherited from class java.lang.Thread
activeCount, checkAccess, countStackFrames, currentThread, destroy, dumpStack, enumerate, getAllStackTraces, getContextClassLoader, getDefaultUncaughtExceptionHandler, getId, getName, getPriority, getStackTrace, getState, getThreadGroup, getUncaughtExceptionHandler, holdsLock, interrupt, interrupted, isAlive, isDaemon, isInterrupted, join, join, join, resume, setContextClassLoader, setDaemon, setDefaultUncaughtExceptionHandler, setName, setPriority, setUncaughtExceptionHandler, sleep, sleep, start, stop, stop, suspend, toString, yield
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+OCRProcessFileThread

+
+public OCRProcessFileThread(LanguageAnalyzers languageAnalyzers2,
+                            java.io.File file,
+                            java.lang.String lfn,
+                            java.util.HashMap<java.lang.String,java.lang.String> tl,
+                            java.lang.String mdProviderUrl,
+                            java.lang.String preferedLanguage,
+                            java.util.HashMap<java.lang.String,java.lang.String> languageToISO,
+                            java.util.HashMap<java.lang.String,java.lang.String> supportedLanguageFolder)
+
+
+ + + + + + + + +
+Method Detail
+ +

+addDocument

+
+protected java.lang.Boolean addDocument(java.io.File file,
+                                        java.lang.String lang,
+                                        java.lang.String dcMetaData,
+                                        java.lang.String textId)
+                                 throws org.apache.lucene.index.CorruptIndexException,
+                                        java.io.IOException,
+                                        java.io.FileNotFoundException,
+                                        java.io.UnsupportedEncodingException,
+                                        javax.xml.parsers.ParserConfigurationException,
+                                        org.xml.sax.SAXException
+
+
+
Overrides:
addDocument in class ProcessFileThread
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
java.io.IOException +
java.io.FileNotFoundException +
java.io.UnsupportedEncodingException +
javax.xml.parsers.ParserConfigurationException +
org.xml.sax.SAXException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/ProcessFileThread.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/ProcessFileThread.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,1146 @@ + + + + + + +ProcessFileThread + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.harvester.processors +
+Class ProcessFileThread

+
+java.lang.Object
+  extended by java.lang.Thread
+      extended by de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread
+
+
+
All Implemented Interfaces:
java.lang.Runnable
+
+
+
Direct Known Subclasses:
OCRProcessFileThread
+
+
+
+
public class ProcessFileThread
extends java.lang.Thread
+ + +

+


+ +

+ + + + + + + +
+Nested Class Summary
+ + + + + + + +
Nested classes/interfaces inherited from class java.lang.Thread
java.lang.Thread.State, java.lang.Thread.UncaughtExceptionHandler
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Field Summary
+private  intcounter + +
+           
+protected  booleandeduceFromFolderPriority + +
+           
+private static intDELETED_OLD_VERSION + +
+           
+private static intDELETED_WRONG_LANGUAGE + +
+           
+protected  java.io.FiledocDir + +
+           
+ booleandone + +
+           
+protected  java.util.ArrayList<java.lang.String>excludeFolders + +
+           
+private static intFILE_EXISTS + +
+           
+protected  java.util.ArrayList<java.lang.String>fileTypesToIndex + +
+           
+protected  java.io.Fileindex_dir + +
+           
+protected  booleanindexMetaPriority + +
+           
+protected  LanguageAnalyzerslanguageAnalyzers + +
+           
+protected  java.lang.StringlanguageFileName + +
+           
+protected  java.util.HashMap<java.lang.String,java.lang.String>languageToISO + +
+           
+private  java.lang.StringmdProviderUrl + +
+           
+private  java.lang.Stringmode + +
+           
+private static intNEW_FILE + +
+           
+private  java.lang.StringpreferedLanguage + +
+           
+private  java.io.FileprocessThisFile + +
+           
+private  java.lang.StringspecialMode + +
+           
+protected  java.util.HashMap<java.lang.String,java.lang.String>supportedLanguageFolder + +
+           
+private static java.lang.StringTEXTIDFROMPATH_REGEXP + +
+           
+private  java.util.HashMap<java.lang.String,java.lang.String>textLanguage + +
+           
+ + + + + + + +
Fields inherited from class java.lang.Thread
MAX_PRIORITY, MIN_PRIORITY, NORM_PRIORITY
+  + + + + + + + + + + + + + +
+Constructor Summary
ProcessFileThread(java.io.File docDir, + java.io.File index_dir, + java.lang.String languageFileName, + java.io.File processThisFile, + java.lang.String mdProviderUrl, + java.util.HashMap<java.lang.String,java.lang.String> languageToISO, + java.util.HashMap<java.lang.String,java.lang.String> supportedLanguageFolder) + +
+           
ProcessFileThread(LanguageAnalyzers languageAnalyzers2, + java.io.File file, + java.lang.String lfn, + java.util.HashMap<java.lang.String,java.lang.String> tl, + java.lang.String mdProviderUrl, + java.lang.String preferedLanguage, + java.util.HashMap<java.lang.String,java.lang.String> languageToISO, + java.util.HashMap<java.lang.String,java.lang.String> supportedLanguageFolder) + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+private  java.lang.StringabsPathToTextId(java.lang.String absolutePath) + +
+           
+protected  java.lang.BooleanaddDocument(java.io.File file, + java.lang.String lang, + java.lang.String dcMetaData, + java.lang.String textId) + +
+           
+private  intcheckFileAndRemoveOldFile(java.lang.String filePath, + java.lang.String lang, + boolean deleteWrongLanguage, + long fileModDate) + +
+           
+private  java.lang.StringcheckSupportedLanguages(java.lang.String lang) + +
+           
+private  java.lang.StringdeduceFromFolderName(java.io.File file) + +
+           
+private  java.lang.StringgetDCFromIndexMeta(java.lang.String textId) + +
+           
+ booleangetIndexMetaPriority() + +
+           
+private  java.lang.StringgetLanguageFromIndexMeta(java.io.File file) + +
+           
+private  java.lang.StringgetLanguageOfText(java.lang.String textId, + java.io.File file) + +
+           
+private  java.lang.StringgetTextId(java.io.File file) + +
+           
+ voidharvestFolder() + +
+           
+(package private)  voidindexDocs(java.io.File file) + +
+           
+private  booleanisTextFile(java.io.File file) + +
+           
+protected  java.util.HashMap<java.lang.String,java.lang.String>loadLanguages() + +
+           
+private  voidprocessFile(java.io.File file) + +
+           
+ voidrun() + +
+           
+ voidsetIndexMetaPriority(boolean prio) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Thread
activeCount, checkAccess, countStackFrames, currentThread, destroy, dumpStack, enumerate, getAllStackTraces, getContextClassLoader, getDefaultUncaughtExceptionHandler, getId, getName, getPriority, getStackTrace, getState, getThreadGroup, getUncaughtExceptionHandler, holdsLock, interrupt, interrupted, isAlive, isDaemon, isInterrupted, join, join, join, resume, setContextClassLoader, setDaemon, setDefaultUncaughtExceptionHandler, setName, setPriority, setUncaughtExceptionHandler, sleep, sleep, start, stop, stop, suspend, toString, yield
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+TEXTIDFROMPATH_REGEXP

+
+private static final java.lang.String TEXTIDFROMPATH_REGEXP
+
+
+
See Also:
Constant Field Values
+
+
+ +

+DELETED_WRONG_LANGUAGE

+
+private static final int DELETED_WRONG_LANGUAGE
+
+
+
See Also:
Constant Field Values
+
+
+ +

+DELETED_OLD_VERSION

+
+private static final int DELETED_OLD_VERSION
+
+
+
See Also:
Constant Field Values
+
+
+ +

+NEW_FILE

+
+private static final int NEW_FILE
+
+
+
See Also:
Constant Field Values
+
+
+ +

+FILE_EXISTS

+
+private static final int FILE_EXISTS
+
+
+
See Also:
Constant Field Values
+
+
+ +

+docDir

+
+protected java.io.File docDir
+
+
+
+
+
+ +

+index_dir

+
+protected java.io.File index_dir
+
+
+
+
+
+ +

+fileTypesToIndex

+
+protected java.util.ArrayList<java.lang.String> fileTypesToIndex
+
+
+
+
+
+ +

+excludeFolders

+
+protected java.util.ArrayList<java.lang.String> excludeFolders
+
+
+
+
+
+ +

+textLanguage

+
+private java.util.HashMap<java.lang.String,java.lang.String> textLanguage
+
+
+
+
+
+ +

+languageToISO

+
+protected java.util.HashMap<java.lang.String,java.lang.String> languageToISO
+
+
+
+
+
+ +

+languageAnalyzers

+
+protected LanguageAnalyzers languageAnalyzers
+
+
+
+
+
+ +

+supportedLanguageFolder

+
+protected java.util.HashMap<java.lang.String,java.lang.String> supportedLanguageFolder
+
+
+
+
+
+ +

+counter

+
+private int counter
+
+
+
+
+
+ +

+languageFileName

+
+protected java.lang.String languageFileName
+
+
+
+
+
+ +

+indexMetaPriority

+
+protected boolean indexMetaPriority
+
+
+
+
+
+ +

+deduceFromFolderPriority

+
+protected boolean deduceFromFolderPriority
+
+
+
+
+
+ +

+specialMode

+
+private java.lang.String specialMode
+
+
+
+
+
+ +

+done

+
+public boolean done
+
+
+
+
+
+ +

+processThisFile

+
+private java.io.File processThisFile
+
+
+
+
+
+ +

+mode

+
+private java.lang.String mode
+
+
+
+
+
+ +

+mdProviderUrl

+
+private java.lang.String mdProviderUrl
+
+
+
+
+
+ +

+preferedLanguage

+
+private java.lang.String preferedLanguage
+
+
+
+
+ + + + + + + + +
+Constructor Detail
+ +

+ProcessFileThread

+
+public ProcessFileThread(java.io.File docDir,
+                         java.io.File index_dir,
+                         java.lang.String languageFileName,
+                         java.io.File processThisFile,
+                         java.lang.String mdProviderUrl,
+                         java.util.HashMap<java.lang.String,java.lang.String> languageToISO,
+                         java.util.HashMap<java.lang.String,java.lang.String> supportedLanguageFolder)
+                  throws org.apache.lucene.index.CorruptIndexException,
+                         org.apache.lucene.store.LockObtainFailedException,
+                         java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException
+
+
+ +

+ProcessFileThread

+
+public ProcessFileThread(LanguageAnalyzers languageAnalyzers2,
+                         java.io.File file,
+                         java.lang.String lfn,
+                         java.util.HashMap<java.lang.String,java.lang.String> tl,
+                         java.lang.String mdProviderUrl,
+                         java.lang.String preferedLanguage,
+                         java.util.HashMap<java.lang.String,java.lang.String> languageToISO,
+                         java.util.HashMap<java.lang.String,java.lang.String> supportedLanguageFolder)
+
+
+ + + + + + + + +
+Method Detail
+ +

+run

+
+public void run()
+
+
+
Specified by:
run in interface java.lang.Runnable
Overrides:
run in class java.lang.Thread
+
+
+
+
+
+
+ +

+getLanguageOfText

+
+private java.lang.String getLanguageOfText(java.lang.String textId,
+                                           java.io.File file)
+                                    throws java.io.IOException
+
+
+ +
Throws: +
java.io.IOException
+
+
+
+ +

+deduceFromFolderName

+
+private java.lang.String deduceFromFolderName(java.io.File file)
+
+
+
+
+
+
+ +

+getLanguageFromIndexMeta

+
+private java.lang.String getLanguageFromIndexMeta(java.io.File file)
+                                           throws java.io.IOException
+
+
+ +
Throws: +
java.io.IOException
+
+
+
+ +

+getDCFromIndexMeta

+
+private java.lang.String getDCFromIndexMeta(java.lang.String textId)
+                                     throws java.io.IOException,
+                                            org.apache.xmlrpc.XmlRpcException
+
+
+ +
Throws: +
java.io.IOException +
org.apache.xmlrpc.XmlRpcException
+
+
+
+ +

+loadLanguages

+
+protected java.util.HashMap<java.lang.String,java.lang.String> loadLanguages()
+
+
+
+
+
+
+ +

+harvestFolder

+
+public void harvestFolder()
+                   throws java.lang.InterruptedException
+
+
+ +
Throws: +
java.lang.InterruptedException
+
+
+
+ +

+indexDocs

+
+void indexDocs(java.io.File file)
+         throws java.io.IOException,
+                java.lang.InterruptedException
+
+
+ +
Throws: +
java.io.IOException +
java.lang.InterruptedException
+
+
+
+ +

+processFile

+
+private void processFile(java.io.File file)
+                  throws java.io.IOException,
+                         org.apache.lucene.index.CorruptIndexException,
+                         java.lang.InterruptedException,
+                         java.io.FileNotFoundException,
+                         java.io.UnsupportedEncodingException
+
+
+ +
Throws: +
java.io.IOException +
org.apache.lucene.index.CorruptIndexException +
java.lang.InterruptedException +
java.io.FileNotFoundException +
java.io.UnsupportedEncodingException
+
+
+
+ +

+addDocument

+
+protected java.lang.Boolean addDocument(java.io.File file,
+                                        java.lang.String lang,
+                                        java.lang.String dcMetaData,
+                                        java.lang.String textId)
+                                 throws org.apache.lucene.index.CorruptIndexException,
+                                        java.io.IOException,
+                                        java.io.FileNotFoundException,
+                                        java.io.UnsupportedEncodingException,
+                                        javax.xml.parsers.ParserConfigurationException,
+                                        org.xml.sax.SAXException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
java.io.IOException +
java.io.FileNotFoundException +
java.io.UnsupportedEncodingException +
javax.xml.parsers.ParserConfigurationException +
org.xml.sax.SAXException
+
+
+
+ +

+getTextId

+
+private java.lang.String getTextId(java.io.File file)
+
+
+
+
+
+
+ +

+absPathToTextId

+
+private java.lang.String absPathToTextId(java.lang.String absolutePath)
+
+
+
+
+
+
+ +

+checkFileAndRemoveOldFile

+
+private int checkFileAndRemoveOldFile(java.lang.String filePath,
+                                      java.lang.String lang,
+                                      boolean deleteWrongLanguage,
+                                      long fileModDate)
+                               throws org.apache.lucene.index.CorruptIndexException,
+                                      java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
java.io.IOException
+
+
+
+ +

+checkSupportedLanguages

+
+private java.lang.String checkSupportedLanguages(java.lang.String lang)
+
+
+
+
+
+
+ +

+setIndexMetaPriority

+
+public void setIndexMetaPriority(boolean prio)
+
+
+
+
+
+
+ +

+getIndexMetaPriority

+
+public boolean getIndexMetaPriority()
+
+
+
+
+
+
+ +

+isTextFile

+
+private boolean isTextFile(java.io.File file)
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/class-use/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/class-use/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,17 @@ +K 25 +svn:wc:ra_dav:version-url +V 108 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/class-use +END +OCRProcessFileThread.html +K 25 +svn:wc:ra_dav:version-url +V 134 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/class-use/OCRProcessFileThread.html +END +ProcessFileThread.html +K 25 +svn:wc:ra_dav:version-url +V 131 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/class-use/ProcessFileThread.html +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/class-use/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/class-use/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,103 @@ +10 + +dir +2662 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/class-use +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-28T15:47:27.673424Z +2662 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +OCRProcessFileThread.html +file + + + + +2010-06-28T09:04:53.000000Z +46fbd468025c94192413d186710c5217 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6387 + +ProcessFileThread.html +file + + + + +2010-06-28T09:04:53.000000Z +15a12e1213a93c4a02b1b55cfaafec50 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +11181 + diff -r 000000000000 -r dc7622afcfea 
doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/class-use/.svn/prop-base/OCRProcessFileThread.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/class-use/.svn/prop-base/OCRProcessFileThread.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/class-use/.svn/prop-base/ProcessFileThread.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/class-use/.svn/prop-base/ProcessFileThread.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/class-use/.svn/text-base/OCRProcessFileThread.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/class-use/.svn/text-base/OCRProcessFileThread.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.OCRProcessFileThread + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.harvester.processors.OCRProcessFileThread

+
+No usage of de.mpiwg.dwinter.fulltextIndexer.harvester.processors.OCRProcessFileThread +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/class-use/.svn/text-base/ProcessFileThread.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/class-use/.svn/text-base/ProcessFileThread.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,217 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread

+
+ + + + + + + + + + + + + +
+Packages that use ProcessFileThread
de.mpiwg.dwinter.fulltextIndexer.harvester  
de.mpiwg.dwinter.fulltextIndexer.harvester.processors  
+  +

+ + + + + +
+Uses of ProcessFileThread in de.mpiwg.dwinter.fulltextIndexer.harvester
+  +

+ + + + + + + + + + + + + +
Fields in de.mpiwg.dwinter.fulltextIndexer.harvester declared as ProcessFileThread
+protected  ProcessFileThread[]HarvesterThreaded.mythreads + +
+           
+protected  ProcessFileThread[]DocHarvesterThreaded.mythreads + +
+           
+  +

+ + + + + +
+Uses of ProcessFileThread in de.mpiwg.dwinter.fulltextIndexer.harvester.processors
+  +

+ + + + + + + + + +
Subclasses of ProcessFileThread in de.mpiwg.dwinter.fulltextIndexer.harvester.processors
+ classOCRProcessFileThread + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/class-use/OCRProcessFileThread.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/class-use/OCRProcessFileThread.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.OCRProcessFileThread + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.harvester.processors.OCRProcessFileThread

+
+No usage of de.mpiwg.dwinter.fulltextIndexer.harvester.processors.OCRProcessFileThread +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/class-use/ProcessFileThread.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/class-use/ProcessFileThread.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,217 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread

+
+ + + + + + + + + + + + + +
+Packages that use ProcessFileThread
de.mpiwg.dwinter.fulltextIndexer.harvester  
de.mpiwg.dwinter.fulltextIndexer.harvester.processors  
+  +

+ + + + + +
+Uses of ProcessFileThread in de.mpiwg.dwinter.fulltextIndexer.harvester
+  +

+ + + + + + + + + + + + + +
Fields in de.mpiwg.dwinter.fulltextIndexer.harvester declared as ProcessFileThread
+protected  ProcessFileThread[]HarvesterThreaded.mythreads + +
+           
+protected  ProcessFileThread[]DocHarvesterThreaded.mythreads + +
+           
+  +

+ + + + + +
+Uses of ProcessFileThread in de.mpiwg.dwinter.fulltextIndexer.harvester.processors
+  +

+ + + + + + + + + +
Subclasses of ProcessFileThread in de.mpiwg.dwinter.fulltextIndexer.harvester.processors
+ classOCRProcessFileThread + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/package-frame.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/package-frame.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,34 @@ + + + + + + +de.mpiwg.dwinter.fulltextIndexer.harvester.processors + + + + + + + + + + + +de.mpiwg.dwinter.fulltextIndexer.harvester.processors + + + + +
+Classes  + +
+OCRProcessFileThread +
+ProcessFileThread
+ + + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/package-summary.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/package-summary.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,161 @@ + + + + + + +de.mpiwg.dwinter.fulltextIndexer.harvester.processors + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package de.mpiwg.dwinter.fulltextIndexer.harvester.processors +

+ + + + + + + + + + + + + +
+Class Summary
OCRProcessFileThread 
ProcessFileThread 
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/package-tree.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/package-tree.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,158 @@ + + + + + + +de.mpiwg.dwinter.fulltextIndexer.harvester.processors Class Hierarchy + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package de.mpiwg.dwinter.fulltextIndexer.harvester.processors +

+
+
+
Package Hierarchies:
All Packages
+
+

+Class Hierarchy +

+
    +
  • java.lang.Object
      +
    • java.lang.Thread (implements java.lang.Runnable) + +
    +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/package-use.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/package-use.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,189 @@ + + + + + + +Uses of Package de.mpiwg.dwinter.fulltextIndexer.harvester.processors + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Package
de.mpiwg.dwinter.fulltextIndexer.harvester.processors

+
+ + + + + + + + + + + + + +
+Packages that use de.mpiwg.dwinter.fulltextIndexer.harvester.processors
de.mpiwg.dwinter.fulltextIndexer.harvester  
de.mpiwg.dwinter.fulltextIndexer.harvester.processors  
+  +

+ + + + + + + + +
+Classes in de.mpiwg.dwinter.fulltextIndexer.harvester.processors used by de.mpiwg.dwinter.fulltextIndexer.harvester
ProcessFileThread + +
+           
+  +

+ + + + + + + + +
+Classes in de.mpiwg.dwinter.fulltextIndexer.harvester.processors used by de.mpiwg.dwinter.fulltextIndexer.harvester.processors
ProcessFileThread + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,41 @@ +K 25 +svn:wc:ra_dav:version-url +V 83 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/utils +END +ParseOcrDocument.html +K 25 +svn:wc:ra_dav:version-url +V 105 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/utils/ParseOcrDocument.html +END +package-frame.html +K 25 +svn:wc:ra_dav:version-url +V 102 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/utils/package-frame.html +END +package-tree.html +K 25 +svn:wc:ra_dav:version-url +V 101 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/utils/package-tree.html +END +ParseIndexMeta.html +K 25 +svn:wc:ra_dav:version-url +V 103 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/utils/ParseIndexMeta.html +END +package-use.html +K 25 +svn:wc:ra_dav:version-url +V 100 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/utils/package-use.html +END +package-summary.html +K 25 +svn:wc:ra_dav:version-url +V 104 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/utils/package-summary.html +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,242 @@ +10 + +dir +2662 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/utils +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-28T15:47:27.673424Z +2662 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + 
+ParseIndexMeta.html +file + + + + +2010-06-28T09:04:52.000000Z +978bf8555c256920705db6d4266d46be +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +21967 + +ParseOcrDocument.html +file + + + + +2010-06-28T09:04:52.000000Z +1c8fcbbdac69a0d491f2f4ed3cf26b07 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +27362 + +class-use +dir + +package-frame.html +file + + + + +2010-06-28T09:04:53.000000Z +8256e3ce5b605635619076b7fb573d60 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +1091 + +package-summary.html +file + + + + +2010-06-28T09:04:53.000000Z +2cceabc52302c7858456c4ae8c2fc6d9 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6714 + +package-tree.html +file + + + + +2010-06-28T09:04:53.000000Z +3b980fd9190126906c7514dccbf1ada7 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6768 + +package-use.html +file + + + + +2010-06-28T09:04:53.000000Z +2a4d9f61c636841ed0f71156b0ae2bf2 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +5675 + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/prop-base/ParseIndexMeta.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/prop-base/ParseIndexMeta.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/prop-base/ParseOcrDocument.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/prop-base/ParseOcrDocument.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea 
doc/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/prop-base/package-frame.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/prop-base/package-frame.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/prop-base/package-summary.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/prop-base/package-summary.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/prop-base/package-tree.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/prop-base/package-tree.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/prop-base/package-use.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/prop-base/package-use.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/text-base/ParseIndexMeta.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/text-base/ParseIndexMeta.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,602 @@ + + + + + + +ParseIndexMeta + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.utils +
+Class ParseIndexMeta

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta
+
+
+
All Implemented Interfaces:
org.xml.sax.ContentHandler
+
+
+
+
public class ParseIndexMeta
extends java.lang.Object
implements org.xml.sax.ContentHandler
+ + +

+


+ +

+ + + + + + + + + + + + + + + + + + + +
+Field Summary
+private  java.lang.StringcurrentToken + +
+           
+ java.lang.Stringlang + +
+           
+ java.util.HashMap<java.lang.String,java.lang.String>value + +
+           
+  + + + + + + + + + + +
+Constructor Summary
ParseIndexMeta() + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ voidcharacters(char[] c, + int start, + int length) + +
+           
+ voidendDocument() + +
+           
+ voidendElement(java.lang.String uri, + java.lang.String localName, + java.lang.String name) + +
+           
+ voidendPrefixMapping(java.lang.String prefix) + +
+           
+ voidignorableWhitespace(char[] c, + int start, + int length) + +
+           
+ voidprocessingInstruction(java.lang.String target, + java.lang.String data) + +
+           
+ voidsetDocumentLocator(org.xml.sax.Locator arg1) + +
+           
+ voidskippedEntity(java.lang.String name) + +
+           
+ voidstartDocument() + +
+           
+ voidstartElement(java.lang.String uri, + java.lang.String localName, + java.lang.String name, + org.xml.sax.Attributes attrs) + +
+           
+ voidstartPrefixMapping(java.lang.String prefix, + java.lang.String uri) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+value

+
+public java.util.HashMap<java.lang.String,java.lang.String> value
+
+
+
+
+
+ +

+currentToken

+
+private java.lang.String currentToken
+
+
+
+
+
+ +

+lang

+
+public java.lang.String lang
+
+
+
+
+ + + + + + + + +
+Constructor Detail
+ +

+ParseIndexMeta

+
+public ParseIndexMeta()
+
+
+ + + + + + + + +
+Method Detail
+ +

+startDocument

+
+public void startDocument()
+                   throws org.xml.sax.SAXException
+
+
+
Specified by:
startDocument in interface org.xml.sax.ContentHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+endDocument

+
+public void endDocument()
+                 throws org.xml.sax.SAXException
+
+
+
Specified by:
endDocument in interface org.xml.sax.ContentHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+characters

+
+public void characters(char[] c,
+                       int start,
+                       int length)
+                throws org.xml.sax.SAXException
+
+
+
Specified by:
characters in interface org.xml.sax.ContentHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+ignorableWhitespace

+
+public void ignorableWhitespace(char[] c,
+                                int start,
+                                int length)
+                         throws org.xml.sax.SAXException
+
+
+
Specified by:
ignorableWhitespace in interface org.xml.sax.ContentHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+processingInstruction

+
+public void processingInstruction(java.lang.String target,
+                                  java.lang.String data)
+                           throws org.xml.sax.SAXException
+
+
+
Specified by:
processingInstruction in interface org.xml.sax.ContentHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+setDocumentLocator

+
+public void setDocumentLocator(org.xml.sax.Locator arg1)
+
+
+
Specified by:
setDocumentLocator in interface org.xml.sax.ContentHandler
+
+
+
+
+
+
+ +

+endElement

+
+public void endElement(java.lang.String uri,
+                       java.lang.String localName,
+                       java.lang.String name)
+                throws org.xml.sax.SAXException
+
+
+
Specified by:
endElement in interface org.xml.sax.ContentHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+endPrefixMapping

+
+public void endPrefixMapping(java.lang.String prefix)
+                      throws org.xml.sax.SAXException
+
+
+
Specified by:
endPrefixMapping in interface org.xml.sax.ContentHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+skippedEntity

+
+public void skippedEntity(java.lang.String name)
+                   throws org.xml.sax.SAXException
+
+
+
Specified by:
skippedEntity in interface org.xml.sax.ContentHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+startElement

+
+public void startElement(java.lang.String uri,
+                         java.lang.String localName,
+                         java.lang.String name,
+                         org.xml.sax.Attributes attrs)
+                  throws org.xml.sax.SAXException
+
+
+
Specified by:
startElement in interface org.xml.sax.ContentHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+startPrefixMapping

+
+public void startPrefixMapping(java.lang.String prefix,
+                               java.lang.String uri)
+                        throws org.xml.sax.SAXException
+
+
+
Specified by:
startPrefixMapping in interface org.xml.sax.ContentHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/text-base/ParseOcrDocument.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/text-base/ParseOcrDocument.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,697 @@ + + + + + + +ParseOcrDocument + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.utils +
+Class ParseOcrDocument

+
+java.lang.Object
+  extended by org.xml.sax.helpers.DefaultHandler
+      extended by de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument
+
+
+
All Implemented Interfaces:
org.xml.sax.ContentHandler, org.xml.sax.DTDHandler, org.xml.sax.EntityResolver, org.xml.sax.ErrorHandler
+
+
+
+
public class ParseOcrDocument
extends org.xml.sax.helpers.DefaultHandler
+ + +

+


+ +

+ + + + + + + + + + + + + + + + + + + + + + + +
+Field Summary
+private  OCRDocument.OCRLinecurrentLine + +
+           
+private  booleaninLine + +
+           
+private  intlineCounter + +
+           
+ OCRDocumentocrDocument + +
+           
+  + + + + + + + + + + +
+Constructor Summary
ParseOcrDocument() + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ voidcharacters(char[] c, + int start, + int length) + +
+           
+private  voiddoLine(org.xml.sax.Attributes attrs) + +
+           
+private  voiddoPage(org.xml.sax.Attributes attrs) + +
+           
+ voidendDocument() + +
+           
+ voidendElement(java.lang.String uri, + java.lang.String localName, + java.lang.String name) + +
+           
+ voidendPrefixMapping(java.lang.String prefix) + +
+           
+ voidignorableWhitespace(char[] c, + int start, + int length) + +
+           
+ voidprocessingInstruction(java.lang.String target, + java.lang.String data) + +
+           
+ org.xml.sax.InputSourceresolveEntity(java.lang.String publicId, + java.lang.String systemId) + +
+           
+ voidsetDocumentLocator(org.xml.sax.Locator arg1) + +
+           
+ voidskippedEntity(java.lang.String name) + +
+           
+ voidstartDocument() + +
+           
+ voidstartElement(java.lang.String uri, + java.lang.String localName, + java.lang.String name, + org.xml.sax.Attributes attrs) + +
+           
+ voidstartPrefixMapping(java.lang.String prefix, + java.lang.String uri) + +
+           
+ + + + + + + +
Methods inherited from class org.xml.sax.helpers.DefaultHandler
error, fatalError, notationDecl, unparsedEntityDecl, warning
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+ocrDocument

+
+public OCRDocument ocrDocument
+
+
+
+
+
+ +

+lineCounter

+
+private int lineCounter
+
+
+
+
+
+ +

+currentLine

+
+private OCRDocument.OCRLine currentLine
+
+
+
+
+
+ +

+inLine

+
+private boolean inLine
+
+
+
+
+ + + + + + + + +
+Constructor Detail
+ +

+ParseOcrDocument

+
+public ParseOcrDocument()
+
+
+ + + + + + + + +
+Method Detail
+ +

+startDocument

+
+public void startDocument()
+                   throws org.xml.sax.SAXException
+
+
+
Specified by:
startDocument in interface org.xml.sax.ContentHandler
Overrides:
startDocument in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+endDocument

+
+public void endDocument()
+                 throws org.xml.sax.SAXException
+
+
+
Specified by:
endDocument in interface org.xml.sax.ContentHandler
Overrides:
endDocument in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+characters

+
+public void characters(char[] c,
+                       int start,
+                       int length)
+                throws org.xml.sax.SAXException
+
+
+
Specified by:
characters in interface org.xml.sax.ContentHandler
Overrides:
characters in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+ignorableWhitespace

+
+public void ignorableWhitespace(char[] c,
+                                int start,
+                                int length)
+                         throws org.xml.sax.SAXException
+
+
+
Specified by:
ignorableWhitespace in interface org.xml.sax.ContentHandler
Overrides:
ignorableWhitespace in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+processingInstruction

+
+public void processingInstruction(java.lang.String target,
+                                  java.lang.String data)
+                           throws org.xml.sax.SAXException
+
+
+
Specified by:
processingInstruction in interface org.xml.sax.ContentHandler
Overrides:
processingInstruction in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+setDocumentLocator

+
+public void setDocumentLocator(org.xml.sax.Locator arg1)
+
+
+
Specified by:
setDocumentLocator in interface org.xml.sax.ContentHandler
Overrides:
setDocumentLocator in class org.xml.sax.helpers.DefaultHandler
+
+
+
+
+
+
+ +

+endElement

+
+public void endElement(java.lang.String uri,
+                       java.lang.String localName,
+                       java.lang.String name)
+                throws org.xml.sax.SAXException
+
+
+
Specified by:
endElement in interface org.xml.sax.ContentHandler
Overrides:
endElement in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+endPrefixMapping

+
+public void endPrefixMapping(java.lang.String prefix)
+                      throws org.xml.sax.SAXException
+
+
+
Specified by:
endPrefixMapping in interface org.xml.sax.ContentHandler
Overrides:
endPrefixMapping in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+skippedEntity

+
+public void skippedEntity(java.lang.String name)
+                   throws org.xml.sax.SAXException
+
+
+
Specified by:
skippedEntity in interface org.xml.sax.ContentHandler
Overrides:
skippedEntity in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+startElement

+
+public void startElement(java.lang.String uri,
+                         java.lang.String localName,
+                         java.lang.String name,
+                         org.xml.sax.Attributes attrs)
+                  throws org.xml.sax.SAXException
+
+
+
Specified by:
startElement in interface org.xml.sax.ContentHandler
Overrides:
startElement in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+doPage

+
+private void doPage(org.xml.sax.Attributes attrs)
+
+
+
+
+
+
+ +

+doLine

+
+private void doLine(org.xml.sax.Attributes attrs)
+
+
+
+
+
+
+ +

+startPrefixMapping

+
+public void startPrefixMapping(java.lang.String prefix,
+                               java.lang.String uri)
+                        throws org.xml.sax.SAXException
+
+
+
Specified by:
startPrefixMapping in interface org.xml.sax.ContentHandler
Overrides:
startPrefixMapping in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+resolveEntity

+
+public org.xml.sax.InputSource resolveEntity(java.lang.String publicId,
+                                             java.lang.String systemId)
+                                      throws org.xml.sax.SAXException,
+                                             java.io.IOException
+
+
+
Specified by:
resolveEntity in interface org.xml.sax.EntityResolver
Overrides:
resolveEntity in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException +
java.io.IOException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/text-base/package-frame.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/text-base/package-frame.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,34 @@ + + + + + + +de.mpiwg.dwinter.fulltextIndexer.utils + + + + + + + + + + + +de.mpiwg.dwinter.fulltextIndexer.utils + + + + +
+Classes  + +
+ParseIndexMeta +
+ParseOcrDocument
+ + + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/text-base/package-summary.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/text-base/package-summary.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,161 @@ + + + + + + +de.mpiwg.dwinter.fulltextIndexer.utils + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package de.mpiwg.dwinter.fulltextIndexer.utils +

+ + + + + + + + + + + + + +
+Class Summary
ParseIndexMeta 
ParseOcrDocument 
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/text-base/package-tree.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/text-base/package-tree.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,157 @@ + + + + + + +de.mpiwg.dwinter.fulltextIndexer.utils Class Hierarchy + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package de.mpiwg.dwinter.fulltextIndexer.utils +

+
+
+
Package Hierarchies:
All Packages
+
+

+Class Hierarchy +

+
    +
  • java.lang.Object
      +
    • org.xml.sax.helpers.DefaultHandler (implements org.xml.sax.ContentHandler, org.xml.sax.DTDHandler, org.xml.sax.EntityResolver, org.xml.sax.ErrorHandler) + +
    • de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta (implements org.xml.sax.ContentHandler) +
    +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/text-base/package-use.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/text-base/package-use.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Package de.mpiwg.dwinter.fulltextIndexer.utils + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Package
de.mpiwg.dwinter.fulltextIndexer.utils

+
+No usage of de.mpiwg.dwinter.fulltextIndexer.utils +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/utils/ParseIndexMeta.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/utils/ParseIndexMeta.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,602 @@ + + + + + + +ParseIndexMeta + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.utils +
+Class ParseIndexMeta

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta
+
+
+
All Implemented Interfaces:
org.xml.sax.ContentHandler
+
+
+
+
public class ParseIndexMeta
extends java.lang.Object
implements org.xml.sax.ContentHandler
+ + +

+


+ +

+ + + + + + + + + + + + + + + + + + + +
+Field Summary
+private  java.lang.StringcurrentToken + +
+           
+ java.lang.Stringlang + +
+           
+ java.util.HashMap<java.lang.String,java.lang.String>value + +
+           
+  + + + + + + + + + + +
+Constructor Summary
ParseIndexMeta() + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ voidcharacters(char[] c, + int start, + int length) + +
+           
+ voidendDocument() + +
+           
+ voidendElement(java.lang.String uri, + java.lang.String localName, + java.lang.String name) + +
+           
+ voidendPrefixMapping(java.lang.String prefix) + +
+           
+ voidignorableWhitespace(char[] c, + int start, + int length) + +
+           
+ voidprocessingInstruction(java.lang.String target, + java.lang.String data) + +
+           
+ voidsetDocumentLocator(org.xml.sax.Locator arg1) + +
+           
+ voidskippedEntity(java.lang.String name) + +
+           
+ voidstartDocument() + +
+           
+ voidstartElement(java.lang.String uri, + java.lang.String localName, + java.lang.String name, + org.xml.sax.Attributes attrs) + +
+           
+ voidstartPrefixMapping(java.lang.String prefix, + java.lang.String uri) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+value

+
+public java.util.HashMap<java.lang.String,java.lang.String> value
+
+
+
+
+
+ +

+currentToken

+
+private java.lang.String currentToken
+
+
+
+
+
+ +

+lang

+
+public java.lang.String lang
+
+
+
+
+ + + + + + + + +
+Constructor Detail
+ +

+ParseIndexMeta

+
+public ParseIndexMeta()
+
+
+ + + + + + + + +
+Method Detail
+ +

+startDocument

+
+public void startDocument()
+                   throws org.xml.sax.SAXException
+
+
+
Specified by:
startDocument in interface org.xml.sax.ContentHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+endDocument

+
+public void endDocument()
+                 throws org.xml.sax.SAXException
+
+
+
Specified by:
endDocument in interface org.xml.sax.ContentHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+characters

+
+public void characters(char[] c,
+                       int start,
+                       int length)
+                throws org.xml.sax.SAXException
+
+
+
Specified by:
characters in interface org.xml.sax.ContentHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+ignorableWhitespace

+
+public void ignorableWhitespace(char[] c,
+                                int start,
+                                int length)
+                         throws org.xml.sax.SAXException
+
+
+
Specified by:
ignorableWhitespace in interface org.xml.sax.ContentHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+processingInstruction

+
+public void processingInstruction(java.lang.String target,
+                                  java.lang.String data)
+                           throws org.xml.sax.SAXException
+
+
+
Specified by:
processingInstruction in interface org.xml.sax.ContentHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+setDocumentLocator

+
+public void setDocumentLocator(org.xml.sax.Locator arg1)
+
+
+
Specified by:
setDocumentLocator in interface org.xml.sax.ContentHandler
+
+
+
+
+
+
+ +

+endElement

+
+public void endElement(java.lang.String uri,
+                       java.lang.String localName,
+                       java.lang.String name)
+                throws org.xml.sax.SAXException
+
+
+
Specified by:
endElement in interface org.xml.sax.ContentHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+endPrefixMapping

+
+public void endPrefixMapping(java.lang.String prefix)
+                      throws org.xml.sax.SAXException
+
+
+
Specified by:
endPrefixMapping in interface org.xml.sax.ContentHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+skippedEntity

+
+public void skippedEntity(java.lang.String name)
+                   throws org.xml.sax.SAXException
+
+
+
Specified by:
skippedEntity in interface org.xml.sax.ContentHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+startElement

+
+public void startElement(java.lang.String uri,
+                         java.lang.String localName,
+                         java.lang.String name,
+                         org.xml.sax.Attributes attrs)
+                  throws org.xml.sax.SAXException
+
+
+
Specified by:
startElement in interface org.xml.sax.ContentHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+startPrefixMapping

+
+public void startPrefixMapping(java.lang.String prefix,
+                               java.lang.String uri)
+                        throws org.xml.sax.SAXException
+
+
+
Specified by:
startPrefixMapping in interface org.xml.sax.ContentHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/utils/ParseOcrDocument.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/utils/ParseOcrDocument.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,697 @@ + + + + + + +ParseOcrDocument + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.fulltextIndexer.utils +
+Class ParseOcrDocument

+
+java.lang.Object
+  extended by org.xml.sax.helpers.DefaultHandler
+      extended by de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument
+
+
+
All Implemented Interfaces:
org.xml.sax.ContentHandler, org.xml.sax.DTDHandler, org.xml.sax.EntityResolver, org.xml.sax.ErrorHandler
+
+
+
+
public class ParseOcrDocument
extends org.xml.sax.helpers.DefaultHandler
+ + +

+


+ +

+ + + + + + + + + + + + + + + + + + + + + + + +
+Field Summary
+private  OCRDocument.OCRLinecurrentLine + +
+           
+private  booleaninLine + +
+           
+private  intlineCounter + +
+           
+ OCRDocumentocrDocument + +
+           
+  + + + + + + + + + + +
+Constructor Summary
ParseOcrDocument() + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ voidcharacters(char[] c, + int start, + int length) + +
+           
+private  voiddoLine(org.xml.sax.Attributes attrs) + +
+           
+private  voiddoPage(org.xml.sax.Attributes attrs) + +
+           
+ voidendDocument() + +
+           
+ voidendElement(java.lang.String uri, + java.lang.String localName, + java.lang.String name) + +
+           
+ voidendPrefixMapping(java.lang.String prefix) + +
+           
+ voidignorableWhitespace(char[] c, + int start, + int length) + +
+           
+ voidprocessingInstruction(java.lang.String target, + java.lang.String data) + +
+           
+ org.xml.sax.InputSourceresolveEntity(java.lang.String publicId, + java.lang.String systemId) + +
+           
+ voidsetDocumentLocator(org.xml.sax.Locator arg1) + +
+           
+ voidskippedEntity(java.lang.String name) + +
+           
+ voidstartDocument() + +
+           
+ voidstartElement(java.lang.String uri, + java.lang.String localName, + java.lang.String name, + org.xml.sax.Attributes attrs) + +
+           
+ voidstartPrefixMapping(java.lang.String prefix, + java.lang.String uri) + +
+           
+ + + + + + + +
Methods inherited from class org.xml.sax.helpers.DefaultHandler
error, fatalError, notationDecl, unparsedEntityDecl, warning
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+ocrDocument

+
+public OCRDocument ocrDocument
+
+
+
+
+
+ +

+lineCounter

+
+private int lineCounter
+
+
+
+
+
+ +

+currentLine

+
+private OCRDocument.OCRLine currentLine
+
+
+
+
+
+ +

+inLine

+
+private boolean inLine
+
+
+
+
+ + + + + + + + +
+Constructor Detail
+ +

+ParseOcrDocument

+
+public ParseOcrDocument()
+
+
+ + + + + + + + +
+Method Detail
+ +

+startDocument

+
+public void startDocument()
+                   throws org.xml.sax.SAXException
+
+
+
Specified by:
startDocument in interface org.xml.sax.ContentHandler
Overrides:
startDocument in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+endDocument

+
+public void endDocument()
+                 throws org.xml.sax.SAXException
+
+
+
Specified by:
endDocument in interface org.xml.sax.ContentHandler
Overrides:
endDocument in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+characters

+
+public void characters(char[] c,
+                       int start,
+                       int length)
+                throws org.xml.sax.SAXException
+
+
+
Specified by:
characters in interface org.xml.sax.ContentHandler
Overrides:
characters in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+ignorableWhitespace

+
+public void ignorableWhitespace(char[] c,
+                                int start,
+                                int length)
+                         throws org.xml.sax.SAXException
+
+
+
Specified by:
ignorableWhitespace in interface org.xml.sax.ContentHandler
Overrides:
ignorableWhitespace in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+processingInstruction

+
+public void processingInstruction(java.lang.String target,
+                                  java.lang.String data)
+                           throws org.xml.sax.SAXException
+
+
+
Specified by:
processingInstruction in interface org.xml.sax.ContentHandler
Overrides:
processingInstruction in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+setDocumentLocator

+
+public void setDocumentLocator(org.xml.sax.Locator arg1)
+
+
+
Specified by:
setDocumentLocator in interface org.xml.sax.ContentHandler
Overrides:
setDocumentLocator in class org.xml.sax.helpers.DefaultHandler
+
+
+
+
+
+
+ +

+endElement

+
+public void endElement(java.lang.String uri,
+                       java.lang.String localName,
+                       java.lang.String name)
+                throws org.xml.sax.SAXException
+
+
+
Specified by:
endElement in interface org.xml.sax.ContentHandler
Overrides:
endElement in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+endPrefixMapping

+
+public void endPrefixMapping(java.lang.String prefix)
+                      throws org.xml.sax.SAXException
+
+
+
Specified by:
endPrefixMapping in interface org.xml.sax.ContentHandler
Overrides:
endPrefixMapping in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+skippedEntity

+
+public void skippedEntity(java.lang.String name)
+                   throws org.xml.sax.SAXException
+
+
+
Specified by:
skippedEntity in interface org.xml.sax.ContentHandler
Overrides:
skippedEntity in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+startElement

+
+public void startElement(java.lang.String uri,
+                         java.lang.String localName,
+                         java.lang.String name,
+                         org.xml.sax.Attributes attrs)
+                  throws org.xml.sax.SAXException
+
+
+
Specified by:
startElement in interface org.xml.sax.ContentHandler
Overrides:
startElement in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+doPage

+
+private void doPage(org.xml.sax.Attributes attrs)
+
+
+
+
+
+
+ +

+doLine

+
+private void doLine(org.xml.sax.Attributes attrs)
+
+
+
+
+
+
+ +

+startPrefixMapping

+
+public void startPrefixMapping(java.lang.String prefix,
+                               java.lang.String uri)
+                        throws org.xml.sax.SAXException
+
+
+
Specified by:
startPrefixMapping in interface org.xml.sax.ContentHandler
Overrides:
startPrefixMapping in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException
+
+
+
+ +

+resolveEntity

+
+public org.xml.sax.InputSource resolveEntity(java.lang.String publicId,
+                                             java.lang.String systemId)
+                                      throws org.xml.sax.SAXException,
+                                             java.io.IOException
+
+
+
Specified by:
resolveEntity in interface org.xml.sax.EntityResolver
Overrides:
resolveEntity in class org.xml.sax.helpers.DefaultHandler
+
+
+ +
Throws: +
org.xml.sax.SAXException +
java.io.IOException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/utils/class-use/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/utils/class-use/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,17 @@ +K 25 +svn:wc:ra_dav:version-url +V 93 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/utils/class-use +END +ParseOcrDocument.html +K 25 +svn:wc:ra_dav:version-url +V 115 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/utils/class-use/ParseOcrDocument.html +END +ParseIndexMeta.html +K 25 +svn:wc:ra_dav:version-url +V 113 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/utils/class-use/ParseIndexMeta.html +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/utils/class-use/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/utils/class-use/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,103 @@ +10 + +dir +2662 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/fulltextIndexer/utils/class-use +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-28T15:47:27.673424Z +2662 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +ParseIndexMeta.html +file + + + + +2010-06-28T09:04:53.000000Z +6f3065b18d759c7b35795bf520325a19 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6126 + +ParseOcrDocument.html +file + + + + +2010-06-28T09:04:53.000000Z +2853b1a1c3ddfd7d99d9a25bd3e7ae12 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6146 + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/utils/class-use/.svn/prop-base/ParseIndexMeta.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/doc/de/mpiwg/dwinter/fulltextIndexer/utils/class-use/.svn/prop-base/ParseIndexMeta.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/utils/class-use/.svn/prop-base/ParseOcrDocument.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/utils/class-use/.svn/prop-base/ParseOcrDocument.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/utils/class-use/.svn/text-base/ParseIndexMeta.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/utils/class-use/.svn/text-base/ParseIndexMeta.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta

+
+No usage of de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/utils/class-use/.svn/text-base/ParseOcrDocument.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/utils/class-use/.svn/text-base/ParseOcrDocument.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument

+
+No usage of de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/utils/class-use/ParseIndexMeta.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/utils/class-use/ParseIndexMeta.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta

+
+No usage of de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/utils/class-use/ParseOcrDocument.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/utils/class-use/ParseOcrDocument.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument

+
+No usage of de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/utils/package-frame.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/utils/package-frame.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,34 @@ + + + + + + +de.mpiwg.dwinter.fulltextIndexer.utils + + + + + + + + + + + +de.mpiwg.dwinter.fulltextIndexer.utils + + + + +
+Classes  + +
+ParseIndexMeta +
+ParseOcrDocument
+ + + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/utils/package-summary.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/utils/package-summary.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,161 @@ + + + + + + +de.mpiwg.dwinter.fulltextIndexer.utils + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package de.mpiwg.dwinter.fulltextIndexer.utils +

+ + + + + + + + + + + + + +
+Class Summary
ParseIndexMeta 
ParseOcrDocument 
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/utils/package-tree.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/utils/package-tree.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,157 @@ + + + + + + +de.mpiwg.dwinter.fulltextIndexer.utils Class Hierarchy + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package de.mpiwg.dwinter.fulltextIndexer.utils +

+
+
+
Package Hierarchies:
All Packages
+
+

+Class Hierarchy +

+
    +
  • java.lang.Object
      +
    • org.xml.sax.helpers.DefaultHandler (implements org.xml.sax.ContentHandler, org.xml.sax.DTDHandler, org.xml.sax.EntityResolver, org.xml.sax.ErrorHandler) + +
    • de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta (implements org.xml.sax.ContentHandler) +
    +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/fulltextIndexer/utils/package-use.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/fulltextIndexer/utils/package-use.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Package de.mpiwg.dwinter.fulltextIndexer.utils + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Package
de.mpiwg.dwinter.fulltextIndexer.utils

+
+No usage of de.mpiwg.dwinter.fulltextIndexer.utils +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,35 @@ +K 25 +svn:wc:ra_dav:version-url +V 74 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools +END +LineCollector.html +K 25 +svn:wc:ra_dav:version-url +V 93 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/LineCollector.html +END +package-frame.html +K 25 +svn:wc:ra_dav:version-url +V 93 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/package-frame.html +END +package-tree.html +K 25 +svn:wc:ra_dav:version-url +V 92 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/package-tree.html +END +package-use.html +K 25 +svn:wc:ra_dav:version-url +V 91 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/package-use.html +END +package-summary.html +K 25 +svn:wc:ra_dav:version-url +V 95 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/package-summary.html +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,214 @@ +10 + +dir +2662 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-28T15:47:27.673424Z +2662 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +LineCollector.html +file + + + + +2010-06-28T09:04:52.000000Z +562be3dcbe2c8817e5eb30084cf1d28a +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +12657 + +analyzer +dir + +class-use +dir + +documents +dir + +package-frame.html +file + + + 
+ +2010-06-28T09:04:53.000000Z +4230759fb1a974a0aaa91f9c8e6b7fce +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +913 + +package-summary.html +file + + + + +2010-06-28T09:04:53.000000Z +a72aca68a600d140a9a07c7b572fe01d +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6345 + +package-tree.html +file + + + + +2010-06-28T09:04:53.000000Z +6fb8b26b9e7a42271b7c4c21698eae5b +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6410 + +package-use.html +file + + + + +2010-06-28T09:04:53.000000Z +0e9ae9dbed58421525ed50a680081be1 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +5576 + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/.svn/prop-base/LineCollector.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/.svn/prop-base/LineCollector.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/.svn/prop-base/package-frame.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/.svn/prop-base/package-frame.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/.svn/prop-base/package-summary.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/.svn/prop-base/package-summary.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/.svn/prop-base/package-tree.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/doc/de/mpiwg/dwinter/lucencetools/.svn/prop-base/package-tree.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/.svn/prop-base/package-use.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/.svn/prop-base/package-use.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/.svn/text-base/LineCollector.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/.svn/text-base/LineCollector.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,321 @@ + + + + + + +LineCollector + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.lucencetools +
+Class LineCollector

+
+java.lang.Object
+  extended by org.apache.lucene.search.Collector
+      extended by org.apache.lucene.search.TopDocsCollector<org.apache.lucene.search.ScoreDoc>
+          extended by org.apache.lucene.search.TopScoreDocCollector
+              extended by de.mpiwg.dwinter.lucencetools.LineCollector
+
+
+
+
public class LineCollector
extends org.apache.lucene.search.TopScoreDocCollector
+ + +

+


+ +

+ + + + + + + +
+Field Summary
+ + + + + + + +
Fields inherited from class org.apache.lucene.search.TopDocsCollector
EMPTY_TOPDOCS, pq, totalHits
+  + + + + + + + + + + + +
+Constructor Summary
+private LineCollector(int numhits) + +
+           
+  + + + + + + + + + + + + + + + +
+Method Summary
+ booleanacceptsDocsOutOfOrder() + +
+           
+ voidcollect(int arg0) + +
+           
+ + + + + + + +
Methods inherited from class org.apache.lucene.search.TopScoreDocCollector
create, newTopDocs, setNextReader, setScorer
+ + + + + + + +
Methods inherited from class org.apache.lucene.search.TopDocsCollector
getTotalHits, populateResults, topDocs, topDocs, topDocs
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+LineCollector

+
+private LineCollector(int numhits)
+
+
+ + + + + + + + +
+Method Detail
+ +

+acceptsDocsOutOfOrder

+
+public boolean acceptsDocsOutOfOrder()
+
+
+
Specified by:
acceptsDocsOutOfOrder in class org.apache.lucene.search.Collector
+
+
+
+
+
+
+ +

+collect

+
+public void collect(int arg0)
+             throws java.io.IOException
+
+
+
Specified by:
collect in class org.apache.lucene.search.Collector
+
+
+ +
Throws: +
java.io.IOException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/.svn/text-base/package-frame.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/.svn/text-base/package-frame.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,32 @@ + + + + + + +de.mpiwg.dwinter.lucencetools + + + + + + + + + + + +de.mpiwg.dwinter.lucencetools + + + + +
+Classes  + +
+LineCollector
+ + + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/.svn/text-base/package-summary.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/.svn/text-base/package-summary.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,157 @@ + + + + + + +de.mpiwg.dwinter.lucencetools + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package de.mpiwg.dwinter.lucencetools +

+ + + + + + + + + +
+Class Summary
LineCollector 
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/.svn/text-base/package-tree.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/.svn/text-base/package-tree.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,159 @@ + + + + + + +de.mpiwg.dwinter.lucencetools Class Hierarchy + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package de.mpiwg.dwinter.lucencetools +

+
+
+
Package Hierarchies:
All Packages
+
+

+Class Hierarchy +

+
    +
  • java.lang.Object
      +
    • org.apache.lucene.search.Collector
        +
      • org.apache.lucene.search.TopDocsCollector<T>
          +
        • org.apache.lucene.search.TopScoreDocCollector +
        +
      +
    +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/.svn/text-base/package-use.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/.svn/text-base/package-use.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Package de.mpiwg.dwinter.lucencetools + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Package
de.mpiwg.dwinter.lucencetools

+
+No usage of de.mpiwg.dwinter.lucencetools +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/LineCollector.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/LineCollector.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,321 @@ + + + + + + +LineCollector + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.lucencetools +
+Class LineCollector

+
+java.lang.Object
+  extended by org.apache.lucene.search.Collector
+      extended by org.apache.lucene.search.TopDocsCollector<org.apache.lucene.search.ScoreDoc>
+          extended by org.apache.lucene.search.TopScoreDocCollector
+              extended by de.mpiwg.dwinter.lucencetools.LineCollector
+
+
+
+
public class LineCollector
extends org.apache.lucene.search.TopScoreDocCollector
+ + +

+


+ +

+ + + + + + + +
+Field Summary
+ + + + + + + +
Fields inherited from class org.apache.lucene.search.TopDocsCollector
EMPTY_TOPDOCS, pq, totalHits
+  + + + + + + + + + + + +
+Constructor Summary
+private LineCollector(int numhits) + +
+           
+  + + + + + + + + + + + + + + + +
+Method Summary
+ booleanacceptsDocsOutOfOrder() + +
+           
+ voidcollect(int arg0) + +
+           
+ + + + + + + +
Methods inherited from class org.apache.lucene.search.TopScoreDocCollector
create, newTopDocs, setNextReader, setScorer
+ + + + + + + +
Methods inherited from class org.apache.lucene.search.TopDocsCollector
getTotalHits, populateResults, topDocs, topDocs, topDocs
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+LineCollector

+
+private LineCollector(int numhits)
+
+
+ + + + + + + + +
+Method Detail
+ +

+acceptsDocsOutOfOrder

+
+public boolean acceptsDocsOutOfOrder()
+
+
+
Specified by:
acceptsDocsOutOfOrder in class org.apache.lucene.search.Collector
+
+
+
+
+
+
+ +

+collect

+
+public void collect(int arg0)
+             throws java.io.IOException
+
+
+
Specified by:
collect in class org.apache.lucene.search.Collector
+
+
+ +
Throws: +
java.io.IOException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,47 @@ +K 25 +svn:wc:ra_dav:version-url +V 83 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/analyzer +END +package-frame.html +K 25 +svn:wc:ra_dav:version-url +V 102 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/analyzer/package-frame.html +END +LanguageAnalyzers.html +K 25 +svn:wc:ra_dav:version-url +V 106 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/analyzer/LanguageAnalyzers.html +END +package-tree.html +K 25 +svn:wc:ra_dav:version-url +V 101 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/analyzer/package-tree.html +END +XMLFilteredReader.html +K 25 +svn:wc:ra_dav:version-url +V 106 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/analyzer/XMLFilteredReader.html +END +package-use.html +K 25 +svn:wc:ra_dav:version-url +V 100 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/analyzer/package-use.html +END +package-summary.html +K 25 +svn:wc:ra_dav:version-url +V 104 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/analyzer/package-summary.html +END +LanguageAnalyzer.html +K 25 +svn:wc:ra_dav:version-url +V 105 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/analyzer/LanguageAnalyzer.html +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,279 @@ +10 + +dir +2662 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/analyzer 
+https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-28T15:47:27.673424Z +2662 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +LanguageAnalyzer.html +file + + + + +2010-06-28T09:04:51.000000Z +e48450d6993448bab40e615c05ca4999 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +14471 + +LanguageAnalyzers.html +file + + + + +2010-06-28T09:04:51.000000Z +7acdd0792da14cdd96d4f532887b2004 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +22418 + +XMLFilteredReader.html +file + + + + +2010-06-28T09:04:52.000000Z +b4fcf8e44138f78bc954ff531ebdf0b3 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +13117 + +class-use +dir + +donatusAnalyzer +dir + +package-frame.html +file + + + + +2010-06-28T09:04:53.000000Z +74a4c6122e068e6d0066ee889f99ee71 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +1233 + +package-summary.html +file + + + + +2010-06-28T09:04:53.000000Z +830d4dba23368a50390329b58197d880 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +7004 + +package-tree.html +file + + + + +2010-06-28T09:04:53.000000Z +39c217a6b9026af455e76d0a914f466d +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +7324 + +package-use.html +file + + + + +2010-06-28T09:04:53.000000Z +9228742f464a6a46991bc854d455c056 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +9171 + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/.svn/prop-base/LanguageAnalyzer.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/.svn/prop-base/LanguageAnalyzer.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 
+svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/.svn/prop-base/LanguageAnalyzers.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/.svn/prop-base/LanguageAnalyzers.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/.svn/prop-base/XMLFilteredReader.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/.svn/prop-base/XMLFilteredReader.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/.svn/prop-base/package-frame.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/.svn/prop-base/package-frame.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/.svn/prop-base/package-summary.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/.svn/prop-base/package-summary.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/.svn/prop-base/package-tree.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/.svn/prop-base/package-tree.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/.svn/prop-base/package-use.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/doc/de/mpiwg/dwinter/lucencetools/analyzer/.svn/prop-base/package-use.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/.svn/text-base/LanguageAnalyzer.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/.svn/text-base/LanguageAnalyzer.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,374 @@ + + + + + + +LanguageAnalyzer + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.lucencetools.analyzer +
+Class LanguageAnalyzer

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer
+
+
+
+
public class LanguageAnalyzer
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Field Summary
+ org.apache.lucene.analysis.Analyzeranalyzer + +
+           
+ java.lang.Stringlang + +
+           
+ org.apache.lucene.index.IndexReaderreader + +
+           
+ org.apache.lucene.search.IndexSearchersearcher + +
+           
+ org.apache.lucene.index.IndexWriterwriter + +
+           
+  + + + + + + + + + + + + + +
+Constructor Summary
LanguageAnalyzer(java.lang.String lang, + org.apache.lucene.analysis.Analyzer analyzer, + java.io.File index_dir) + +
+           
LanguageAnalyzer(java.lang.String lang, + org.apache.lucene.analysis.Analyzer analyzer, + java.io.File index_dir, + boolean onlyReader) + +
+           
+  + + + + + + + +
+Method Summary
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+lang

+
+public java.lang.String lang
+
+
+
+
+
+ +

+analyzer

+
+public org.apache.lucene.analysis.Analyzer analyzer
+
+
+
+
+
+ +

+writer

+
+public org.apache.lucene.index.IndexWriter writer
+
+
+
+
+
+ +

+reader

+
+public org.apache.lucene.index.IndexReader reader
+
+
+
+
+
+ +

+searcher

+
+public org.apache.lucene.search.IndexSearcher searcher
+
+
+
+
+ + + + + + + + +
+Constructor Detail
+ +

+LanguageAnalyzer

+
+public LanguageAnalyzer(java.lang.String lang,
+                        org.apache.lucene.analysis.Analyzer analyzer,
+                        java.io.File index_dir,
+                        boolean onlyReader)
+                 throws org.apache.lucene.index.CorruptIndexException,
+                        org.apache.lucene.store.LockObtainFailedException,
+                        java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException
+
+
+ +

+LanguageAnalyzer

+
+public LanguageAnalyzer(java.lang.String lang,
+                        org.apache.lucene.analysis.Analyzer analyzer,
+                        java.io.File index_dir)
+                 throws org.apache.lucene.index.CorruptIndexException,
+                        org.apache.lucene.store.LockObtainFailedException,
+                        java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/.svn/text-base/LanguageAnalyzers.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/.svn/text-base/LanguageAnalyzers.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,527 @@ + + + + + + +LanguageAnalyzers + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.lucencetools.analyzer +
+Class LanguageAnalyzers

+
+java.lang.Object
+  extended by java.util.AbstractCollection<E>
+      extended by java.util.AbstractList<E>
+          extended by java.util.ArrayList<LanguageAnalyzer>
+              extended by de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers
+
+
+
All Implemented Interfaces:
java.io.Serializable, java.lang.Cloneable, java.lang.Iterable<LanguageAnalyzer>, java.util.Collection<LanguageAnalyzer>, java.util.List<LanguageAnalyzer>, java.util.RandomAccess
+
+
+
+
public class LanguageAnalyzers
extends java.util.ArrayList<LanguageAnalyzer>
+ + +

+

+
See Also:
Serialized Form
+
+ +

+ + + + + + + + + + + + + + + +
+Field Summary
+private static intMAX_HITS_PER_PAGE + +
+           
+private static longserialVersionUID + +
+           
+ + + + + + + +
Fields inherited from class java.util.AbstractList
modCount
+  + + + + + + + + + + +
+Constructor Summary
LanguageAnalyzers() + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ voidaddDocument(org.apache.lucene.document.Document document, + java.lang.String lang) + +
+           
+ voidclose() + +
+           
+ voiddeleteDocument(int id) + +
+           
+ voiddeleteDocuments(org.apache.lucene.index.Term term) + +
+           
+ voiddeleteDocuments(org.apache.lucene.search.TermQuery query) + +
+           
+ LanguageAnalyzergetAnalyzer(java.lang.String lang) + +
+           
+ voidoptimize() + +
+           
+ java.util.HashMap<java.lang.String,org.apache.lucene.search.Collector>search(org.apache.lucene.search.TermQuery query) + +
+           
+ + + + + + + +
Methods inherited from class java.util.ArrayList
add, add, addAll, addAll, clear, clone, contains, ensureCapacity, get, indexOf, isEmpty, lastIndexOf, remove, remove, removeRange, set, size, toArray, toArray, trimToSize
+ + + + + + + +
Methods inherited from class java.util.AbstractList
equals, hashCode, iterator, listIterator, listIterator, subList
+ + + + + + + +
Methods inherited from class java.util.AbstractCollection
containsAll, removeAll, retainAll, toString
+ + + + + + + +
Methods inherited from class java.lang.Object
finalize, getClass, notify, notifyAll, wait, wait, wait
+ + + + + + + +
Methods inherited from interface java.util.List
containsAll, equals, hashCode, iterator, listIterator, listIterator, removeAll, retainAll, subList
+  +

+ + + + + + + + +
+Field Detail
+ +

+serialVersionUID

+
+private static final long serialVersionUID
+
+
+
See Also:
Constant Field Values
+
+
+ +

+MAX_HITS_PER_PAGE

+
+private static final int MAX_HITS_PER_PAGE
+
+
+
See Also:
Constant Field Values
+
+ + + + + + + + +
+Constructor Detail
+ +

+LanguageAnalyzers

+
+public LanguageAnalyzers()
+
+
+ + + + + + + + +
+Method Detail
+ +

+optimize

+
+public void optimize()
+              throws org.apache.lucene.index.CorruptIndexException,
+                     java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
java.io.IOException
+
+
+
+ +

+close

+
+public void close()
+           throws org.apache.lucene.index.CorruptIndexException,
+                  java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
java.io.IOException
+
+
+
+ +

+addDocument

+
+public void addDocument(org.apache.lucene.document.Document document,
+                        java.lang.String lang)
+                 throws org.apache.lucene.index.CorruptIndexException,
+                        java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
java.io.IOException
+
+
+
+ +

+getAnalyzer

+
+public LanguageAnalyzer getAnalyzer(java.lang.String lang)
+
+
+
+
+
+
+ +

+deleteDocuments

+
+public void deleteDocuments(org.apache.lucene.index.Term term)
+                     throws org.apache.lucene.index.CorruptIndexException,
+                            java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
java.io.IOException
+
+
+
+ +

+deleteDocuments

+
+public void deleteDocuments(org.apache.lucene.search.TermQuery query)
+                     throws org.apache.lucene.index.CorruptIndexException,
+                            java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
java.io.IOException
+
+
+
+ +

+deleteDocument

+
+public void deleteDocument(int id)
+                    throws org.apache.lucene.index.CorruptIndexException,
+                           java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
java.io.IOException
+
+
+
+ +

+search

+
+public java.util.HashMap<java.lang.String,org.apache.lucene.search.Collector> search(org.apache.lucene.search.TermQuery query)
+                                                                              throws java.io.IOException
+
+
+ +
Throws: +
java.io.IOException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/.svn/text-base/XMLFilteredReader.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/.svn/text-base/XMLFilteredReader.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,335 @@ + + + + + + +XMLFilteredReader + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.lucencetools.analyzer +
+Class XMLFilteredReader

+
+java.lang.Object
+  extended by java.io.Reader
+      extended by java.io.InputStreamReader
+          extended by de.mpiwg.dwinter.lucencetools.analyzer.XMLFilteredReader
+
+
+
All Implemented Interfaces:
java.io.Closeable, java.lang.Readable
+
+
+
+
public class XMLFilteredReader
extends java.io.InputStreamReader
+ + +

+


+ +

+ + + + + + + +
+Field Summary
+ + + + + + + +
Fields inherited from class java.io.Reader
lock
+  + + + + + + + + + + +
+Constructor Summary
XMLFilteredReader(java.io.InputStream in, + java.lang.String charsetName) + +
+           
+  + + + + + + + + + + + + + + + +
+Method Summary
+ intread() + +
+           
+ intread(char[] cbuf, + int offset, + int length) + +
+           
+ + + + + + + +
Methods inherited from class java.io.InputStreamReader
close, getEncoding, ready
+ + + + + + + +
Methods inherited from class java.io.Reader
mark, markSupported, read, read, reset, skip
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+XMLFilteredReader

+
+public XMLFilteredReader(java.io.InputStream in,
+                         java.lang.String charsetName)
+                  throws java.io.UnsupportedEncodingException
+
+
+ +
Throws: +
java.io.UnsupportedEncodingException
+
+ + + + + + + + +
+Method Detail
+ +

+read

+
+public int read()
+         throws java.io.IOException
+
+
+
Overrides:
read in class java.io.InputStreamReader
+
+
+ +
Throws: +
java.io.IOException
+
+
+
+ +

+read

+
+public int read(char[] cbuf,
+                int offset,
+                int length)
+         throws java.io.IOException
+
+
+
Overrides:
read in class java.io.InputStreamReader
+
+
+ +
Throws: +
java.io.IOException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/.svn/text-base/package-frame.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/.svn/text-base/package-frame.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,36 @@ + + + + + + +de.mpiwg.dwinter.lucencetools.analyzer + + + + + + + + + + + +de.mpiwg.dwinter.lucencetools.analyzer + + + + +
+Classes  + +
+LanguageAnalyzer +
+LanguageAnalyzers +
+XMLFilteredReader
+ + + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/.svn/text-base/package-summary.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/.svn/text-base/package-summary.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,165 @@ + + + + + + +de.mpiwg.dwinter.lucencetools.analyzer + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package de.mpiwg.dwinter.lucencetools.analyzer +

+ + + + + + + + + + + + + + + + + +
+Class Summary
LanguageAnalyzer 
LanguageAnalyzers 
XMLFilteredReader 
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/.svn/text-base/package-tree.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/.svn/text-base/package-tree.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,167 @@ + + + + + + +de.mpiwg.dwinter.lucencetools.analyzer Class Hierarchy + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package de.mpiwg.dwinter.lucencetools.analyzer +

+
+
+
Package Hierarchies:
All Packages
+
+

+Class Hierarchy +

+
    +
  • java.lang.Object
      +
    • java.util.AbstractCollection<E> (implements java.util.Collection<E>) +
        +
      • java.util.AbstractList<E> (implements java.util.List<E>) +
          +
        • java.util.ArrayList<E> (implements java.lang.Cloneable, java.util.List<E>, java.util.RandomAccess, java.io.Serializable) + +
        +
      +
    • de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer
    • java.io.Reader (implements java.io.Closeable, java.lang.Readable) +
        +
      • java.io.InputStreamReader +
      +
    +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/.svn/text-base/package-use.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/.svn/text-base/package-use.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,208 @@ + + + + + + +Uses of Package de.mpiwg.dwinter.lucencetools.analyzer + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Package
de.mpiwg.dwinter.lucencetools.analyzer

+
+ + + + + + + + + + + + + + + + + +
+Packages that use de.mpiwg.dwinter.lucencetools.analyzer
de.mpiwg.dwinter.fulltextIndexer.harvester  
de.mpiwg.dwinter.fulltextIndexer.harvester.processors  
de.mpiwg.dwinter.lucencetools.analyzer  
+  +

+ + + + + + + + +
+Classes in de.mpiwg.dwinter.lucencetools.analyzer used by de.mpiwg.dwinter.fulltextIndexer.harvester
LanguageAnalyzers + +
+           
+  +

+ + + + + + + + +
+Classes in de.mpiwg.dwinter.lucencetools.analyzer used by de.mpiwg.dwinter.fulltextIndexer.harvester.processors
LanguageAnalyzers + +
+           
+  +

+ + + + + + + + +
+Classes in de.mpiwg.dwinter.lucencetools.analyzer used by de.mpiwg.dwinter.lucencetools.analyzer
LanguageAnalyzer + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/LanguageAnalyzer.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/LanguageAnalyzer.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,374 @@ + + + + + + +LanguageAnalyzer + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.lucencetools.analyzer +
+Class LanguageAnalyzer

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer
+
+
+
+
public class LanguageAnalyzer
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Field Summary
+ org.apache.lucene.analysis.Analyzeranalyzer + +
+           
+ java.lang.Stringlang + +
+           
+ org.apache.lucene.index.IndexReaderreader + +
+           
+ org.apache.lucene.search.IndexSearchersearcher + +
+           
+ org.apache.lucene.index.IndexWriterwriter + +
+           
+  + + + + + + + + + + + + + +
+Constructor Summary
LanguageAnalyzer(java.lang.String lang, + org.apache.lucene.analysis.Analyzer analyzer, + java.io.File index_dir) + +
+           
LanguageAnalyzer(java.lang.String lang, + org.apache.lucene.analysis.Analyzer analyzer, + java.io.File index_dir, + boolean onlyReader) + +
+           
+  + + + + + + + +
+Method Summary
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+lang

+
+public java.lang.String lang
+
+
+
+
+
+ +

+analyzer

+
+public org.apache.lucene.analysis.Analyzer analyzer
+
+
+
+
+
+ +

+writer

+
+public org.apache.lucene.index.IndexWriter writer
+
+
+
+
+
+ +

+reader

+
+public org.apache.lucene.index.IndexReader reader
+
+
+
+
+
+ +

+searcher

+
+public org.apache.lucene.search.IndexSearcher searcher
+
+
+
+
+ + + + + + + + +
+Constructor Detail
+ +

+LanguageAnalyzer

+
+public LanguageAnalyzer(java.lang.String lang,
+                        org.apache.lucene.analysis.Analyzer analyzer,
+                        java.io.File index_dir,
+                        boolean onlyReader)
+                 throws org.apache.lucene.index.CorruptIndexException,
+                        org.apache.lucene.store.LockObtainFailedException,
+                        java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException
+
+
+ +

+LanguageAnalyzer

+
+public LanguageAnalyzer(java.lang.String lang,
+                        org.apache.lucene.analysis.Analyzer analyzer,
+                        java.io.File index_dir)
+                 throws org.apache.lucene.index.CorruptIndexException,
+                        org.apache.lucene.store.LockObtainFailedException,
+                        java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
org.apache.lucene.store.LockObtainFailedException +
java.io.IOException
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/LanguageAnalyzers.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/LanguageAnalyzers.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,527 @@ + + + + + + +LanguageAnalyzers + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.lucencetools.analyzer +
+Class LanguageAnalyzers

+
+java.lang.Object
+  extended by java.util.AbstractCollection<E>
+      extended by java.util.AbstractList<E>
+          extended by java.util.ArrayList<LanguageAnalyzer>
+              extended by de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers
+
+
+
All Implemented Interfaces:
java.io.Serializable, java.lang.Cloneable, java.lang.Iterable<LanguageAnalyzer>, java.util.Collection<LanguageAnalyzer>, java.util.List<LanguageAnalyzer>, java.util.RandomAccess
+
+
+
+
public class LanguageAnalyzers
extends java.util.ArrayList<LanguageAnalyzer>
+ + +

+

+
See Also:
Serialized Form
+
+ +

+ + + + + + + + + + + + + + + +
+Field Summary
+private static intMAX_HITS_PER_PAGE + +
+           
+private static longserialVersionUID + +
+           
+ + + + + + + +
Fields inherited from class java.util.AbstractList
modCount
+  + + + + + + + + + + +
+Constructor Summary
LanguageAnalyzers() + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ voidaddDocument(org.apache.lucene.document.Document document, + java.lang.String lang) + +
+           
+ voidclose() + +
+           
+ voiddeleteDocument(int id) + +
+           
+ voiddeleteDocuments(org.apache.lucene.index.Term term) + +
+           
+ voiddeleteDocuments(org.apache.lucene.search.TermQuery query) + +
+           
+ LanguageAnalyzergetAnalyzer(java.lang.String lang) + +
+           
+ voidoptimize() + +
+           
+ java.util.HashMap<java.lang.String,org.apache.lucene.search.Collector>search(org.apache.lucene.search.TermQuery query) + +
+           
+ + + + + + + +
Methods inherited from class java.util.ArrayList
add, add, addAll, addAll, clear, clone, contains, ensureCapacity, get, indexOf, isEmpty, lastIndexOf, remove, remove, removeRange, set, size, toArray, toArray, trimToSize
+ + + + + + + +
Methods inherited from class java.util.AbstractList
equals, hashCode, iterator, listIterator, listIterator, subList
+ + + + + + + +
Methods inherited from class java.util.AbstractCollection
containsAll, removeAll, retainAll, toString
+ + + + + + + +
Methods inherited from class java.lang.Object
finalize, getClass, notify, notifyAll, wait, wait, wait
+ + + + + + + +
Methods inherited from interface java.util.List
containsAll, equals, hashCode, iterator, listIterator, listIterator, removeAll, retainAll, subList
+  +

+ + + + + + + + +
+Field Detail
+ +

+serialVersionUID

+
+private static final long serialVersionUID
+
+
+
See Also:
Constant Field Values
+
+
+ +

+MAX_HITS_PER_PAGE

+
+private static final int MAX_HITS_PER_PAGE
+
+
+
See Also:
Constant Field Values
+
+ + + + + + + + +
+Constructor Detail
+ +

+LanguageAnalyzers

+
+public LanguageAnalyzers()
+
+
+ + + + + + + + +
+Method Detail
+ +

+optimize

+
+public void optimize()
+              throws org.apache.lucene.index.CorruptIndexException,
+                     java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
java.io.IOException
+
+
+
+ +

+close

+
+public void close()
+           throws org.apache.lucene.index.CorruptIndexException,
+                  java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
java.io.IOException
+
+
+
+ +

+addDocument

+
+public void addDocument(org.apache.lucene.document.Document document,
+                        java.lang.String lang)
+                 throws org.apache.lucene.index.CorruptIndexException,
+                        java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
java.io.IOException
+
+
+
+ +

+getAnalyzer

+
+public LanguageAnalyzer getAnalyzer(java.lang.String lang)
+
+
+
+
+
+
+ +

+deleteDocuments

+
+public void deleteDocuments(org.apache.lucene.index.Term term)
+                     throws org.apache.lucene.index.CorruptIndexException,
+                            java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
java.io.IOException
+
+
+
+ +

+deleteDocuments

+
+public void deleteDocuments(org.apache.lucene.search.TermQuery query)
+                     throws org.apache.lucene.index.CorruptIndexException,
+                            java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
java.io.IOException
+
+
+
+ +

+deleteDocument

+
+public void deleteDocument(int id)
+                    throws org.apache.lucene.index.CorruptIndexException,
+                           java.io.IOException
+
+
+ +
Throws: +
org.apache.lucene.index.CorruptIndexException +
java.io.IOException
+
+
+
+ +

+search

+
+public java.util.HashMap<java.lang.String,org.apache.lucene.search.Collector> search(org.apache.lucene.search.TermQuery query)
+                                                                              throws java.io.IOException
+
+
+ +
Throws: +
java.io.IOException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/XMLFilteredReader.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/XMLFilteredReader.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,335 @@ + + + + + + +XMLFilteredReader + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.lucencetools.analyzer +
+Class XMLFilteredReader

+
+java.lang.Object
+  extended by java.io.Reader
+      extended by java.io.InputStreamReader
+          extended by de.mpiwg.dwinter.lucencetools.analyzer.XMLFilteredReader
+
+
+
All Implemented Interfaces:
java.io.Closeable, java.lang.Readable
+
+
+
+
public class XMLFilteredReader
extends java.io.InputStreamReader
+ + +

+


+ +

+ + + + + + + +
+Field Summary
+ + + + + + + +
Fields inherited from class java.io.Reader
lock
+  + + + + + + + + + + +
+Constructor Summary
XMLFilteredReader(java.io.InputStream in, + java.lang.String charsetName) + +
+           
+  + + + + + + + + + + + + + + + +
+Method Summary
+ intread() + +
+           
+ intread(char[] cbuf, + int offset, + int length) + +
+           
+ + + + + + + +
Methods inherited from class java.io.InputStreamReader
close, getEncoding, ready
+ + + + + + + +
Methods inherited from class java.io.Reader
mark, markSupported, read, read, reset, skip
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+XMLFilteredReader

+
+public XMLFilteredReader(java.io.InputStream in,
+                         java.lang.String charsetName)
+                  throws java.io.UnsupportedEncodingException
+
+
+ +
Throws: +
java.io.UnsupportedEncodingException
+
+ + + + + + + + +
+Method Detail
+ +

+read

+
+public int read()
+         throws java.io.IOException
+
+
+
Overrides:
read in class java.io.InputStreamReader
+
+
+ +
Throws: +
java.io.IOException
+
+
+
+ +

+read

+
+public int read(char[] cbuf,
+                int offset,
+                int length)
+         throws java.io.IOException
+
+
+
Overrides:
read in class java.io.InputStreamReader
+
+
+ +
Throws: +
java.io.IOException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/class-use/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/class-use/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,23 @@ +K 25 +svn:wc:ra_dav:version-url +V 93 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/analyzer/class-use +END +LanguageAnalyzers.html +K 25 +svn:wc:ra_dav:version-url +V 116 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/analyzer/class-use/LanguageAnalyzers.html +END +XMLFilteredReader.html +K 25 +svn:wc:ra_dav:version-url +V 116 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/analyzer/class-use/XMLFilteredReader.html +END +LanguageAnalyzer.html +K 25 +svn:wc:ra_dav:version-url +V 115 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/analyzer/class-use/LanguageAnalyzer.html +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/class-use/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/class-use/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,137 @@ +10 + +dir +2662 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/analyzer/class-use +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-28T15:47:27.673424Z +2662 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +LanguageAnalyzer.html +file + + + + +2010-06-28T09:04:53.000000Z +efa44f002e6a557acf2574f6f100507a +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +8336 + +LanguageAnalyzers.html +file + + + + +2010-06-28T09:04:53.000000Z +92b717ac123a5bbb890736307f56c10d +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +13832 
+ +XMLFilteredReader.html +file + + + + +2010-06-28T09:04:53.000000Z +e5d014f1d5e0ab7cddad7dc8de322eba +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6156 + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/class-use/.svn/prop-base/LanguageAnalyzer.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/class-use/.svn/prop-base/LanguageAnalyzer.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/class-use/.svn/prop-base/LanguageAnalyzers.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/class-use/.svn/prop-base/LanguageAnalyzers.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/class-use/.svn/prop-base/XMLFilteredReader.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/class-use/.svn/prop-base/XMLFilteredReader.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/class-use/.svn/text-base/LanguageAnalyzer.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/class-use/.svn/text-base/LanguageAnalyzer.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,180 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer

+
+ + + + + + + + + +
+Packages that use LanguageAnalyzer
de.mpiwg.dwinter.lucencetools.analyzer  
+  +

+ + + + + +
+Uses of LanguageAnalyzer in de.mpiwg.dwinter.lucencetools.analyzer
+  +

+ + + + + + + + + +
Methods in de.mpiwg.dwinter.lucencetools.analyzer that return LanguageAnalyzer
+ LanguageAnalyzerLanguageAnalyzers.getAnalyzer(java.lang.String lang) + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/class-use/.svn/text-base/LanguageAnalyzers.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/class-use/.svn/text-base/LanguageAnalyzers.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,251 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers

+
+ + + + + + + + + + + + + +
+Packages that use LanguageAnalyzers
de.mpiwg.dwinter.fulltextIndexer.harvester  
de.mpiwg.dwinter.fulltextIndexer.harvester.processors  
+  +

+ + + + + +
+Uses of LanguageAnalyzers in de.mpiwg.dwinter.fulltextIndexer.harvester
+  +

+ + + + + + + + + + + + + +
Fields in de.mpiwg.dwinter.fulltextIndexer.harvester declared as LanguageAnalyzers
+protected  LanguageAnalyzersHarvesterThreaded.languageAnalyzers + +
+           
+protected  LanguageAnalyzersDocHarvesterThreaded.languageAnalyzers + +
+           
+  +

+ + + + + +
+Uses of LanguageAnalyzers in de.mpiwg.dwinter.fulltextIndexer.harvester.processors
+  +

+ + + + + + + + + +
Fields in de.mpiwg.dwinter.fulltextIndexer.harvester.processors declared as LanguageAnalyzers
+protected  LanguageAnalyzersProcessFileThread.languageAnalyzers + +
+           
+  +

+ + + + + + + + + + + +
Constructors in de.mpiwg.dwinter.fulltextIndexer.harvester.processors with parameters of type LanguageAnalyzers
OCRProcessFileThread(LanguageAnalyzers languageAnalyzers2, + java.io.File file, + java.lang.String lfn, + java.util.HashMap<java.lang.String,java.lang.String> tl, + java.lang.String mdProviderUrl, + java.lang.String preferedLanguage, + java.util.HashMap<java.lang.String,java.lang.String> languageToISO, + java.util.HashMap<java.lang.String,java.lang.String> supportedLanguageFolder) + +
+           
ProcessFileThread(LanguageAnalyzers languageAnalyzers2, + java.io.File file, + java.lang.String lfn, + java.util.HashMap<java.lang.String,java.lang.String> tl, + java.lang.String mdProviderUrl, + java.lang.String preferedLanguage, + java.util.HashMap<java.lang.String,java.lang.String> languageToISO, + java.util.HashMap<java.lang.String,java.lang.String> supportedLanguageFolder) + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/class-use/.svn/text-base/XMLFilteredReader.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/class-use/.svn/text-base/XMLFilteredReader.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.lucencetools.analyzer.XMLFilteredReader + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.lucencetools.analyzer.XMLFilteredReader

+
+No usage of de.mpiwg.dwinter.lucencetools.analyzer.XMLFilteredReader +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/class-use/LanguageAnalyzer.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/class-use/LanguageAnalyzer.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,180 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer

+
+ + + + + + + + + +
+Packages that use LanguageAnalyzer
de.mpiwg.dwinter.lucencetools.analyzer  
+  +

+ + + + + +
+Uses of LanguageAnalyzer in de.mpiwg.dwinter.lucencetools.analyzer
+  +

+ + + + + + + + + +
Methods in de.mpiwg.dwinter.lucencetools.analyzer that return LanguageAnalyzer
+ LanguageAnalyzerLanguageAnalyzers.getAnalyzer(java.lang.String lang) + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/class-use/LanguageAnalyzers.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/class-use/LanguageAnalyzers.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,251 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers

+
+ + + + + + + + + + + + + +
+Packages that use LanguageAnalyzers
de.mpiwg.dwinter.fulltextIndexer.harvester  
de.mpiwg.dwinter.fulltextIndexer.harvester.processors  
+  +

+ + + + + +
+Uses of LanguageAnalyzers in de.mpiwg.dwinter.fulltextIndexer.harvester
+  +

+ + + + + + + + + + + + + +
Fields in de.mpiwg.dwinter.fulltextIndexer.harvester declared as LanguageAnalyzers
+protected  LanguageAnalyzersHarvesterThreaded.languageAnalyzers + +
+           
+protected  LanguageAnalyzersDocHarvesterThreaded.languageAnalyzers + +
+           
+  +

+ + + + + +
+Uses of LanguageAnalyzers in de.mpiwg.dwinter.fulltextIndexer.harvester.processors
+  +

+ + + + + + + + + +
Fields in de.mpiwg.dwinter.fulltextIndexer.harvester.processors declared as LanguageAnalyzers
+protected  LanguageAnalyzersProcessFileThread.languageAnalyzers + +
+           
+  +

+ + + + + + + + + + + +
Constructors in de.mpiwg.dwinter.fulltextIndexer.harvester.processors with parameters of type LanguageAnalyzers
OCRProcessFileThread(LanguageAnalyzers languageAnalyzers2, + java.io.File file, + java.lang.String lfn, + java.util.HashMap<java.lang.String,java.lang.String> tl, + java.lang.String mdProviderUrl, + java.lang.String preferedLanguage, + java.util.HashMap<java.lang.String,java.lang.String> languageToISO, + java.util.HashMap<java.lang.String,java.lang.String> supportedLanguageFolder) + +
+           
ProcessFileThread(LanguageAnalyzers languageAnalyzers2, + java.io.File file, + java.lang.String lfn, + java.util.HashMap<java.lang.String,java.lang.String> tl, + java.lang.String mdProviderUrl, + java.lang.String preferedLanguage, + java.util.HashMap<java.lang.String,java.lang.String> languageToISO, + java.util.HashMap<java.lang.String,java.lang.String> supportedLanguageFolder) + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/class-use/XMLFilteredReader.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/class-use/XMLFilteredReader.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.lucencetools.analyzer.XMLFilteredReader + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.lucencetools.analyzer.XMLFilteredReader

+
+No usage of de.mpiwg.dwinter.lucencetools.analyzer.XMLFilteredReader +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,35 @@ +K 25 +svn:wc:ra_dav:version-url +V 99 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer +END +package-frame.html +K 25 +svn:wc:ra_dav:version-url +V 118 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/package-frame.html +END +package-tree.html +K 25 +svn:wc:ra_dav:version-url +V 117 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/package-tree.html +END +package-use.html +K 25 +svn:wc:ra_dav:version-url +V 116 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/package-use.html +END +package-summary.html +K 25 +svn:wc:ra_dav:version-url +V 120 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/package-summary.html +END +DonatusAnalyzer.html +K 25 +svn:wc:ra_dav:version-url +V 120 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/DonatusAnalyzer.html +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,208 @@ +10 + +dir +2662 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-28T15:47:27.673424Z +2662 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + 
+DonatusAnalyzer.html +file + + + + +2010-06-28T09:04:52.000000Z +27add69203a157db35fdc69b96ccba5e +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +7795 + +class-use +dir + +package-frame.html +file + + + + +2010-06-28T09:04:53.000000Z +019440177aac3bd040e89c36d20b95b7 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +1043 + +package-summary.html +file + + + + +2010-06-28T09:04:53.000000Z +8e411d46f6ce78b70b5b4d01cd02404e +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6654 + +package-tree.html +file + + + + +2010-06-28T09:04:53.000000Z +84d6a1f6de7076f3672205fdcbe65445 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6489 + +package-use.html +file + + + + +2010-06-28T09:04:53.000000Z +db1bed13184bc03219b55e5c35478dc5 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +5816 + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/.svn/prop-base/DonatusAnalyzer.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/.svn/prop-base/DonatusAnalyzer.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/.svn/prop-base/package-frame.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/.svn/prop-base/package-frame.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/.svn/prop-base/package-summary.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/.svn/prop-base/package-summary.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/.svn/prop-base/package-tree.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/.svn/prop-base/package-tree.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/.svn/prop-base/package-use.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/.svn/prop-base/package-use.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/.svn/text-base/DonatusAnalyzer.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/.svn/text-base/DonatusAnalyzer.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,204 @@ + + + + + + +DonatusAnalyzer + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer +
+Interface DonatusAnalyzer

+
+
+
public interface DonatusAnalyzer
+ + +

+


+ +

+ + + + + + + + + + + +
+Field Summary
+static java.lang.StringBuffermorphFile + +
+           
+  +

+ + + + + + + + +
+Field Detail
+ +

+morphFile

+
+static final java.lang.StringBuffer morphFile
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/.svn/text-base/package-frame.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/.svn/text-base/package-frame.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,32 @@ + + + + + + +de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer + + + + + + + + + + + +de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer + + + + +
+Interfaces  + +
+DonatusAnalyzer
+ + + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/.svn/text-base/package-summary.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/.svn/text-base/package-summary.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,157 @@ + + + + + + +de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer +

+ + + + + + + + + +
+Interface Summary
DonatusAnalyzer 
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/.svn/text-base/package-tree.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/.svn/text-base/package-tree.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,151 @@ + + + + + + +de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer Class Hierarchy + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer +

+
+
+
Package Hierarchies:
All Packages
+
+

+Interface Hierarchy +

+ +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/.svn/text-base/package-use.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/.svn/text-base/package-use.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Package de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Package
de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer

+
+No usage of de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/DonatusAnalyzer.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/DonatusAnalyzer.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,204 @@ + + + + + + +DonatusAnalyzer + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer +
+Interface DonatusAnalyzer

+
+
+
public interface DonatusAnalyzer
+ + +

+


+ +

+ + + + + + + + + + + +
+Field Summary
+static java.lang.StringBuffermorphFile + +
+           
+  +

+ + + + + + + + +
+Field Detail
+ +

+morphFile

+
+static final java.lang.StringBuffer morphFile
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/class-use/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/class-use/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,11 @@ +K 25 +svn:wc:ra_dav:version-url +V 109 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/class-use +END +DonatusAnalyzer.html +K 25 +svn:wc:ra_dav:version-url +V 130 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/class-use/DonatusAnalyzer.html +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/class-use/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/class-use/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,69 @@ +10 + +dir +2662 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/class-use +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-28T15:47:27.673424Z +2662 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +DonatusAnalyzer.html +file + + + + +2010-06-28T09:04:53.000000Z +0daaf96292e4baf71383eb0123f99045 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6367 + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/class-use/.svn/prop-base/DonatusAnalyzer.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/class-use/.svn/prop-base/DonatusAnalyzer.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea 
doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/class-use/.svn/text-base/DonatusAnalyzer.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/class-use/.svn/text-base/DonatusAnalyzer.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Interface de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer.DonatusAnalyzer + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Interface
de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer.DonatusAnalyzer

+
+No usage of de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer.DonatusAnalyzer +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/class-use/DonatusAnalyzer.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/class-use/DonatusAnalyzer.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Interface de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer.DonatusAnalyzer + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Interface
de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer.DonatusAnalyzer

+
+No usage of de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer.DonatusAnalyzer +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/package-frame.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/package-frame.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,32 @@ + + + + + + +de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer + + + + + + + + + + + +de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer + + + + +
+Interfaces  + +
+DonatusAnalyzer
+ + + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/package-summary.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/package-summary.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,157 @@ + + + + + + +de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer +

+ + + + + + + + + +
+Interface Summary
DonatusAnalyzer 
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/package-tree.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/package-tree.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,151 @@ + + + + + + +de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer Class Hierarchy + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer +

+
+
+
Package Hierarchies:
All Packages
+
+

+Interface Hierarchy +

+ +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/package-use.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/package-use.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Package de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Package
de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer

+
+No usage of de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/package-frame.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/package-frame.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,36 @@ + + + + + + +de.mpiwg.dwinter.lucencetools.analyzer + + + + + + + + + + + +de.mpiwg.dwinter.lucencetools.analyzer + + + + +
+Classes  + +
+LanguageAnalyzer +
+LanguageAnalyzers +
+XMLFilteredReader
+ + + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/package-summary.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/package-summary.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,165 @@ + + + + + + +de.mpiwg.dwinter.lucencetools.analyzer + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package de.mpiwg.dwinter.lucencetools.analyzer +

+ + + + + + + + + + + + + + + + + +
+Class Summary
LanguageAnalyzer 
LanguageAnalyzers 
XMLFilteredReader 
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/package-tree.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/package-tree.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,167 @@ + + + + + + +de.mpiwg.dwinter.lucencetools.analyzer Class Hierarchy + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package de.mpiwg.dwinter.lucencetools.analyzer +

+
+
+
Package Hierarchies:
All Packages
+
+

+Class Hierarchy +

+
    +
  • java.lang.Object
      +
    • java.util.AbstractCollection<E> (implements java.util.Collection<E>) +
        +
      • java.util.AbstractList<E> (implements java.util.List<E>) +
          +
        • java.util.ArrayList<E> (implements java.lang.Cloneable, java.util.List<E>, java.util.RandomAccess, java.io.Serializable) + +
        +
      +
    • de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer
    • java.io.Reader (implements java.io.Closeable, java.lang.Readable) +
        +
      • java.io.InputStreamReader +
      +
    +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/analyzer/package-use.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/analyzer/package-use.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,208 @@ + + + + + + +Uses of Package de.mpiwg.dwinter.lucencetools.analyzer + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Package
de.mpiwg.dwinter.lucencetools.analyzer

+
+ + + + + + + + + + + + + + + + + +
+Packages that use de.mpiwg.dwinter.lucencetools.analyzer
de.mpiwg.dwinter.fulltextIndexer.harvester  
de.mpiwg.dwinter.fulltextIndexer.harvester.processors  
de.mpiwg.dwinter.lucencetools.analyzer  
+  +

+ + + + + + + + +
+Classes in de.mpiwg.dwinter.lucencetools.analyzer used by de.mpiwg.dwinter.fulltextIndexer.harvester
LanguageAnalyzers + +
+           
+  +

+ + + + + + + + +
+Classes in de.mpiwg.dwinter.lucencetools.analyzer used by de.mpiwg.dwinter.fulltextIndexer.harvester.processors
LanguageAnalyzers + +
+           
+  +

+ + + + + + + + +
+Classes in de.mpiwg.dwinter.lucencetools.analyzer used by de.mpiwg.dwinter.lucencetools.analyzer
LanguageAnalyzer + +
+           
+  +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/class-use/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/class-use/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,11 @@ +K 25 +svn:wc:ra_dav:version-url +V 84 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/class-use +END +LineCollector.html +K 25 +svn:wc:ra_dav:version-url +V 103 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/class-use/LineCollector.html +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/class-use/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/class-use/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,69 @@ +10 + +dir +2662 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/class-use +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-28T15:47:27.673424Z +2662 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +LineCollector.html +file + + + + +2010-06-28T09:04:53.000000Z +c4709340f42c0f6b429d801fe222d652 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +5975 + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/class-use/.svn/prop-base/LineCollector.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/class-use/.svn/prop-base/LineCollector.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/class-use/.svn/text-base/LineCollector.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/class-use/.svn/text-base/LineCollector.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ 
-0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.lucencetools.LineCollector + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.lucencetools.LineCollector

+
+No usage of de.mpiwg.dwinter.lucencetools.LineCollector +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/class-use/LineCollector.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/class-use/LineCollector.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.lucencetools.LineCollector + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.lucencetools.LineCollector

+
+No usage of de.mpiwg.dwinter.lucencetools.LineCollector +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/documents/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,47 @@ +K 25 +svn:wc:ra_dav:version-url +V 84 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/documents +END +FileDocument.html +K 25 +svn:wc:ra_dav:version-url +V 102 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/documents/FileDocument.html +END +package-frame.html +K 25 +svn:wc:ra_dav:version-url +V 103 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/documents/package-frame.html +END +package-tree.html +K 25 +svn:wc:ra_dav:version-url +V 102 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/documents/package-tree.html +END +MorphDocument.html +K 25 +svn:wc:ra_dav:version-url +V 103 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/documents/MorphDocument.html +END +package-use.html +K 25 +svn:wc:ra_dav:version-url +V 101 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/documents/package-use.html +END +package-summary.html +K 25 +svn:wc:ra_dav:version-url +V 105 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/documents/package-summary.html +END +OcropusLineDocument.html +K 25 +svn:wc:ra_dav:version-url +V 109 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/documents/OcropusLineDocument.html +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/documents/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,276 @@ +10 + +dir +2662 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/documents 
+https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-28T15:47:27.673424Z +2662 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +FileDocument.html +file + + + + +2010-06-28T09:04:52.000000Z +6b36868e5fc07bc194406a5cf50cdac8 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +11587 + +MorphDocument.html +file + + + + +2010-06-28T09:04:52.000000Z +09863c99e5f54c530f7e53dcca0f4edb +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +10693 + +OcropusLineDocument.html +file + + + + +2010-06-28T09:04:52.000000Z +62dd081c36aff269646f688a734fc79a +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +13562 + +class-use +dir + +package-frame.html +file + + + + +2010-06-28T09:04:53.000000Z +0488df2c056a167eeb7957d3396b37a8 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +1227 + +package-summary.html +file + + + + +2010-06-28T09:04:53.000000Z +a11531a58cc3974ba9708153a7a21ed3 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6829 + +package-tree.html +file + + + + +2010-06-28T09:04:53.000000Z +335d513a947ac83868eb4de98d6e8d80 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6639 + +package-use.html +file + + + + +2010-06-28T09:04:53.000000Z +f5bec40e0b1bac9f5d655e7462a3de11 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +5681 + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/.svn/prop-base/FileDocument.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/documents/.svn/prop-base/FileDocument.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 
000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/.svn/prop-base/MorphDocument.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/documents/.svn/prop-base/MorphDocument.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/.svn/prop-base/OcropusLineDocument.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/documents/.svn/prop-base/OcropusLineDocument.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/.svn/prop-base/package-frame.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/documents/.svn/prop-base/package-frame.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/.svn/prop-base/package-summary.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/documents/.svn/prop-base/package-summary.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/.svn/prop-base/package-tree.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/documents/.svn/prop-base/package-tree.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/.svn/prop-base/package-use.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/doc/de/mpiwg/dwinter/lucencetools/documents/.svn/prop-base/package-use.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/.svn/text-base/FileDocument.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/documents/.svn/text-base/FileDocument.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,287 @@ + + + + + + +FileDocument + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.lucencetools.documents +
+Class FileDocument

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.lucencetools.documents.FileDocument
+
+
+
+
public class FileDocument
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
FileDocument() + +
+           
+  + + + + + + + + + + + + + + + +
+Method Summary
+static org.apache.lucene.document.DocumentDocument(java.io.File f, + java.lang.String language, + java.lang.String textId) + +
+           
+static org.apache.lucene.document.DocumentDocument(java.io.File f, + java.lang.String language, + java.lang.String dcMetaData, + java.lang.String textId) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+FileDocument

+
+public FileDocument()
+
+
+ + + + + + + + +
+Method Detail
+ +

+Document

+
+public static org.apache.lucene.document.Document Document(java.io.File f,
+                                                           java.lang.String language,
+                                                           java.lang.String textId)
+                                                    throws java.io.IOException
+
+
+ +
Throws: +
java.io.IOException
+
+
+
+ +

+Document

+
+public static org.apache.lucene.document.Document Document(java.io.File f,
+                                                           java.lang.String language,
+                                                           java.lang.String dcMetaData,
+                                                           java.lang.String textId)
+                                                    throws java.io.IOException
+
+
+ +
Throws: +
java.io.IOException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/.svn/text-base/MorphDocument.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/documents/.svn/text-base/MorphDocument.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,259 @@ + + + + + + +MorphDocument + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.lucencetools.documents +
+Class MorphDocument

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.lucencetools.documents.MorphDocument
+
+
+
+
public class MorphDocument
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
MorphDocument() + +
+           
+  + + + + + + + + + + + +
+Method Summary
+static org.apache.lucene.document.DocumentDocument(java.lang.String donatusMorph, + java.lang.String path) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+MorphDocument

+
+public MorphDocument()
+
+
+ + + + + + + + +
+Method Detail
+ +

+Document

+
+public static org.apache.lucene.document.Document Document(java.lang.String donatusMorph,
+                                                           java.lang.String path)
+                                                    throws java.io.FileNotFoundException,
+                                                           java.io.UnsupportedEncodingException
+
+
+ +
Throws: +
java.io.FileNotFoundException +
java.io.UnsupportedEncodingException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/.svn/text-base/OcropusLineDocument.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/documents/.svn/text-base/OcropusLineDocument.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,299 @@ + + + + + + +OcropusLineDocument + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.lucencetools.documents +
+Class OcropusLineDocument

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.lucencetools.documents.OcropusLineDocument
+
+
+
+
public class OcropusLineDocument
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
OcropusLineDocument() + +
+           
+  + + + + + + + + + + + + + + + +
+Method Summary
+static org.apache.lucene.document.DocumentDocument(java.io.File f, + java.lang.String language, + OCRDocument.OCRLine ocrline, + java.lang.String pageDimension, + java.lang.String textId) + +
+           
+static org.apache.lucene.document.DocumentDocument(java.io.File f, + java.lang.String language, + OCRDocument.OCRLine ocrline, + java.lang.String pageDimension, + java.lang.String dcMetaData, + java.lang.String textId) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+OcropusLineDocument

+
+public OcropusLineDocument()
+
+
+ + + + + + + + +
+Method Detail
+ +

+Document

+
+public static org.apache.lucene.document.Document Document(java.io.File f,
+                                                           java.lang.String language,
+                                                           OCRDocument.OCRLine ocrline,
+                                                           java.lang.String pageDimension,
+                                                           java.lang.String textId)
+                                                    throws java.io.FileNotFoundException,
+                                                           java.io.UnsupportedEncodingException
+
+
+ +
Throws: +
java.io.FileNotFoundException +
java.io.UnsupportedEncodingException
+
+
+
+ +

+Document

+
+public static org.apache.lucene.document.Document Document(java.io.File f,
+                                                           java.lang.String language,
+                                                           OCRDocument.OCRLine ocrline,
+                                                           java.lang.String pageDimension,
+                                                           java.lang.String dcMetaData,
+                                                           java.lang.String textId)
+                                                    throws java.io.FileNotFoundException,
+                                                           java.io.UnsupportedEncodingException
+
+
+ +
Throws: +
java.io.FileNotFoundException +
java.io.UnsupportedEncodingException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/.svn/text-base/package-frame.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/documents/.svn/text-base/package-frame.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,36 @@ + + + + + + +de.mpiwg.dwinter.lucencetools.documents + + + + + + + + + + + +de.mpiwg.dwinter.lucencetools.documents + + + + +
+Classes  + +
+FileDocument +
+MorphDocument +
+OcropusLineDocument
+ + + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/.svn/text-base/package-summary.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/documents/.svn/text-base/package-summary.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,165 @@ + + + + + + +de.mpiwg.dwinter.lucencetools.documents + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package de.mpiwg.dwinter.lucencetools.documents +

+ + + + + + + + + + + + + + + + + +
+Class Summary
FileDocument 
MorphDocument 
OcropusLineDocument 
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/.svn/text-base/package-tree.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/documents/.svn/text-base/package-tree.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,153 @@ + + + + + + +de.mpiwg.dwinter.lucencetools.documents Class Hierarchy + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package de.mpiwg.dwinter.lucencetools.documents +

+
+
+
Package Hierarchies:
All Packages
+
+

+Class Hierarchy +

+ +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/.svn/text-base/package-use.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/documents/.svn/text-base/package-use.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Package de.mpiwg.dwinter.lucencetools.documents + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Package
de.mpiwg.dwinter.lucencetools.documents

+
+No usage of de.mpiwg.dwinter.lucencetools.documents +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/FileDocument.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/documents/FileDocument.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,287 @@ + + + + + + +FileDocument + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.lucencetools.documents +
+Class FileDocument

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.lucencetools.documents.FileDocument
+
+
+
+
public class FileDocument
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
FileDocument() + +
+           
+  + + + + + + + + + + + + + + + +
+Method Summary
+static org.apache.lucene.document.DocumentDocument(java.io.File f, + java.lang.String language, + java.lang.String textId) + +
+           
+static org.apache.lucene.document.DocumentDocument(java.io.File f, + java.lang.String language, + java.lang.String dcMetaData, + java.lang.String textId) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+FileDocument

+
+public FileDocument()
+
+
+ + + + + + + + +
+Method Detail
+ +

+Document

+
+public static org.apache.lucene.document.Document Document(java.io.File f,
+                                                           java.lang.String language,
+                                                           java.lang.String textId)
+                                                    throws java.io.IOException
+
+
+ +
Throws: +
java.io.IOException
+
+
+
+ +

+Document

+
+public static org.apache.lucene.document.Document Document(java.io.File f,
+                                                           java.lang.String language,
+                                                           java.lang.String dcMetaData,
+                                                           java.lang.String textId)
+                                                    throws java.io.IOException
+
+
+ +
Throws: +
java.io.IOException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/MorphDocument.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/documents/MorphDocument.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,259 @@ + + + + + + +MorphDocument + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.lucencetools.documents +
+Class MorphDocument

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.lucencetools.documents.MorphDocument
+
+
+
+
public class MorphDocument
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
MorphDocument() + +
+           
+  + + + + + + + + + + + +
+Method Summary
+static org.apache.lucene.document.DocumentDocument(java.lang.String donatusMorph, + java.lang.String path) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+MorphDocument

+
+public MorphDocument()
+
+
+ + + + + + + + +
+Method Detail
+ +

+Document

+
+public static org.apache.lucene.document.Document Document(java.lang.String donatusMorph,
+                                                           java.lang.String path)
+                                                    throws java.io.FileNotFoundException,
+                                                           java.io.UnsupportedEncodingException
+
+
+ +
Throws: +
java.io.FileNotFoundException +
java.io.UnsupportedEncodingException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/OcropusLineDocument.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/documents/OcropusLineDocument.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,299 @@ + + + + + + +OcropusLineDocument + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +de.mpiwg.dwinter.lucencetools.documents +
+Class OcropusLineDocument

+
+java.lang.Object
+  extended by de.mpiwg.dwinter.lucencetools.documents.OcropusLineDocument
+
+
+
+
public class OcropusLineDocument
extends java.lang.Object
+ + +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
OcropusLineDocument() + +
+           
+  + + + + + + + + + + + + + + + +
+Method Summary
+static org.apache.lucene.document.DocumentDocument(java.io.File f, + java.lang.String language, + OCRDocument.OCRLine ocrline, + java.lang.String pageDimension, + java.lang.String textId) + +
+           
+static org.apache.lucene.document.DocumentDocument(java.io.File f, + java.lang.String language, + OCRDocument.OCRLine ocrline, + java.lang.String pageDimension, + java.lang.String dcMetaData, + java.lang.String textId) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+OcropusLineDocument

+
+public OcropusLineDocument()
+
+
+ + + + + + + + +
+Method Detail
+ +

+Document

+
+public static org.apache.lucene.document.Document Document(java.io.File f,
+                                                           java.lang.String language,
+                                                           OCRDocument.OCRLine ocrline,
+                                                           java.lang.String pageDimension,
+                                                           java.lang.String textId)
+                                                    throws java.io.FileNotFoundException,
+                                                           java.io.UnsupportedEncodingException
+
+
+ +
Throws: +
java.io.FileNotFoundException +
java.io.UnsupportedEncodingException
+
+
+
+ +

+Document

+
+public static org.apache.lucene.document.Document Document(java.io.File f,
+                                                           java.lang.String language,
+                                                           OCRDocument.OCRLine ocrline,
+                                                           java.lang.String pageDimension,
+                                                           java.lang.String dcMetaData,
+                                                           java.lang.String textId)
+                                                    throws java.io.FileNotFoundException,
+                                                           java.io.UnsupportedEncodingException
+
+
+ +
Throws: +
java.io.FileNotFoundException +
java.io.UnsupportedEncodingException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/class-use/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/documents/class-use/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,23 @@ +K 25 +svn:wc:ra_dav:version-url +V 94 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/documents/class-use +END +FileDocument.html +K 25 +svn:wc:ra_dav:version-url +V 112 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/documents/class-use/FileDocument.html +END +MorphDocument.html +K 25 +svn:wc:ra_dav:version-url +V 113 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/documents/class-use/MorphDocument.html +END +OcropusLineDocument.html +K 25 +svn:wc:ra_dav:version-url +V 119 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/documents/class-use/OcropusLineDocument.html +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/class-use/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/documents/class-use/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,137 @@ +10 + +dir +2662 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/doc/de/mpiwg/dwinter/lucencetools/documents/class-use +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-28T15:47:27.673424Z +2662 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +FileDocument.html +file + + + + +2010-06-28T09:04:53.000000Z +f28ce8ef86a4b6fea41bd345ba4e1991 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6116 + +MorphDocument.html +file + + + + +2010-06-28T09:04:53.000000Z +62728d56ff2204a7a066a5d84fd45d34 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6126 + 
+OcropusLineDocument.html +file + + + + +2010-06-28T09:04:53.000000Z +f3a14ff83493e3476ed26f94cd5a95bd +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6186 + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/class-use/.svn/prop-base/FileDocument.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/documents/class-use/.svn/prop-base/FileDocument.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/class-use/.svn/prop-base/MorphDocument.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/documents/class-use/.svn/prop-base/MorphDocument.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/class-use/.svn/prop-base/OcropusLineDocument.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/documents/class-use/.svn/prop-base/OcropusLineDocument.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/class-use/.svn/text-base/FileDocument.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/documents/class-use/.svn/text-base/FileDocument.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.lucencetools.documents.FileDocument + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.lucencetools.documents.FileDocument

+
+No usage of de.mpiwg.dwinter.lucencetools.documents.FileDocument +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/class-use/.svn/text-base/MorphDocument.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/documents/class-use/.svn/text-base/MorphDocument.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.lucencetools.documents.MorphDocument + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.lucencetools.documents.MorphDocument

+
+No usage of de.mpiwg.dwinter.lucencetools.documents.MorphDocument +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/class-use/.svn/text-base/OcropusLineDocument.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/documents/class-use/.svn/text-base/OcropusLineDocument.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.lucencetools.documents.OcropusLineDocument + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.lucencetools.documents.OcropusLineDocument

+
+No usage of de.mpiwg.dwinter.lucencetools.documents.OcropusLineDocument +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/class-use/FileDocument.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/documents/class-use/FileDocument.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.lucencetools.documents.FileDocument + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.lucencetools.documents.FileDocument

+
+No usage of de.mpiwg.dwinter.lucencetools.documents.FileDocument +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/class-use/MorphDocument.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/documents/class-use/MorphDocument.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.lucencetools.documents.MorphDocument + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.lucencetools.documents.MorphDocument

+
+No usage of de.mpiwg.dwinter.lucencetools.documents.MorphDocument +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/class-use/OcropusLineDocument.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/documents/class-use/OcropusLineDocument.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Class de.mpiwg.dwinter.lucencetools.documents.OcropusLineDocument + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Class
de.mpiwg.dwinter.lucencetools.documents.OcropusLineDocument

+
+No usage of de.mpiwg.dwinter.lucencetools.documents.OcropusLineDocument +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/package-frame.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/documents/package-frame.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,36 @@ + + + + + + +de.mpiwg.dwinter.lucencetools.documents + + + + + + + + + + + +de.mpiwg.dwinter.lucencetools.documents + + + + +
+Classes  + +
+FileDocument +
+MorphDocument +
+OcropusLineDocument
+ + + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/package-summary.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/documents/package-summary.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,165 @@ + + + + + + +de.mpiwg.dwinter.lucencetools.documents + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package de.mpiwg.dwinter.lucencetools.documents +

+ + + + + + + + + + + + + + + + + +
+Class Summary
FileDocument 
MorphDocument 
OcropusLineDocument 
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/package-tree.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/documents/package-tree.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,153 @@ + + + + + + +de.mpiwg.dwinter.lucencetools.documents Class Hierarchy + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package de.mpiwg.dwinter.lucencetools.documents +

+
+
+
Package Hierarchies:
All Packages
+
+

+Class Hierarchy +

+ +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/documents/package-use.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/documents/package-use.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Package de.mpiwg.dwinter.lucencetools.documents + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Package
de.mpiwg.dwinter.lucencetools.documents

+
+No usage of de.mpiwg.dwinter.lucencetools.documents +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/package-frame.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/package-frame.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,32 @@ + + + + + + +de.mpiwg.dwinter.lucencetools + + + + + + + + + + + +de.mpiwg.dwinter.lucencetools + + + + +
+Classes  + +
+LineCollector
+ + + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/package-summary.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/package-summary.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,157 @@ + + + + + + +de.mpiwg.dwinter.lucencetools + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package de.mpiwg.dwinter.lucencetools +

+ + + + + + + + + +
+Class Summary
LineCollector 
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/package-tree.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/package-tree.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,159 @@ + + + + + + +de.mpiwg.dwinter.lucencetools Class Hierarchy + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package de.mpiwg.dwinter.lucencetools +

+
+
+
Package Hierarchies:
All Packages
+
+

+Class Hierarchy +

+
    +
  • java.lang.Object
      +
    • org.apache.lucene.search.Collector
        +
      • org.apache.lucene.search.TopDocsCollector<T>
          +
        • org.apache.lucene.search.TopScoreDocCollector +
        +
      +
    +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/de/mpiwg/dwinter/lucencetools/package-use.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/de/mpiwg/dwinter/lucencetools/package-use.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,144 @@ + + + + + + +Uses of Package de.mpiwg.dwinter.lucencetools + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Uses of Package
de.mpiwg.dwinter.lucencetools

+
+No usage of de.mpiwg.dwinter.lucencetools +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/deprecated-list.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/deprecated-list.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,146 @@ + + + + + + +Deprecated List + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Deprecated API

+
+
+Contents
    +
+ +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/help-doc.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/help-doc.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,223 @@ + + + + + + +API Help + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+How This API Document Is Organized

+
+This API (Application Programming Interface) document has pages corresponding to the items in the navigation bar, described as follows.

+Overview

+
+ +

+The Overview page is the front page of this API document and provides a list of all packages with a summary for each. This page can also contain an overall description of the set of packages.

+

+Package

+
+ +

+Each package has a page that contains a list of its classes and interfaces, with a summary for each. This page can contain four categories:

    +
  • Interfaces (italic)
  • Classes
  • Enums
  • Exceptions
  • Errors
  • Annotation Types
+
+

+Class/Interface

+
+ +

+Each class, interface, nested class and nested interface has its own separate page. Each of these pages has three sections consisting of a class/interface description, summary tables, and detailed member descriptions:

    +
  • Class inheritance diagram
  • Direct Subclasses
  • All Known Subinterfaces
  • All Known Implementing Classes
  • Class/interface declaration
  • Class/interface description +

    +

  • Nested Class Summary
  • Field Summary
  • Constructor Summary
  • Method Summary +

    +

  • Field Detail
  • Constructor Detail
  • Method Detail
+Each summary entry contains the first sentence from the detailed description for that item. The summary entries are alphabetical, while the detailed descriptions are in the order they appear in the source code. This preserves the logical groupings established by the programmer.
+ +

+Annotation Type

+
+ +

+Each annotation type has its own separate page with the following sections:

    +
  • Annotation Type declaration
  • Annotation Type description
  • Required Element Summary
  • Optional Element Summary
  • Element Detail
+
+ +

+Enum

+
+ +

+Each enum has its own separate page with the following sections:

    +
  • Enum declaration
  • Enum description
  • Enum Constant Summary
  • Enum Constant Detail
+
+

+Use

+
+Each documented package, class and interface has its own Use page. This page describes what packages, classes, methods, constructors and fields use any part of the given class or package. Given a class or interface A, its Use page includes subclasses of A, fields declared as A, methods that return A, and methods and constructors with parameters of type A. You can access this page by first going to the package, class or interface, then clicking on the "Use" link in the navigation bar.
+

+Tree (Class Hierarchy)

+
+There is a Class Hierarchy page for all packages, plus a hierarchy for each package. Each hierarchy page contains a list of classes and a list of interfaces. The classes are organized by inheritance structure starting with java.lang.Object. The interfaces do not inherit from java.lang.Object.
    +
  • When viewing the Overview page, clicking on "Tree" displays the hierarchy for all packages.
  • When viewing a particular package, class or interface page, clicking "Tree" displays the hierarchy for only that package.
+
+

+Deprecated API

+
+The Deprecated API page lists all of the API that have been deprecated. A deprecated API is not recommended for use, generally due to improvements, and a replacement API is usually given. Deprecated APIs may be removed in future implementations.
+

+Index

+
+The Index contains an alphabetic list of all classes, interfaces, constructors, methods, and fields.
+

+Prev/Next

+These links take you to the next or previous class, interface, package, or related page.

+Frames/No Frames

+These links show and hide the HTML frames. All pages are available with or without frames. +

+

+Serialized Form

+Each serializable or externalizable class has a description of its serialization fields and methods. This information is of interest to re-implementors, not to developers using the API. While there is no link in the navigation bar, you can get to this information by going to any serialized class and clicking "Serialized Form" in the "See also" section of the class description. +

+

+Constant Field Values

+The Constant Field Values page lists the static final fields and their values. +

+ + +This help file applies to API documentation generated using the standard doclet. + +
+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,125 @@ +K 25 +svn:wc:ra_dav:version-url +V 56 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/index-files +END +index-20.html +K 25 +svn:wc:ra_dav:version-url +V 70 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/index-files/index-20.html +END +index-4.html +K 25 +svn:wc:ra_dav:version-url +V 69 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/index-files/index-4.html +END +index-3.html +K 25 +svn:wc:ra_dav:version-url +V 69 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/index-files/index-3.html +END +index-2.html +K 25 +svn:wc:ra_dav:version-url +V 69 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/index-files/index-2.html +END +index-1.html +K 25 +svn:wc:ra_dav:version-url +V 69 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/index-files/index-1.html +END +index-19.html +K 25 +svn:wc:ra_dav:version-url +V 70 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/index-files/index-19.html +END +index-18.html +K 25 +svn:wc:ra_dav:version-url +V 70 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/index-files/index-18.html +END +index-17.html +K 25 +svn:wc:ra_dav:version-url +V 70 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/index-files/index-17.html +END +index-16.html +K 25 +svn:wc:ra_dav:version-url +V 70 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/index-files/index-16.html +END +index-15.html +K 25 +svn:wc:ra_dav:version-url +V 70 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/index-files/index-15.html +END +index-14.html +K 25 +svn:wc:ra_dav:version-url +V 70 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/index-files/index-14.html +END +index-9.html +K 25 +svn:wc:ra_dav:version-url +V 69 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/index-files/index-9.html +END +index-13.html +K 25 +svn:wc:ra_dav:version-url +V 70 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/index-files/index-13.html 
+END +index-8.html +K 25 +svn:wc:ra_dav:version-url +V 69 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/index-files/index-8.html +END +index-12.html +K 25 +svn:wc:ra_dav:version-url +V 70 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/index-files/index-12.html +END +index-7.html +K 25 +svn:wc:ra_dav:version-url +V 69 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/index-files/index-7.html +END +index-11.html +K 25 +svn:wc:ra_dav:version-url +V 70 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/index-files/index-11.html +END +index-6.html +K 25 +svn:wc:ra_dav:version-url +V 69 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/index-files/index-6.html +END +index-10.html +K 25 +svn:wc:ra_dav:version-url +V 70 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/index-files/index-10.html +END +index-5.html +K 25 +svn:wc:ra_dav:version-url +V 69 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/index-files/index-5.html +END diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,715 @@ +10 + +dir +2662 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/doc/index-files +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-28T15:47:27.673424Z +2662 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +index-1.html +file + + + + +2010-06-28T09:04:53.000000Z +572ec3c0b82c337cab6e52f12e49184a +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +8817 + +index-10.html +file + + + + +2010-06-28T09:04:53.000000Z +10471fc98a3b84dfe073bc3d8a3953a2 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +15064 + +index-11.html +file + + + + +2010-06-28T09:04:53.000000Z +5075e8e48e8df6e3936fbb5feb329b5b +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + 
+14695 + +index-12.html +file + + + + +2010-06-28T09:04:53.000000Z +c094ca2323bdd246240a2679a3d58dc5 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6815 + +index-13.html +file + + + + +2010-06-28T09:04:53.000000Z +f6468d3b6a1966a69f8564d4e92ae13a +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +15659 + +index-14.html +file + + + + +2010-06-28T09:04:53.000000Z +952b66c53309e6486111e3e322cfde77 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +15892 + +index-15.html +file + + + + +2010-06-28T09:04:53.000000Z +85aeffe826751fd0fbd78d7f1f99dfc5 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +8198 + +index-16.html +file + + + + +2010-06-28T09:04:53.000000Z +52dd2224a021d216148d3b5943b74ad7 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +16038 + +index-17.html +file + + + + +2010-06-28T09:04:53.000000Z +ad99b207e006b21e220b5f75cc73ea78 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +7962 + +index-18.html +file + + + + +2010-06-28T09:04:53.000000Z +92a6f68d9099881de2f91929cdfcdfa1 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6733 + +index-19.html +file + + + + +2010-06-28T09:04:53.000000Z +7115bc0fbd6f9e1b50f13dabc99adab0 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +7486 + +index-2.html +file + + + + +2010-06-28T09:04:53.000000Z +5858ca138e6c23e383bd2602cb77296a +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +6750 + +index-20.html +file + + + + +2010-06-28T09:04:53.000000Z +9aca3707aeb6fd7407983d2833cfa60d +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +7060 + +index-3.html +file + + + + 
+2010-06-28T09:04:53.000000Z +b5b24c5b1eda003e4698686b32f13525 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +13432 + +index-4.html +file + + + + +2010-06-28T09:04:53.000000Z +7d8692df3b9bee44e24002faccdbbe77 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +28061 + +index-5.html +file + + + + +2010-06-28T09:04:53.000000Z +c0ad67fddff6ecc262aa3f639e1c56eb +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +10918 + +index-6.html +file + + + + +2010-06-28T09:04:53.000000Z +9739ce2e05345ee91ff4e522bbf33129 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +10017 + +index-7.html +file + + + + +2010-06-28T09:04:53.000000Z +6fe15cf02633c203faf8ba9579eb4360 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +10114 + +index-8.html +file + + + + +2010-06-28T09:04:53.000000Z +4be9eba3d558ee3a4f770481a72b977f +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +12042 + +index-9.html +file + + + + +2010-06-28T09:04:53.000000Z +daa3237cac86e6646bceecb3b9bb65f9 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +14722 + diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/prop-base/index-1.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/prop-base/index-1.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/prop-base/index-10.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/prop-base/index-10.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/prop-base/index-11.html.svn-base 
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/prop-base/index-11.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/prop-base/index-12.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/prop-base/index-12.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/prop-base/index-13.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/prop-base/index-13.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/prop-base/index-14.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/prop-base/index-14.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/prop-base/index-15.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/prop-base/index-15.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/prop-base/index-16.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/prop-base/index-16.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/prop-base/index-17.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/prop-base/index-17.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea 
doc/index-files/.svn/prop-base/index-18.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/prop-base/index-18.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/prop-base/index-19.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/prop-base/index-19.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/prop-base/index-2.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/prop-base/index-2.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/prop-base/index-20.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/prop-base/index-20.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/prop-base/index-3.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/prop-base/index-3.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/prop-base/index-4.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/prop-base/index-4.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/prop-base/index-5.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/prop-base/index-5.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r 
dc7622afcfea doc/index-files/.svn/prop-base/index-6.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/prop-base/index-6.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/prop-base/index-7.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/prop-base/index-7.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/prop-base/index-8.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/prop-base/index-8.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/prop-base/index-9.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/prop-base/index-9.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/text-base/index-1.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/text-base/index-1.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,160 @@ + + + + + + +A-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+A

+
+
absPathToTextId(String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
acceptsDocsOutOfOrder() - +Method in class de.mpiwg.dwinter.lucencetools.LineCollector +
  +
addDocument(File, String, String, String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.OCRProcessFileThread +
  +
addDocument(File, String, String, String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
addDocument(Document, String) - +Method in class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers +
  +
analyzer - +Variable in class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/text-base/index-10.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/text-base/index-10.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,205 @@ + + + + + + +L-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+L

+
+
lang - +Variable in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta +
  +
lang - +Variable in class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer +
  +
LanguageAnalyzer - Class in de.mpiwg.dwinter.lucencetools.analyzer
 
LanguageAnalyzer(String, Analyzer, File, boolean) - +Constructor for class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer +
  +
LanguageAnalyzer(String, Analyzer, File) - +Constructor for class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer +
  +
languageAnalyzers - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
languageAnalyzers - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
languageAnalyzers - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
LanguageAnalyzers - Class in de.mpiwg.dwinter.lucencetools.analyzer
 
LanguageAnalyzers() - +Constructor for class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers +
  +
languageFileName - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
languageFileName - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
languageFileName - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
languageToISO - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
languageToISO - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
languageToISO - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
LineCollector - Class in de.mpiwg.dwinter.lucencetools
 
LineCollector(int) - +Constructor for class de.mpiwg.dwinter.lucencetools.LineCollector +
  +
lineCounter - +Variable in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
lineCounter - +Variable in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
lineNumber - +Variable in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument.OCRLine +
  +
loadLanguages() - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
loadLanguages() - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
loadLanguages() - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/text-base/index-11.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/text-base/index-11.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,205 @@ + + + + + + +M-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+M

+
+
main(String[]) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIMD +
  +
main(String[]) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIRDFMD +
Index all text files under a directory. +
main(String[]) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLI +
  +
main(String[]) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIMD +
  +
main(String[]) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIRDFMD +
Index all text files under a directory. +
main(String[]) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLI +
  +
main(String[]) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIMD +
  +
main(String[]) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIRDFMD +
Index all text files under a directory. +
MAX_HITS_PER_PAGE - +Static variable in class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers +
  +
MAXFILES - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
MAXFILES - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
maxThread - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
maxThread - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
mdProviderUrl - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
mdProviderUrl - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
mdProviderUrl - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
mode - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
MorphDocument - Class in de.mpiwg.dwinter.lucencetools.documents
 
MorphDocument() - +Constructor for class de.mpiwg.dwinter.lucencetools.documents.MorphDocument +
  +
morphFile - +Static variable in interface de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer.DonatusAnalyzer +
  +
mythreads - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
mythreads - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/text-base/index-12.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/text-base/index-12.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,145 @@ + + + + + + +N-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+N

+
+
NEW_FILE - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/text-base/index-13.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/text-base/index-13.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,190 @@ + + + + + + +O-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+O

+
+
OCRDocument - Class in de.mpiwg.dwinter.fulltextIndexer.OCRutils
 
OCRDocument(String, String, ArrayList<OCRDocument.OCRLine>) - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument +
  +
OCRDocument() - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument +
  +
ocrDocument - +Variable in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
ocrDocument - +Variable in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
OCRDocument.OCRLine - Class in de.mpiwg.dwinter.fulltextIndexer.OCRutils
 
OCRDocument.OCRLine(String, String, String) - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument.OCRLine +
  +
OCRDocument.OCRLine() - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument.OCRLine +
  +
OCRHarvesterCLI - Class in de.mpiwg.dwinter.fulltextIndexer.harvester.CLI
 
OCRHarvesterCLI() - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLI +
  +
OCRHarvesterCLIMD - Class in de.mpiwg.dwinter.fulltextIndexer.harvester.CLI
 
OCRHarvesterCLIMD() - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIMD +
  +
OCRHarvesterCLIRDFMD - Class in de.mpiwg.dwinter.fulltextIndexer.harvester.CLI
 
OCRHarvesterCLIRDFMD() - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIRDFMD +
  +
OCRHarvesterThreaded - Class in de.mpiwg.dwinter.fulltextIndexer.harvester
 
OCRHarvesterThreaded() - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded +
  +
OCRHarvesterThreaded(File, File, String, String, String) - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded +
  +
OCRHarvesterThreaded(File, File, String, String) - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded +
  +
OCRLines - +Variable in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument +
  +
OcropusLineDocument - Class in de.mpiwg.dwinter.lucencetools.documents
 
OcropusLineDocument() - +Constructor for class de.mpiwg.dwinter.lucencetools.documents.OcropusLineDocument +
  +
OCRProcessFileThread - Class in de.mpiwg.dwinter.fulltextIndexer.harvester.processors
 
OCRProcessFileThread(LanguageAnalyzers, File, String, HashMap<String, String>, String, String, HashMap<String, String>, HashMap<String, String>) - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.OCRProcessFileThread +
  +
optimize() - +Method in class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/text-base/index-14.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/text-base/index-14.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,202 @@ + + + + + + +P-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+P

+
+
pageDimension - +Variable in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument +
  +
ParseIndexMeta - Class in de.mpiwg.dwinter.fulltextIndexer.utils
 
ParseIndexMeta() - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta +
  +
ParseOcrDocument - Class in de.mpiwg.dwinter.fulltextIndexer.OCRutils
 
ParseOcrDocument() - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
ParseOcrDocument - Class in de.mpiwg.dwinter.fulltextIndexer.utils
 
ParseOcrDocument() - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
preferedLanguage - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
preferedLanguage - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
preferedLanguage - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded +
  +
preferedLanguage - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
PREFIX - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
processCompleteFile(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
processFile(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
processFile(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
processFile(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded +
  +
processFile(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
ProcessFileThread - Class in de.mpiwg.dwinter.fulltextIndexer.harvester.processors
 
ProcessFileThread(File, File, String, File, String, HashMap<String, String>, HashMap<String, String>) - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
ProcessFileThread(LanguageAnalyzers, File, String, HashMap<String, String>, String, String, HashMap<String, String>, HashMap<String, String>) - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
processingInstruction(String, String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
processingInstruction(String, String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta +
  +
processingInstruction(String, String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
processThisFile - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/text-base/index-15.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/text-base/index-15.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,157 @@ + + + + + + +R-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+R

+
+
read() - +Method in class de.mpiwg.dwinter.lucencetools.analyzer.XMLFilteredReader +
  +
read(char[], int, int) - +Method in class de.mpiwg.dwinter.lucencetools.analyzer.XMLFilteredReader +
  +
reader - +Variable in class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer +
  +
resolveEntity(String, String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
run() - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/text-base/index-16.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/text-base/index-16.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,217 @@ + + + + + + +S-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+S

+
+
search(TermQuery) - +Method in class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers +
  +
searcher - +Variable in class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer +
  +
serialVersionUID - +Static variable in class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers +
  +
setDocumentLocator(Locator) - +Method in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
setDocumentLocator(Locator) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta +
  +
setDocumentLocator(Locator) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
setIndexMetaPriority(boolean) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
skippedEntity(String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
skippedEntity(String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta +
  +
skippedEntity(String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
specialMode - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
specialMode - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
specialMode - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
startDocument() - +Method in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
startDocument() - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta +
  +
startDocument() - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
startElement(String, String, String, Attributes) - +Method in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
startElement(String, String, String, Attributes) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta +
  +
startElement(String, String, String, Attributes) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
startPrefixMapping(String, String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
startPrefixMapping(String, String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta +
  +
startPrefixMapping(String, String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
supportedLanguageFolder - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
supportedLanguageFolder - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
supportedLanguageFolder - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/text-base/index-17.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/text-base/index-17.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,154 @@ + + + + + + +T-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+T

+
+
TEXTIDFROMPATH_REGEXP - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
textLanguage - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
textLanguage - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
textLanguage - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/text-base/index-18.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/text-base/index-18.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,145 @@ + + + + + + +V-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+V

+
+
value - +Variable in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/text-base/index-19.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/text-base/index-19.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,151 @@ + + + + + + +W-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+W

+
+
waitForFreeThread() - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
waitForFreeThread() - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
writer - +Variable in class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/text-base/index-2.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/text-base/index-2.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,145 @@ + + + + + + +B-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+B

+
+
bbox - +Variable in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument.OCRLine +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/text-base/index-20.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/text-base/index-20.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,145 @@ + + + + + + +X-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+X

+
+
XMLFilteredReader - Class in de.mpiwg.dwinter.lucencetools.analyzer
 
XMLFilteredReader(InputStream, String) - +Constructor for class de.mpiwg.dwinter.lucencetools.analyzer.XMLFilteredReader +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/text-base/index-3.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/text-base/index-3.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,199 @@ + + + + + + +C-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+C

+
+
characters(char[], int, int) - +Method in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
characters(char[], int, int) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta +
  +
characters(char[], int, int) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
checkFileAndRemoveOldFile(String, String, boolean, long) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
checkFileExists(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
checkSupportedLanguages(String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
clearFile(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
close() - +Method in class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers +
  +
collect(int) - +Method in class de.mpiwg.dwinter.lucencetools.LineCollector +
  +
compose(File, File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
COMPOSEDFN - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
content - +Variable in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument.OCRLine +
  +
counter - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
counter - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
counter - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
CREATE_NEW - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
currentLine - +Variable in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
currentLine - +Variable in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
currentToken - +Variable in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/text-base/index-4.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/text-base/index-4.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,271 @@ + + + + + + +D-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+D

+
+
de.mpiwg.dwinter.fulltextIndexer.harvester - package de.mpiwg.dwinter.fulltextIndexer.harvester
 
de.mpiwg.dwinter.fulltextIndexer.harvester.CLI - package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI
 
de.mpiwg.dwinter.fulltextIndexer.harvester.processors - package de.mpiwg.dwinter.fulltextIndexer.harvester.processors
 
de.mpiwg.dwinter.fulltextIndexer.OCRutils - package de.mpiwg.dwinter.fulltextIndexer.OCRutils
 
de.mpiwg.dwinter.fulltextIndexer.utils - package de.mpiwg.dwinter.fulltextIndexer.utils
 
de.mpiwg.dwinter.lucencetools - package de.mpiwg.dwinter.lucencetools
 
de.mpiwg.dwinter.lucencetools.analyzer - package de.mpiwg.dwinter.lucencetools.analyzer
 
de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer - package de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer
 
de.mpiwg.dwinter.lucencetools.documents - package de.mpiwg.dwinter.lucencetools.documents
 
DEBUG - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
DEBUG - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
deduceFromFolderName(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
deduceFromFolderPriority - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
DELETED_OLD_VERSION - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
DELETED_WRONG_LANGUAGE - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
deleteDocument(int) - +Method in class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers +
  +
deleteDocuments(Term) - +Method in class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers +
  +
deleteDocuments(TermQuery) - +Method in class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers +
  +
docDir - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
docDir - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
docDir - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
DocHarvesterCLIMD - Class in de.mpiwg.dwinter.fulltextIndexer.harvester.CLI
 
DocHarvesterCLIMD() - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIMD +
  +
DocHarvesterCLIRDFMD - Class in de.mpiwg.dwinter.fulltextIndexer.harvester.CLI
 
DocHarvesterCLIRDFMD() - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIRDFMD +
  +
DocHarvesterThreaded - Class in de.mpiwg.dwinter.fulltextIndexer.harvester
 
DocHarvesterThreaded() - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
DocHarvesterThreaded(File, File, String, String, String) - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
DocHarvesterThreaded(File, File, String) - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
Document(File, String, String) - +Static method in class de.mpiwg.dwinter.lucencetools.documents.FileDocument +
  +
Document(File, String, String, String) - +Static method in class de.mpiwg.dwinter.lucencetools.documents.FileDocument +
  +
Document(String, String) - +Static method in class de.mpiwg.dwinter.lucencetools.documents.MorphDocument +
  +
Document(File, String, OCRDocument.OCRLine, String, String) - +Static method in class de.mpiwg.dwinter.lucencetools.documents.OcropusLineDocument +
  +
Document(File, String, OCRDocument.OCRLine, String, String, String) - +Static method in class de.mpiwg.dwinter.lucencetools.documents.OcropusLineDocument +
  +
doLine(Attributes) - +Method in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
doLine(Attributes) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
DonatusAnalyzer - Interface in de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer
 
done - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
doPage(Attributes) - +Method in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
doPage(Attributes) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
doTheHarvest(File, File, String, String) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIMD +
  +
doTheHarvest(File, File, File, String, String) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIRDFMD +
  +
doTheHarvest(File, File, String) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLI +
  +
doTheHarvest(File, File, String, String) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIMD +
  +
doTheHarvest(File, File, File, String, String) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIRDFMD +
  +
doTheHarvest(File, File, String) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLI +
  +
doTheHarvest(File, File, String, String) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIMD +
  +
doTheHarvest(File, File, File, String, String) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIRDFMD +
  +
doTheHarvestLanguage(File, File, String, String) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIMD +
  +
doTheHarvestLanguage(File, File, File, String, String) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIRDFMD +
  +
doTheHarvestLanguage(File, File, String) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLI +
  +
doTheHarvestLanguage(File, File, String, String) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIMD +
  +
doTheHarvestLanguage(File, File, File, String, String) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIRDFMD +
  +
doTheHarvestLanguage(File, File, String) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLI +
  +
doTheHarvestLanguage(File, File, String, String) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIMD +
  +
doTheHarvestLanguage(File, File, File, String, String) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIRDFMD +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/text-base/index-5.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/text-base/index-5.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,178 @@ + + + + + + +E-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+E

+
+
endDocument() - +Method in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
endDocument() - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta +
  +
endDocument() - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
endElement(String, String, String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
endElement(String, String, String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta +
  +
endElement(String, String, String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
endPrefixMapping(String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
endPrefixMapping(String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta +
  +
endPrefixMapping(String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
excludeFolders - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
excludeFolders - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
excludeFolders - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/text-base/index-6.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/text-base/index-6.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,169 @@ + + + + + + +F-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+F

+
+
FILE_EXISTS - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
filecount - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
filecount - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
FileDocument - Class in de.mpiwg.dwinter.lucencetools.documents
 
FileDocument() - +Constructor for class de.mpiwg.dwinter.lucencetools.documents.FileDocument +
  +
filename - +Variable in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument +
  +
fileTypesToIndex - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
fileTypesToIndex - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
fileTypesToIndex - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
finishFile(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/text-base/index-7.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/text-base/index-7.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,169 @@ + + + + + + +G-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+G

+
+
getAnalyzer(String) - +Method in class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers +
  +
getComposedFile(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
getDCFromIndexMeta(String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
getFileListFromRDF(String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
getFileListFromRDF(String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
getIndexMetaPriority() - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
getLanguageFromIndexMeta(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
getLanguageOfText(String, File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
getTextId(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/text-base/index-8.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/text-base/index-8.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,175 @@ + + + + + + +H-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+H

+
+
HarvesterCLI - Class in de.mpiwg.dwinter.fulltextIndexer.harvester.CLI
 
HarvesterCLI() - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLI +
  +
HarvesterCLIMD - Class in de.mpiwg.dwinter.fulltextIndexer.harvester.CLI
 
HarvesterCLIMD() - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIMD +
  +
HarvesterCLIRDFMD - Class in de.mpiwg.dwinter.fulltextIndexer.harvester.CLI
 
HarvesterCLIRDFMD() - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIRDFMD +
  +
HarvesterThreaded - Class in de.mpiwg.dwinter.fulltextIndexer.harvester
 
HarvesterThreaded() - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
HarvesterThreaded(File, File, String, String, String) - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
HarvesterThreaded(File, File, String) - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
harvestFolder() - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
harvestFolder() - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
harvestFolder() - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
harvestFromRDF(String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
harvestFromRDF(String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/.svn/text-base/index-9.html.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/.svn/text-base/index-9.html.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,208 @@ + + + + + + +I-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+I

+
+
ignorableWhitespace(char[], int, int) - +Method in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
ignorableWhitespace(char[], int, int) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta +
  +
ignorableWhitespace(char[], int, int) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
index_dir - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
index_dir - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
index_dir - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
indexDocs(ArrayList<String>) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
indexDocs(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
indexDocs(ArrayList<String>) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
indexDocs(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
indexDocs(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
indexMetaPriority - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
indexMetaPriority - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
indexMetaPriority - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
init_languages() - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
init_languages() - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
init_languages() - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded +
  +
inLine - +Variable in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
inLine - +Variable in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
isTextFile(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
isTextFile(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
isTextFile(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/index-1.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/index-1.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,160 @@ + + + + + + +A-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+A

+
+
absPathToTextId(String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
acceptsDocsOutOfOrder() - +Method in class de.mpiwg.dwinter.lucencetools.LineCollector +
  +
addDocument(File, String, String, String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.OCRProcessFileThread +
  +
addDocument(File, String, String, String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
addDocument(Document, String) - +Method in class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers +
  +
analyzer - +Variable in class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/index-10.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/index-10.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,205 @@ + + + + + + +L-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+L

+
+
lang - +Variable in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta +
  +
lang - +Variable in class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer +
  +
LanguageAnalyzer - Class in de.mpiwg.dwinter.lucencetools.analyzer
 
LanguageAnalyzer(String, Analyzer, File, boolean) - +Constructor for class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer +
  +
LanguageAnalyzer(String, Analyzer, File) - +Constructor for class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer +
  +
languageAnalyzers - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
languageAnalyzers - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
languageAnalyzers - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
LanguageAnalyzers - Class in de.mpiwg.dwinter.lucencetools.analyzer
 
LanguageAnalyzers() - +Constructor for class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers +
  +
languageFileName - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
languageFileName - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
languageFileName - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
languageToISO - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
languageToISO - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
languageToISO - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
LineCollector - Class in de.mpiwg.dwinter.lucencetools
 
LineCollector(int) - +Constructor for class de.mpiwg.dwinter.lucencetools.LineCollector +
  +
lineCounter - +Variable in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
lineCounter - +Variable in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
lineNumber - +Variable in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument.OCRLine +
  +
loadLanguages() - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
loadLanguages() - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
loadLanguages() - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/index-11.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/index-11.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,205 @@ + + + + + + +M-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+M

+
+
main(String[]) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIMD +
  +
main(String[]) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIRDFMD +
Index all text files under a directory. +
main(String[]) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLI +
  +
main(String[]) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIMD +
  +
main(String[]) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIRDFMD +
Index all text files under a directory. +
main(String[]) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLI +
  +
main(String[]) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIMD +
  +
main(String[]) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIRDFMD +
Index all text files under a directory. +
MAX_HITS_PER_PAGE - +Static variable in class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers +
  +
MAXFILES - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
MAXFILES - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
maxThread - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
maxThread - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
mdProviderUrl - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
mdProviderUrl - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
mdProviderUrl - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
mode - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
MorphDocument - Class in de.mpiwg.dwinter.lucencetools.documents
 
MorphDocument() - +Constructor for class de.mpiwg.dwinter.lucencetools.documents.MorphDocument +
  +
morphFile - +Static variable in interface de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer.DonatusAnalyzer +
  +
mythreads - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
mythreads - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/index-12.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/index-12.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,145 @@ + + + + + + +N-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+N

+
+
NEW_FILE - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/index-13.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/index-13.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,190 @@ + + + + + + +O-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+O

+
+
OCRDocument - Class in de.mpiwg.dwinter.fulltextIndexer.OCRutils
 
OCRDocument(String, String, ArrayList<OCRDocument.OCRLine>) - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument +
  +
OCRDocument() - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument +
  +
ocrDocument - +Variable in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
ocrDocument - +Variable in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
OCRDocument.OCRLine - Class in de.mpiwg.dwinter.fulltextIndexer.OCRutils
 
OCRDocument.OCRLine(String, String, String) - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument.OCRLine +
  +
OCRDocument.OCRLine() - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument.OCRLine +
  +
OCRHarvesterCLI - Class in de.mpiwg.dwinter.fulltextIndexer.harvester.CLI
 
OCRHarvesterCLI() - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLI +
  +
OCRHarvesterCLIMD - Class in de.mpiwg.dwinter.fulltextIndexer.harvester.CLI
 
OCRHarvesterCLIMD() - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIMD +
  +
OCRHarvesterCLIRDFMD - Class in de.mpiwg.dwinter.fulltextIndexer.harvester.CLI
 
OCRHarvesterCLIRDFMD() - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIRDFMD +
  +
OCRHarvesterThreaded - Class in de.mpiwg.dwinter.fulltextIndexer.harvester
 
OCRHarvesterThreaded() - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded +
  +
OCRHarvesterThreaded(File, File, String, String, String) - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded +
  +
OCRHarvesterThreaded(File, File, String, String) - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded +
  +
OCRLines - +Variable in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument +
  +
OcropusLineDocument - Class in de.mpiwg.dwinter.lucencetools.documents
 
OcropusLineDocument() - +Constructor for class de.mpiwg.dwinter.lucencetools.documents.OcropusLineDocument +
  +
OCRProcessFileThread - Class in de.mpiwg.dwinter.fulltextIndexer.harvester.processors
 
OCRProcessFileThread(LanguageAnalyzers, File, String, HashMap<String, String>, String, String, HashMap<String, String>, HashMap<String, String>) - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.OCRProcessFileThread +
  +
optimize() - +Method in class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/index-14.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/index-14.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,202 @@ + + + + + + +P-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+P

+
+
pageDimension - +Variable in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument +
  +
ParseIndexMeta - Class in de.mpiwg.dwinter.fulltextIndexer.utils
 
ParseIndexMeta() - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta +
  +
ParseOcrDocument - Class in de.mpiwg.dwinter.fulltextIndexer.OCRutils
 
ParseOcrDocument() - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
ParseOcrDocument - Class in de.mpiwg.dwinter.fulltextIndexer.utils
 
ParseOcrDocument() - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
preferedLanguage - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
preferedLanguage - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
preferedLanguage - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded +
  +
preferedLanguage - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
PREFIX - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
processCompleteFile(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
processFile(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
processFile(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
processFile(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded +
  +
processFile(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
ProcessFileThread - Class in de.mpiwg.dwinter.fulltextIndexer.harvester.processors
 
ProcessFileThread(File, File, String, File, String, HashMap<String, String>, HashMap<String, String>) - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
ProcessFileThread(LanguageAnalyzers, File, String, HashMap<String, String>, String, String, HashMap<String, String>, HashMap<String, String>) - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
processingInstruction(String, String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
processingInstruction(String, String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta +
  +
processingInstruction(String, String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
processThisFile - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/index-15.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/index-15.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,157 @@ + + + + + + +R-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+R

+
+
read() - +Method in class de.mpiwg.dwinter.lucencetools.analyzer.XMLFilteredReader +
  +
read(char[], int, int) - +Method in class de.mpiwg.dwinter.lucencetools.analyzer.XMLFilteredReader +
  +
reader - +Variable in class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer +
  +
resolveEntity(String, String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
run() - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/index-16.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/index-16.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,217 @@ + + + + + + +S-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+S

+
+
search(TermQuery) - +Method in class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers +
  +
searcher - +Variable in class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer +
  +
serialVersionUID - +Static variable in class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers +
  +
setDocumentLocator(Locator) - +Method in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
setDocumentLocator(Locator) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta +
  +
setDocumentLocator(Locator) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
setIndexMetaPriority(boolean) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
skippedEntity(String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
skippedEntity(String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta +
  +
skippedEntity(String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
specialMode - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
specialMode - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
specialMode - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
startDocument() - +Method in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
startDocument() - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta +
  +
startDocument() - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
startElement(String, String, String, Attributes) - +Method in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
startElement(String, String, String, Attributes) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta +
  +
startElement(String, String, String, Attributes) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
startPrefixMapping(String, String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
startPrefixMapping(String, String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta +
  +
startPrefixMapping(String, String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
supportedLanguageFolder - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
supportedLanguageFolder - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
supportedLanguageFolder - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/index-17.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/index-17.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,154 @@ + + + + + + +T-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+T

+
+
TEXTIDFROMPATH_REGEXP - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
textLanguage - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
textLanguage - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
textLanguage - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/index-18.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/index-18.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,145 @@ + + + + + + +V-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+V

+
+
value - +Variable in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/index-19.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/index-19.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,151 @@ + + + + + + +W-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+W

+
+
waitForFreeThread() - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
waitForFreeThread() - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
writer - +Variable in class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/index-2.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/index-2.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,145 @@ + + + + + + +B-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+B

+
+
bbox - +Variable in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument.OCRLine +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/index-20.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/index-20.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,145 @@ + + + + + + +X-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+X

+
+
XMLFilteredReader - Class in de.mpiwg.dwinter.lucencetools.analyzer
 
XMLFilteredReader(InputStream, String) - +Constructor for class de.mpiwg.dwinter.lucencetools.analyzer.XMLFilteredReader +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/index-3.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/index-3.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,199 @@ + + + + + + +C-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+C

+
+
characters(char[], int, int) - +Method in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
characters(char[], int, int) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta +
  +
characters(char[], int, int) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
checkFileAndRemoveOldFile(String, String, boolean, long) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
checkFileExists(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
checkSupportedLanguages(String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
clearFile(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
close() - +Method in class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers +
  +
collect(int) - +Method in class de.mpiwg.dwinter.lucencetools.LineCollector +
  +
compose(File, File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
COMPOSEDFN - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
content - +Variable in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument.OCRLine +
  +
counter - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
counter - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
counter - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
CREATE_NEW - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
currentLine - +Variable in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
currentLine - +Variable in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
currentToken - +Variable in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/index-4.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/index-4.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,271 @@ + + + + + + +D-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+D

+
+
de.mpiwg.dwinter.fulltextIndexer.harvester - package de.mpiwg.dwinter.fulltextIndexer.harvester
 
de.mpiwg.dwinter.fulltextIndexer.harvester.CLI - package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI
 
de.mpiwg.dwinter.fulltextIndexer.harvester.processors - package de.mpiwg.dwinter.fulltextIndexer.harvester.processors
 
de.mpiwg.dwinter.fulltextIndexer.OCRutils - package de.mpiwg.dwinter.fulltextIndexer.OCRutils
 
de.mpiwg.dwinter.fulltextIndexer.utils - package de.mpiwg.dwinter.fulltextIndexer.utils
 
de.mpiwg.dwinter.lucencetools - package de.mpiwg.dwinter.lucencetools
 
de.mpiwg.dwinter.lucencetools.analyzer - package de.mpiwg.dwinter.lucencetools.analyzer
 
de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer - package de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer
 
de.mpiwg.dwinter.lucencetools.documents - package de.mpiwg.dwinter.lucencetools.documents
 
DEBUG - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
DEBUG - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
deduceFromFolderName(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
deduceFromFolderPriority - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
DELETED_OLD_VERSION - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
DELETED_WRONG_LANGUAGE - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
deleteDocument(int) - +Method in class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers +
  +
deleteDocuments(Term) - +Method in class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers +
  +
deleteDocuments(TermQuery) - +Method in class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers +
  +
docDir - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
docDir - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
docDir - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
DocHarvesterCLIMD - Class in de.mpiwg.dwinter.fulltextIndexer.harvester.CLI
 
DocHarvesterCLIMD() - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIMD +
  +
DocHarvesterCLIRDFMD - Class in de.mpiwg.dwinter.fulltextIndexer.harvester.CLI
 
DocHarvesterCLIRDFMD() - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIRDFMD +
  +
DocHarvesterThreaded - Class in de.mpiwg.dwinter.fulltextIndexer.harvester
 
DocHarvesterThreaded() - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
DocHarvesterThreaded(File, File, String, String, String) - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
DocHarvesterThreaded(File, File, String) - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
Document(File, String, String) - +Static method in class de.mpiwg.dwinter.lucencetools.documents.FileDocument +
  +
Document(File, String, String, String) - +Static method in class de.mpiwg.dwinter.lucencetools.documents.FileDocument +
  +
Document(String, String) - +Static method in class de.mpiwg.dwinter.lucencetools.documents.MorphDocument +
  +
Document(File, String, OCRDocument.OCRLine, String, String) - +Static method in class de.mpiwg.dwinter.lucencetools.documents.OcropusLineDocument +
  +
Document(File, String, OCRDocument.OCRLine, String, String, String) - +Static method in class de.mpiwg.dwinter.lucencetools.documents.OcropusLineDocument +
  +
doLine(Attributes) - +Method in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
doLine(Attributes) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
DonatusAnalyzer - Interface in de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer
 
done - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
doPage(Attributes) - +Method in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
doPage(Attributes) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
doTheHarvest(File, File, String, String) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIMD +
  +
doTheHarvest(File, File, File, String, String) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIRDFMD +
  +
doTheHarvest(File, File, String) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLI +
  +
doTheHarvest(File, File, String, String) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIMD +
  +
doTheHarvest(File, File, File, String, String) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIRDFMD +
  +
doTheHarvest(File, File, String) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLI +
  +
doTheHarvest(File, File, String, String) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIMD +
  +
doTheHarvest(File, File, File, String, String) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIRDFMD +
  +
doTheHarvestLanguage(File, File, String, String) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIMD +
  +
doTheHarvestLanguage(File, File, File, String, String) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIRDFMD +
  +
doTheHarvestLanguage(File, File, String) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLI +
  +
doTheHarvestLanguage(File, File, String, String) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIMD +
  +
doTheHarvestLanguage(File, File, File, String, String) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIRDFMD +
  +
doTheHarvestLanguage(File, File, String) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLI +
  +
doTheHarvestLanguage(File, File, String, String) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIMD +
  +
doTheHarvestLanguage(File, File, File, String, String) - +Static method in class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIRDFMD +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/index-5.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/index-5.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,178 @@ + + + + + + +E-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+E

+
+
endDocument() - +Method in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
endDocument() - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta +
  +
endDocument() - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
endElement(String, String, String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
endElement(String, String, String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta +
  +
endElement(String, String, String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
endPrefixMapping(String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
endPrefixMapping(String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta +
  +
endPrefixMapping(String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
excludeFolders - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
excludeFolders - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
excludeFolders - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/index-6.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/index-6.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,169 @@ + + + + + + +F-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+F

+
+
FILE_EXISTS - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
filecount - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
filecount - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
FileDocument - Class in de.mpiwg.dwinter.lucencetools.documents
 
FileDocument() - +Constructor for class de.mpiwg.dwinter.lucencetools.documents.FileDocument +
  +
filename - +Variable in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument +
  +
fileTypesToIndex - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
fileTypesToIndex - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
fileTypesToIndex - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
finishFile(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/index-7.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/index-7.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,169 @@ + + + + + + +G-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+G

+
+
getAnalyzer(String) - +Method in class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers +
  +
getComposedFile(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
getDCFromIndexMeta(String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
getFileListFromRDF(String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
getFileListFromRDF(String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
getIndexMetaPriority() - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
getLanguageFromIndexMeta(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
getLanguageOfText(String, File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
getTextId(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/index-8.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/index-8.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,175 @@ + + + + + + +H-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+H

+
+
HarvesterCLI - Class in de.mpiwg.dwinter.fulltextIndexer.harvester.CLI
 
HarvesterCLI() - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLI +
  +
HarvesterCLIMD - Class in de.mpiwg.dwinter.fulltextIndexer.harvester.CLI
 
HarvesterCLIMD() - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIMD +
  +
HarvesterCLIRDFMD - Class in de.mpiwg.dwinter.fulltextIndexer.harvester.CLI
 
HarvesterCLIRDFMD() - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIRDFMD +
  +
HarvesterThreaded - Class in de.mpiwg.dwinter.fulltextIndexer.harvester
 
HarvesterThreaded() - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
HarvesterThreaded(File, File, String, String, String) - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
HarvesterThreaded(File, File, String) - +Constructor for class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
harvestFolder() - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
harvestFolder() - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
harvestFolder() - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
harvestFromRDF(String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
harvestFromRDF(String) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index-files/index-9.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index-files/index-9.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,208 @@ + + + + + + +I-Index + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+

+I

+
+
ignorableWhitespace(char[], int, int) - +Method in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
ignorableWhitespace(char[], int, int) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta +
  +
ignorableWhitespace(char[], int, int) - +Method in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
index_dir - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
index_dir - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
index_dir - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
indexDocs(ArrayList<String>) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
indexDocs(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
indexDocs(ArrayList<String>) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
indexDocs(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
indexDocs(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
indexMetaPriority - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
indexMetaPriority - +Static variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
indexMetaPriority - +Variable in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
init_languages() - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
init_languages() - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
init_languages() - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded +
  +
inLine - +Variable in class de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument +
  +
inLine - +Variable in class de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument +
  +
isTextFile(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded +
  +
isTextFile(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
  +
isTextFile(File) - +Method in class de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread +
  +
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +A B C D E F G H I L M N O P R S T V W X
+ + + diff -r 000000000000 -r dc7622afcfea doc/index.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/index.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,39 @@ + + + + + + +Generated Documentation (Untitled) + + + + + + + + + + + +<H2> +Frame Alert</H2> + +<P> +This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. +<BR> +Link to<A HREF="overview-summary.html">Non-frame version.</A> + + + diff -r 000000000000 -r dc7622afcfea doc/overview-frame.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/overview-frame.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,58 @@ + + + + + + +Overview List + + + + + + + + + + + + + + + +
+
+ + + + + +
All Classes +

+ +Packages +
+de.mpiwg.dwinter.fulltextIndexer.harvester +
+de.mpiwg.dwinter.fulltextIndexer.harvester.CLI +
+de.mpiwg.dwinter.fulltextIndexer.harvester.processors +
+de.mpiwg.dwinter.fulltextIndexer.OCRutils +
+de.mpiwg.dwinter.fulltextIndexer.utils +
+de.mpiwg.dwinter.lucencetools +
+de.mpiwg.dwinter.lucencetools.analyzer +
+de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer +
+de.mpiwg.dwinter.lucencetools.documents +
+

+ +

+  + + diff -r 000000000000 -r dc7622afcfea doc/overview-summary.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/overview-summary.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,183 @@ + + + + + + +Overview + + + + + + + + + + + + +


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Packages
de.mpiwg.dwinter.fulltextIndexer.harvester 
de.mpiwg.dwinter.fulltextIndexer.harvester.CLI 
de.mpiwg.dwinter.fulltextIndexer.harvester.processors 
de.mpiwg.dwinter.fulltextIndexer.OCRutils 
de.mpiwg.dwinter.fulltextIndexer.utils 
de.mpiwg.dwinter.lucencetools 
de.mpiwg.dwinter.lucencetools.analyzer 
de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer 
de.mpiwg.dwinter.lucencetools.documents 
+ +


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/overview-tree.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/overview-tree.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,188 @@ + + + + + + +Class Hierarchy + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For All Packages

+
+
+
Package Hierarchies:
de.mpiwg.dwinter.fulltextIndexer.harvester, de.mpiwg.dwinter.fulltextIndexer.harvester.CLI, de.mpiwg.dwinter.fulltextIndexer.harvester.processors, de.mpiwg.dwinter.fulltextIndexer.OCRutils, de.mpiwg.dwinter.fulltextIndexer.utils, de.mpiwg.dwinter.lucencetools, de.mpiwg.dwinter.lucencetools.analyzer, de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer, de.mpiwg.dwinter.lucencetools.documents
+
+

+Class Hierarchy +

+
    +
  • java.lang.Object
      +
    • java.util.AbstractCollection<E> (implements java.util.Collection<E>) +
        +
      • java.util.AbstractList<E> (implements java.util.List<E>) +
          +
        • java.util.ArrayList<E> (implements java.lang.Cloneable, java.util.List<E>, java.util.RandomAccess, java.io.Serializable) + +
        +
      +
    • org.apache.lucene.search.Collector
        +
      • org.apache.lucene.search.TopDocsCollector<T>
          +
        • org.apache.lucene.search.TopScoreDocCollector +
        +
      +
    • org.xml.sax.helpers.DefaultHandler (implements org.xml.sax.ContentHandler, org.xml.sax.DTDHandler, org.xml.sax.EntityResolver, org.xml.sax.ErrorHandler) + +
    • de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIMD
    • de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.DocHarvesterCLIRDFMD
    • de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded
    • de.mpiwg.dwinter.lucencetools.documents.FileDocument
    • de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLI
    • de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIMD
    • de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIRDFMD
    • de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded +
    • de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer
    • de.mpiwg.dwinter.lucencetools.documents.MorphDocument
    • de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument
    • de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument.OCRLine
    • de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLI
    • de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIMD
    • de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIRDFMD
    • de.mpiwg.dwinter.lucencetools.documents.OcropusLineDocument
    • de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta (implements org.xml.sax.ContentHandler) +
    • java.io.Reader (implements java.io.Closeable, java.lang.Readable) +
        +
      • java.io.InputStreamReader +
      +
    • java.lang.Thread (implements java.lang.Runnable) + +
    +
+

+Interface Hierarchy +

+ +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/package-list --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/package-list Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,9 @@ +de.mpiwg.dwinter.fulltextIndexer.OCRutils +de.mpiwg.dwinter.fulltextIndexer.harvester +de.mpiwg.dwinter.fulltextIndexer.harvester.CLI +de.mpiwg.dwinter.fulltextIndexer.harvester.processors +de.mpiwg.dwinter.fulltextIndexer.utils +de.mpiwg.dwinter.lucencetools +de.mpiwg.dwinter.lucencetools.analyzer +de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer +de.mpiwg.dwinter.lucencetools.documents diff -r 000000000000 -r dc7622afcfea doc/resources/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/resources/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,11 @@ +K 25 +svn:wc:ra_dav:version-url +V 54 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/resources +END +inherit.gif +K 25 +svn:wc:ra_dav:version-url +V 66 +/svn/!svn/ver/2662/fulltextIndexer/trunk/doc/resources/inherit.gif +END diff -r 000000000000 -r dc7622afcfea doc/resources/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/resources/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,69 @@ +10 + +dir +2662 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/doc/resources +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-28T15:47:27.673424Z +2662 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +inherit.gif +file + + + + +2010-06-28T09:04:53.000000Z +220f4eda0bd49915699315f18b8b03cf +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +57 + diff -r 000000000000 -r dc7622afcfea doc/resources/.svn/prop-base/inherit.gif.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/resources/.svn/prop-base/inherit.gif.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 24 +application/octet-stream +END diff -r 000000000000 -r 
dc7622afcfea doc/resources/.svn/text-base/inherit.gif.svn-base Binary file doc/resources/.svn/text-base/inherit.gif.svn-base has changed diff -r 000000000000 -r dc7622afcfea doc/resources/inherit.gif Binary file doc/resources/inherit.gif has changed diff -r 000000000000 -r dc7622afcfea doc/serialized-form.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/serialized-form.html Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,166 @@ + + + + + + +Serialized Form + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Serialized Form

+
+
+ + + + + +
+Package de.mpiwg.dwinter.lucencetools.analyzer
+ +

+ + + + + +
+Class de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers extends java.util.ArrayList<LanguageAnalyzer> implements Serializable
+ +

+serialVersionUID: 2L + +

+ +

+


+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff -r 000000000000 -r dc7622afcfea doc/stylesheet.css --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/stylesheet.css Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,29 @@ +/* Javadoc style sheet */ + +/* Define colors, fonts and other style attributes here to override the defaults */ + +/* Page background color */ +body { background-color: #FFFFFF; color:#000000 } + +/* Headings */ +h1 { font-size: 145% } + +/* Table colors */ +.TableHeadingColor { background: #CCCCFF; color:#000000 } /* Dark mauve */ +.TableSubHeadingColor { background: #EEEEFF; color:#000000 } /* Light mauve */ +.TableRowColor { background: #FFFFFF; color:#000000 } /* White */ + +/* Font used in left-hand frame lists */ +.FrameTitleFont { font-size: 100%; font-family: Helvetica, Arial, sans-serif; color:#000000 } +.FrameHeadingFont { font-size: 90%; font-family: Helvetica, Arial, sans-serif; color:#000000 } +.FrameItemFont { font-size: 90%; font-family: Helvetica, Arial, sans-serif; color:#000000 } + +/* Navigation bar fonts and colors */ +.NavBarCell1 { background-color:#EEEEFF; color:#000000} /* Light mauve */ +.NavBarCell1Rev { background-color:#00008B; color:#FFFFFF} /* Dark Blue */ +.NavBarFont1 { font-family: Arial, Helvetica, sans-serif; color:#000000;color:#000000;} +.NavBarFont1Rev { font-family: Arial, Helvetica, sans-serif; color:#FFFFFF;color:#FFFFFF;} + +.NavBarCell2 { font-family: Arial, Helvetica, sans-serif; background-color:#FFFFFF; color:#000000} +.NavBarCell3 { font-family: Arial, Helvetica, sans-serif; background-color:#FFFFFF; color:#000000} + diff -r 000000000000 -r dc7622afcfea libs/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,83 @@ +K 25 +svn:wc:ra_dav:version-url +V 45 +/svn/!svn/ver/2873/fulltextIndexer/trunk/libs +END +xmlrpc-client-3.1.jar +K 25 +svn:wc:ra_dav:version-url +V 67 +/svn/!svn/ver/2873/fulltextIndexer/trunk/libs/xmlrpc-client-3.1.jar +END +xmlrpc-common-3.1.jar 
+K 25 +svn:wc:ra_dav:version-url +V 67 +/svn/!svn/ver/2873/fulltextIndexer/trunk/libs/xmlrpc-common-3.1.jar +END +jaxen-1.1.1.jar +K 25 +svn:wc:ra_dav:version-url +V 61 +/svn/!svn/ver/2873/fulltextIndexer/trunk/libs/jaxen-1.1.1.jar +END +xmlrpc-server-3.1-javadoc.jar +K 25 +svn:wc:ra_dav:version-url +V 75 +/svn/!svn/ver/2873/fulltextIndexer/trunk/libs/xmlrpc-server-3.1-javadoc.jar +END +commons-io-1.4.jar +K 25 +svn:wc:ra_dav:version-url +V 64 +/svn/!svn/ver/2873/fulltextIndexer/trunk/libs/commons-io-1.4.jar +END +xmlrpc-server-3.1-sources.jar +K 25 +svn:wc:ra_dav:version-url +V 75 +/svn/!svn/ver/2873/fulltextIndexer/trunk/libs/xmlrpc-server-3.1-sources.jar +END +xmlrpc-server-3.1.jar +K 25 +svn:wc:ra_dav:version-url +V 67 +/svn/!svn/ver/2873/fulltextIndexer/trunk/libs/xmlrpc-server-3.1.jar +END +xmlrpc-client-3.1-javadoc.jar +K 25 +svn:wc:ra_dav:version-url +V 75 +/svn/!svn/ver/2873/fulltextIndexer/trunk/libs/xmlrpc-client-3.1-javadoc.jar +END +xmlrpc-common-3.1-javadoc.jar +K 25 +svn:wc:ra_dav:version-url +V 75 +/svn/!svn/ver/2873/fulltextIndexer/trunk/libs/xmlrpc-common-3.1-javadoc.jar +END +jdom-1.0.jar +K 25 +svn:wc:ra_dav:version-url +V 58 +/svn/!svn/ver/2873/fulltextIndexer/trunk/libs/jdom-1.0.jar +END +xmlrpc-client-3.1-sources.jar +K 25 +svn:wc:ra_dav:version-url +V 75 +/svn/!svn/ver/2873/fulltextIndexer/trunk/libs/xmlrpc-client-3.1-sources.jar +END +xmlrpc-common-3.1-sources.jar +K 25 +svn:wc:ra_dav:version-url +V 75 +/svn/!svn/ver/2873/fulltextIndexer/trunk/libs/xmlrpc-common-3.1-sources.jar +END +ws-commons-util-1.0.2.jar +K 25 +svn:wc:ra_dav:version-url +V 71 +/svn/!svn/ver/2873/fulltextIndexer/trunk/libs/ws-commons-util-1.0.2.jar +END diff -r 000000000000 -r dc7622afcfea libs/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,477 @@ +10 + +dir +2873 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/libs +https://it-dev.mpiwg-berlin.mpg.de/svn + + + 
+2010-11-03T11:29:50.531613Z +2873 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +commons-io-1.4.jar +file + + + + +2010-06-30T09:49:12.000000Z +b6a50c8a15ece8753e37cbe5700bf84f +2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +109043 + +jaxen-1.1.1.jar +file + + + + +2010-06-28T10:22:08.000000Z +261d1aa59865842ecc32b3848b0c6538 +2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +226915 + +jdom-1.0.jar +file + + + + +2010-06-16T09:20:28.000000Z +0b8f97de82fc9529b1028a77125ce4f8 +2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +153253 + +ws-commons-util-1.0.2.jar +file + + + + +2010-06-16T13:19:17.000000Z +e0d2efe441e2dec803c7749c10725f61 +2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +34407 + +xmlrpc-client-3.1-javadoc.jar +file + + + + +2010-06-16T09:28:42.000000Z +669da24d61547211415f3be75d6e47cb +2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +199898 + +xmlrpc-client-3.1-sources.jar +file + + + + +2010-06-16T09:28:42.000000Z +c4c6c6592dfef3a017fb0fa6e746ccdf +2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +35931 + +xmlrpc-client-3.1.jar +file + + + + +2010-06-16T09:28:42.000000Z +4ef5312a7a6a3609d9d0f2fb7feb0020 +2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +45124 + +xmlrpc-common-3.1-javadoc.jar +file + + + + +2010-06-16T09:28:42.000000Z +14534508ebfb41f99224f62243258e28 +2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +508783 + +xmlrpc-common-3.1-sources.jar +file + + + + +2010-06-16T09:28:42.000000Z +1d18fae953d418543107bbb2e37f4e61 +2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + 
+ + + + + + + + + +95464 + +xmlrpc-common-3.1.jar +file + + + + +2010-06-16T09:28:42.000000Z +a2b1aa278470cd2b88111205b2094196 +2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +104038 + +xmlrpc-server-3.1-javadoc.jar +file + + + + +2010-06-16T09:28:42.000000Z +dace332947fc371b2ee397b77a0d0c69 +2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +236635 + +xmlrpc-server-3.1-sources.jar +file + + + + +2010-06-16T09:28:42.000000Z +2ce1c0e27e84e9e5e5ab547fb9fdebb9 +2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +55411 + +xmlrpc-server-3.1.jar +file + + + + +2010-06-16T09:28:42.000000Z +eee3833c266912d672a783415ca63576 +2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +75310 + diff -r 000000000000 -r dc7622afcfea libs/.svn/prop-base/commons-io-1.4.jar.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/.svn/prop-base/commons-io-1.4.jar.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 24 +application/octet-stream +END diff -r 000000000000 -r dc7622afcfea libs/.svn/prop-base/jaxen-1.1.1.jar.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/.svn/prop-base/jaxen-1.1.1.jar.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 24 +application/octet-stream +END diff -r 000000000000 -r dc7622afcfea libs/.svn/prop-base/jdom-1.0.jar.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/.svn/prop-base/jdom-1.0.jar.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 24 +application/octet-stream +END diff -r 000000000000 -r dc7622afcfea libs/.svn/prop-base/ws-commons-util-1.0.2.jar.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/.svn/prop-base/ws-commons-util-1.0.2.jar.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 24 
+application/octet-stream +END diff -r 000000000000 -r dc7622afcfea libs/.svn/prop-base/xmlrpc-client-3.1-javadoc.jar.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/.svn/prop-base/xmlrpc-client-3.1-javadoc.jar.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 24 +application/octet-stream +END diff -r 000000000000 -r dc7622afcfea libs/.svn/prop-base/xmlrpc-client-3.1-sources.jar.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/.svn/prop-base/xmlrpc-client-3.1-sources.jar.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 24 +application/octet-stream +END diff -r 000000000000 -r dc7622afcfea libs/.svn/prop-base/xmlrpc-client-3.1.jar.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/.svn/prop-base/xmlrpc-client-3.1.jar.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 24 +application/octet-stream +END diff -r 000000000000 -r dc7622afcfea libs/.svn/prop-base/xmlrpc-common-3.1-javadoc.jar.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/.svn/prop-base/xmlrpc-common-3.1-javadoc.jar.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 24 +application/octet-stream +END diff -r 000000000000 -r dc7622afcfea libs/.svn/prop-base/xmlrpc-common-3.1-sources.jar.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/.svn/prop-base/xmlrpc-common-3.1-sources.jar.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 24 +application/octet-stream +END diff -r 000000000000 -r dc7622afcfea libs/.svn/prop-base/xmlrpc-common-3.1.jar.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/.svn/prop-base/xmlrpc-common-3.1.jar.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 24 +application/octet-stream +END diff -r 000000000000 -r dc7622afcfea libs/.svn/prop-base/xmlrpc-server-3.1-javadoc.jar.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 
+0000 +++ b/libs/.svn/prop-base/xmlrpc-server-3.1-javadoc.jar.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 24 +application/octet-stream +END diff -r 000000000000 -r dc7622afcfea libs/.svn/prop-base/xmlrpc-server-3.1-sources.jar.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/.svn/prop-base/xmlrpc-server-3.1-sources.jar.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 24 +application/octet-stream +END diff -r 000000000000 -r dc7622afcfea libs/.svn/prop-base/xmlrpc-server-3.1.jar.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libs/.svn/prop-base/xmlrpc-server-3.1.jar.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 24 +application/octet-stream +END diff -r 000000000000 -r dc7622afcfea libs/.svn/text-base/commons-io-1.4.jar.svn-base Binary file libs/.svn/text-base/commons-io-1.4.jar.svn-base has changed diff -r 000000000000 -r dc7622afcfea libs/.svn/text-base/jaxen-1.1.1.jar.svn-base Binary file libs/.svn/text-base/jaxen-1.1.1.jar.svn-base has changed diff -r 000000000000 -r dc7622afcfea libs/.svn/text-base/jdom-1.0.jar.svn-base Binary file libs/.svn/text-base/jdom-1.0.jar.svn-base has changed diff -r 000000000000 -r dc7622afcfea libs/.svn/text-base/ws-commons-util-1.0.2.jar.svn-base Binary file libs/.svn/text-base/ws-commons-util-1.0.2.jar.svn-base has changed diff -r 000000000000 -r dc7622afcfea libs/.svn/text-base/xmlrpc-client-3.1-javadoc.jar.svn-base Binary file libs/.svn/text-base/xmlrpc-client-3.1-javadoc.jar.svn-base has changed diff -r 000000000000 -r dc7622afcfea libs/.svn/text-base/xmlrpc-client-3.1-sources.jar.svn-base Binary file libs/.svn/text-base/xmlrpc-client-3.1-sources.jar.svn-base has changed diff -r 000000000000 -r dc7622afcfea libs/.svn/text-base/xmlrpc-client-3.1.jar.svn-base Binary file libs/.svn/text-base/xmlrpc-client-3.1.jar.svn-base has changed diff -r 000000000000 -r dc7622afcfea 
libs/.svn/text-base/xmlrpc-common-3.1-javadoc.jar.svn-base Binary file libs/.svn/text-base/xmlrpc-common-3.1-javadoc.jar.svn-base has changed diff -r 000000000000 -r dc7622afcfea libs/.svn/text-base/xmlrpc-common-3.1-sources.jar.svn-base Binary file libs/.svn/text-base/xmlrpc-common-3.1-sources.jar.svn-base has changed diff -r 000000000000 -r dc7622afcfea libs/.svn/text-base/xmlrpc-common-3.1.jar.svn-base Binary file libs/.svn/text-base/xmlrpc-common-3.1.jar.svn-base has changed diff -r 000000000000 -r dc7622afcfea libs/.svn/text-base/xmlrpc-server-3.1-javadoc.jar.svn-base Binary file libs/.svn/text-base/xmlrpc-server-3.1-javadoc.jar.svn-base has changed diff -r 000000000000 -r dc7622afcfea libs/.svn/text-base/xmlrpc-server-3.1-sources.jar.svn-base Binary file libs/.svn/text-base/xmlrpc-server-3.1-sources.jar.svn-base has changed diff -r 000000000000 -r dc7622afcfea libs/.svn/text-base/xmlrpc-server-3.1.jar.svn-base Binary file libs/.svn/text-base/xmlrpc-server-3.1.jar.svn-base has changed diff -r 000000000000 -r dc7622afcfea libs/commons-io-1.4.jar Binary file libs/commons-io-1.4.jar has changed diff -r 000000000000 -r dc7622afcfea libs/jaxen-1.1.1.jar Binary file libs/jaxen-1.1.1.jar has changed diff -r 000000000000 -r dc7622afcfea libs/jdom-1.0.jar Binary file libs/jdom-1.0.jar has changed diff -r 000000000000 -r dc7622afcfea libs/ws-commons-util-1.0.2.jar Binary file libs/ws-commons-util-1.0.2.jar has changed diff -r 000000000000 -r dc7622afcfea libs/xmlrpc-client-3.1-javadoc.jar Binary file libs/xmlrpc-client-3.1-javadoc.jar has changed diff -r 000000000000 -r dc7622afcfea libs/xmlrpc-client-3.1-sources.jar Binary file libs/xmlrpc-client-3.1-sources.jar has changed diff -r 000000000000 -r dc7622afcfea libs/xmlrpc-client-3.1.jar Binary file libs/xmlrpc-client-3.1.jar has changed diff -r 000000000000 -r dc7622afcfea libs/xmlrpc-common-3.1-javadoc.jar Binary file libs/xmlrpc-common-3.1-javadoc.jar has changed diff -r 000000000000 -r dc7622afcfea 
libs/xmlrpc-common-3.1-sources.jar Binary file libs/xmlrpc-common-3.1-sources.jar has changed diff -r 000000000000 -r dc7622afcfea libs/xmlrpc-common-3.1.jar Binary file libs/xmlrpc-common-3.1.jar has changed diff -r 000000000000 -r dc7622afcfea libs/xmlrpc-server-3.1-javadoc.jar Binary file libs/xmlrpc-server-3.1-javadoc.jar has changed diff -r 000000000000 -r dc7622afcfea libs/xmlrpc-server-3.1-sources.jar Binary file libs/xmlrpc-server-3.1-sources.jar has changed diff -r 000000000000 -r dc7622afcfea libs/xmlrpc-server-3.1.jar Binary file libs/xmlrpc-server-3.1.jar has changed diff -r 000000000000 -r dc7622afcfea model.uml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/model.uml Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,479 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + +
+ + + + + + + +
+ + + + + + + +
+ + + + + + +
+ + + + + +
+ + + + + + + +
+ + + + + +
+ + + + + +
+ + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + +
+ + + + + + + +
+ + + + + + + +
+ + + + + + +
+ + + + + +
+ + + + + + + +
+ + + + + +
+ + + + + +
+ + + + + + + + +
+ + + + + + +
+ + + + + + + + + + + +
+ + + + +
+ + + + +
+ + + + + + + +
+ + + + + + + +
+ + + + + + +
+ + + + + +
+ + + + + + + +
+ + + + + +
+ + + + + +
+ + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r dc7622afcfea model.umldi --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/model.umldi Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,9 @@ + + + + + + + + + diff -r 000000000000 -r dc7622afcfea src/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 25 +svn:wc:ra_dav:version-url +V 44 +/svn/!svn/ver/2621/fulltextIndexer/trunk/src +END diff -r 000000000000 -r dc7622afcfea src/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,41 @@ +10 + +dir +2621 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/src +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-16T19:13:23.990503Z +2621 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +META-INF +dir + +de +dir + diff -r 000000000000 -r dc7622afcfea src/META-INF/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/META-INF/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,11 @@ +K 25 +svn:wc:ra_dav:version-url +V 53 +/svn/!svn/ver/2873/fulltextIndexer/trunk/src/META-INF +END +MANIFEST.MF +K 25 +svn:wc:ra_dav:version-url +V 65 +/svn/!svn/ver/2873/fulltextIndexer/trunk/src/META-INF/MANIFEST.MF +END diff -r 000000000000 -r dc7622afcfea src/META-INF/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/META-INF/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,69 @@ +10 + +dir +2873 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/src/META-INF +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-11-03T11:29:50.531613Z +2873 +dwinter + + +svn:special svn:externals svn:needs-lock + + + 
+ + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +MANIFEST.MF +file + + + + +2010-07-12T19:40:15.000000Z +b10f37c8bb1803d98c127a01d1a71cc5 +2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +39 + diff -r 000000000000 -r dc7622afcfea src/META-INF/.svn/prop-base/MANIFEST.MF.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/META-INF/.svn/prop-base/MANIFEST.MF.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea src/META-INF/.svn/text-base/MANIFEST.MF.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/META-INF/.svn/text-base/MANIFEST.MF.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,3 @@ +Manifest-Version: 1.0 +Class-Path: + diff -r 000000000000 -r dc7622afcfea src/META-INF/MANIFEST.MF --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/META-INF/MANIFEST.MF Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,3 @@ +Manifest-Version: 1.0 +Class-Path: + diff -r 000000000000 -r dc7622afcfea src/de/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 25 +svn:wc:ra_dav:version-url +V 47 +/svn/!svn/ver/2621/fulltextIndexer/trunk/src/de +END diff -r 000000000000 -r dc7622afcfea src/de/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,38 @@ +10 + +dir +2621 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/src/de +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-16T19:13:23.990503Z +2621 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +mpiwg +dir + diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 25 
+svn:wc:ra_dav:version-url +V 53 +/svn/!svn/ver/2621/fulltextIndexer/trunk/src/de/mpiwg +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,38 @@ +10 + +dir +2621 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/src/de/mpiwg +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-16T19:13:23.990503Z +2621 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +dwinter +dir + diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 25 +svn:wc:ra_dav:version-url +V 61 +/svn/!svn/ver/2621/fulltextIndexer/trunk/src/de/mpiwg/dwinter +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,41 @@ +10 + +dir +2621 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/src/de/mpiwg/dwinter +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-16T19:13:23.990503Z +2621 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +fulltextIndexer +dir + +lucencetools +dir + diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 25 +svn:wc:ra_dav:version-url +V 77 +/svn/!svn/ver/2621/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/src/de/mpiwg/dwinter/fulltextIndexer/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,47 @@ +10 + +dir +2621 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-16T19:13:23.990503Z +2621 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +OCRutils +dir + +catalog +dir + +harvester +dir + +utils +dir + diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,17 @@ +K 25 +svn:wc:ra_dav:version-url +V 86 +/svn/!svn/ver/2621/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/OCRutils +END +OCRDocument.java +K 25 +svn:wc:ra_dav:version-url +V 103 +/svn/!svn/ver/2621/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/OCRutils/OCRDocument.java +END +ParseOcrDocument.java +K 25 +svn:wc:ra_dav:version-url +V 108 +/svn/!svn/ver/2621/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/OCRutils/ParseOcrDocument.java +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,103 @@ +10 + +dir +2621 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/OCRutils +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-16T19:13:23.990503Z +2621 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +OCRDocument.java +file + + + + +2010-06-16T07:59:06.000000Z +7422c151d83889cea5eba21809de28d2 +2010-06-16T19:13:23.990503Z +2621 +dwinter +has-props + + + + + + + + + + + 
+ + + + + + + + + +1298 + +ParseOcrDocument.java +file + + + + +2010-06-16T09:37:04.000000Z +1597e8aba362f1a61f9d3a0f7c8d9613 +2010-06-16T19:13:23.990503Z +2621 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +4843 + diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/prop-base/OCRDocument.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/prop-base/OCRDocument.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,9 @@ +K 14 +svn:executable +V 1 +* +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/prop-base/ParseOcrDocument.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/prop-base/ParseOcrDocument.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,9 @@ +K 14 +svn:executable +V 1 +* +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/text-base/OCRDocument.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/text-base/OCRDocument.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,44 @@ +/* */ package de.mpiwg.dwinter.fulltextIndexer.OCRutils; +/* */ +/* */ import java.util.ArrayList; +/* */ +/* */ public class OCRDocument +/* */ { +/* */ public String filename; +/* */ public String pageDimension; +/* */ public ArrayList OCRLines; +/* */ +/* */ public OCRDocument(String filename, String pageDimension, ArrayList lines) +/* */ { +/* 26 */ this.filename = filename; +/* 27 */ this.pageDimension = pageDimension; +/* 28 */ this.OCRLines = lines; +/* */ } +/* */ +/* */ public OCRDocument() { +/* 32 */ this.OCRLines = new ArrayList(); +/* */ } +/* */ +/* */ public class OCRLine +/* */ { +/* */ public String lineNumber; +/* */ public String bbox; +/* */ public String 
content; +/* */ +/* */ public OCRLine(String lineNumber, String bbox, String content) +/* */ { +/* 14 */ this.lineNumber = lineNumber; +/* 15 */ this.bbox = bbox; +/* 16 */ this.content = content; +/* */ } +/* */ +/* */ public OCRLine() +/* */ { +/* */ } +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/text-base/ParseOcrDocument.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/OCRutils/.svn/text-base/ParseOcrDocument.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,146 @@ +/* */ package de.mpiwg.dwinter.fulltextIndexer.OCRutils; +/* */ +/* */ import java.util.ArrayList; +/* */ import org.xml.sax.Attributes; +/* */ import org.xml.sax.Locator; +/* */ import org.xml.sax.SAXException; +/* */ import org.xml.sax.helpers.DefaultHandler; +/* */ +/* */ public class ParseOcrDocument extends DefaultHandler +/* */ { +/* 31 */ public OCRDocument ocrDocument = new OCRDocument(); +/* */ private int lineCounter; +/* */ private OCRDocument.OCRLine currentLine; +/* */ private boolean inLine; +/* */ +/* */ public void startDocument() +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void endDocument() +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void characters(char[] c, int start, int length) +/* */ throws SAXException +/* */ { +/* 52 */ if (this.inLine) +/* 53 */ this.currentLine.content += new String(c, start, length); +/* */ } +/* */ +/* */ public void ignorableWhitespace(char[] c, int start, int length) +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void processingInstruction(String target, String data) +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void setDocumentLocator(Locator arg1) +/* */ { +/* */ } +/* */ 
+/* */ public void endElement(String uri, String localName, String name) +/* */ throws SAXException +/* */ { +/* 76 */ if ((!name.equals("span")) || +/* 78 */ (!this.inLine)) +/* */ return; +/* 80 */ this.ocrDocument.OCRLines.add(this.currentLine); +/* 81 */ this.inLine = false; +/* */ } +/* */ +/* */ public void endPrefixMapping(String prefix) +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void skippedEntity(String name) +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void startElement(String uri, String localName, String name, Attributes attrs) +/* */ throws SAXException +/* */ { +/* 100 */ if ((name.equals("div")) && +/* 101 */ (attrs != null)) +/* */ { +/* 103 */ int length = attrs.getLength(); +/* */ +/* 106 */ for (int i = 0; i < length; ++i) +/* */ { +/* 108 */ if ((!attrs.getLocalName(i).equals("class")) || +/* 110 */ (!attrs.getValue(i).equals("ocr_page"))) continue; +/* 111 */ doPage(attrs); +/* */ } +/* */ +/* */ } +/* */ +/* 117 */ if ((!name.equals("span")) || +/* 118 */ (attrs == null)) +/* */ return; +/* 120 */ int length = attrs.getLength(); +/* */ +/* 123 */ for (int i = 0; i < length; ++i) +/* */ { +/* 125 */ if ((!attrs.getLocalName(i).equals("class")) || +/* 127 */ (!attrs.getValue(i).equals("ocr_line"))) continue; +/* 128 */ doLine(attrs); +/* */ } +/* */ } +/* */ +/* */ private void doPage(Attributes attrs) +/* */ { +/* 136 */ int length = attrs.getLength(); +/* */ +/* 139 */ for (int i = 0; i < length; ++i) +/* */ { +/* 141 */ if (!attrs.getLocalName(i).equals("title")) +/* */ continue; +/* 143 */ String title = attrs.getValue(i); +/* 144 */ String[] splitted = title.split(" "); +/* 145 */ String dimension = splitted[(splitted.length - 2)] + " " + splitted[(splitted.length - 1)]; +/* 146 */ this.ocrDocument.pageDimension = dimension; +/* */ } +/* */ +/* 149 */ this.lineCounter = 0; +/* */ } +/* */ +/* */ private void doLine(Attributes attrs) +/* */ { +/* */ OCRDocument doc = this.ocrDocument; + 
this.currentLine = doc.new OCRLine(); +/* */ +/* 156 */ this.inLine = true; +/* 157 */ this.currentLine.content = new String(); +/* */ +/* 159 */ int length = attrs.getLength(); +/* 160 */ this.currentLine.lineNumber = String.valueOf(this.lineCounter); +/* 161 */ this.lineCounter += 1; +/* 162 */ this.currentLine.bbox = "0 0"; +/* 163 */ for (int i = 0; i < length; ++i) +/* */ { +/* 165 */ if (!attrs.getLocalName(i).equals("title")) +/* */ continue; +/* 167 */ String title = attrs.getValue(i); +/* */ +/* 169 */ String dimension = title.replace("bbox ", ""); +/* 170 */ this.currentLine.bbox = dimension; +/* */ } +/* */ } +/* */ +/* */ public void startPrefixMapping(String prefix, String uri) +/* */ throws SAXException +/* */ { +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/OCRutils/OCRDocument.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/OCRutils/OCRDocument.java Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,44 @@ +/* */ package de.mpiwg.dwinter.fulltextIndexer.OCRutils; +/* */ +/* */ import java.util.ArrayList; +/* */ +/* */ public class OCRDocument +/* */ { +/* */ public String filename; +/* */ public String pageDimension; +/* */ public ArrayList OCRLines; +/* */ +/* */ public OCRDocument(String filename, String pageDimension, ArrayList lines) +/* */ { +/* 26 */ this.filename = filename; +/* 27 */ this.pageDimension = pageDimension; +/* 28 */ this.OCRLines = lines; +/* */ } +/* */ +/* */ public OCRDocument() { +/* 32 */ this.OCRLines = new ArrayList(); +/* */ } +/* */ +/* */ public class OCRLine +/* */ { +/* */ public String lineNumber; +/* */ public String bbox; +/* */ public String content; +/* */ +/* */ public OCRLine(String lineNumber, String bbox, String content) +/* */ { +/* 14 */ 
this.lineNumber = lineNumber; +/* 15 */ this.bbox = bbox; +/* 16 */ this.content = content; +/* */ } +/* */ +/* */ public OCRLine() +/* */ { +/* */ } +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/OCRutils/ParseOcrDocument.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/OCRutils/ParseOcrDocument.java Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,146 @@ +/* */ package de.mpiwg.dwinter.fulltextIndexer.OCRutils; +/* */ +/* */ import java.util.ArrayList; +/* */ import org.xml.sax.Attributes; +/* */ import org.xml.sax.Locator; +/* */ import org.xml.sax.SAXException; +/* */ import org.xml.sax.helpers.DefaultHandler; +/* */ +/* */ public class ParseOcrDocument extends DefaultHandler +/* */ { +/* 31 */ public OCRDocument ocrDocument = new OCRDocument(); +/* */ private int lineCounter; +/* */ private OCRDocument.OCRLine currentLine; +/* */ private boolean inLine; +/* */ +/* */ public void startDocument() +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void endDocument() +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void characters(char[] c, int start, int length) +/* */ throws SAXException +/* */ { +/* 52 */ if (this.inLine) +/* 53 */ this.currentLine.content += new String(c, start, length); +/* */ } +/* */ +/* */ public void ignorableWhitespace(char[] c, int start, int length) +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void processingInstruction(String target, String data) +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void setDocumentLocator(Locator arg1) +/* */ { +/* */ } +/* */ +/* */ public void endElement(String uri, String localName, String name) +/* */ throws SAXException +/* */ { +/* 76 */ if ((!name.equals("span")) || +/* 
78 */ (!this.inLine)) +/* */ return; +/* 80 */ this.ocrDocument.OCRLines.add(this.currentLine); +/* 81 */ this.inLine = false; +/* */ } +/* */ +/* */ public void endPrefixMapping(String prefix) +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void skippedEntity(String name) +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void startElement(String uri, String localName, String name, Attributes attrs) +/* */ throws SAXException +/* */ { +/* 100 */ if ((name.equals("div")) && +/* 101 */ (attrs != null)) +/* */ { +/* 103 */ int length = attrs.getLength(); +/* */ +/* 106 */ for (int i = 0; i < length; ++i) +/* */ { +/* 108 */ if ((!attrs.getLocalName(i).equals("class")) || +/* 110 */ (!attrs.getValue(i).equals("ocr_page"))) continue; +/* 111 */ doPage(attrs); +/* */ } +/* */ +/* */ } +/* */ +/* 117 */ if ((!name.equals("span")) || +/* 118 */ (attrs == null)) +/* */ return; +/* 120 */ int length = attrs.getLength(); +/* */ +/* 123 */ for (int i = 0; i < length; ++i) +/* */ { +/* 125 */ if ((!attrs.getLocalName(i).equals("class")) || +/* 127 */ (!attrs.getValue(i).equals("ocr_line"))) continue; +/* 128 */ doLine(attrs); +/* */ } +/* */ } +/* */ +/* */ private void doPage(Attributes attrs) +/* */ { +/* 136 */ int length = attrs.getLength(); +/* */ +/* 139 */ for (int i = 0; i < length; ++i) +/* */ { +/* 141 */ if (!attrs.getLocalName(i).equals("title")) +/* */ continue; +/* 143 */ String title = attrs.getValue(i); +/* 144 */ String[] splitted = title.split(" "); +/* 145 */ String dimension = splitted[(splitted.length - 2)] + " " + splitted[(splitted.length - 1)]; +/* 146 */ this.ocrDocument.pageDimension = dimension; +/* */ } +/* */ +/* 149 */ this.lineCounter = 0; +/* */ } +/* */ +/* */ private void doLine(Attributes attrs) +/* */ { +/* */ OCRDocument doc = this.ocrDocument; + this.currentLine = doc.new OCRLine(); +/* */ +/* 156 */ this.inLine = true; +/* 157 */ this.currentLine.content = new String(); +/* */ +/* 159 */ int length = 
attrs.getLength(); +/* 160 */ this.currentLine.lineNumber = String.valueOf(this.lineCounter); +/* 161 */ this.lineCounter += 1; +/* 162 */ this.currentLine.bbox = "0 0"; +/* 163 */ for (int i = 0; i < length; ++i) +/* */ { +/* 165 */ if (!attrs.getLocalName(i).equals("title")) +/* */ continue; +/* 167 */ String title = attrs.getValue(i); +/* */ +/* 169 */ String dimension = title.replace("bbox ", ""); +/* 170 */ this.currentLine.bbox = dimension; +/* */ } +/* */ } +/* */ +/* */ public void startPrefixMapping(String prefix, String uri) +/* */ throws SAXException +/* */ { +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.fulltextIndexer.OCRutils.ParseOcrDocument + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/catalog/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/catalog/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,29 @@ +K 25 +svn:wc:ra_dav:version-url +V 85 +/svn/!svn/ver/2873/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/catalog +END +xhtml1-transitional.dtd +K 25 +svn:wc:ra_dav:version-url +V 109 +/svn/!svn/ver/2873/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/catalog/xhtml1-transitional.dtd +END +xhtml-lat1.ent +K 25 +svn:wc:ra_dav:version-url +V 100 +/svn/!svn/ver/2873/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/catalog/xhtml-lat1.ent +END +xhtml-special.ent +K 25 +svn:wc:ra_dav:version-url +V 103 +/svn/!svn/ver/2873/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/catalog/xhtml-special.ent +END +xhtml-symbol.ent +K 25 +svn:wc:ra_dav:version-url +V 102 +/svn/!svn/ver/2873/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/catalog/xhtml-symbol.ent +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/catalog/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/src/de/mpiwg/dwinter/fulltextIndexer/catalog/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,171 @@ +10 + +dir +2873 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/catalog +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-11-03T11:29:50.531613Z +2873 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +xhtml-lat1.ent +file + + + + +2010-06-16T18:30:41.000000Z +9d841f57d8a28eb32c04db0e36a078dd +2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +11775 + +xhtml-special.ent +file + + + + +2010-06-16T18:30:41.000000Z +dc34235b0742b58b7a710cc63459c0f7 +2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +4131 + +xhtml-symbol.ent +file + + + + +2010-06-16T18:30:41.000000Z +17c862dda86c53953f18452bdef54e40 +2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +13848 + +xhtml1-transitional.dtd +file + + + + +2010-06-16T18:30:41.000000Z +52118e4fe2efb0930ce211247ec237d0 +2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +32111 + diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/catalog/.svn/prop-base/xhtml-lat1.ent.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/catalog/.svn/prop-base/xhtml-lat1.ent.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/catalog/.svn/prop-base/xhtml-special.ent.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/catalog/.svn/prop-base/xhtml-special.ent.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea 
src/de/mpiwg/dwinter/fulltextIndexer/catalog/.svn/prop-base/xhtml-symbol.ent.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/catalog/.svn/prop-base/xhtml-symbol.ent.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/catalog/.svn/prop-base/xhtml1-transitional.dtd.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/catalog/.svn/prop-base/xhtml1-transitional.dtd.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/catalog/.svn/text-base/xhtml-lat1.ent.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/catalog/.svn/text-base/xhtml-lat1.ent.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,196 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/catalog/.svn/text-base/xhtml-special.ent.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/catalog/.svn/text-base/xhtml-special.ent.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,80 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/catalog/.svn/text-base/xhtml-symbol.ent.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/catalog/.svn/text-base/xhtml-symbol.ent.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,237 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/catalog/.svn/text-base/xhtml1-transitional.dtd.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/catalog/.svn/text-base/xhtml1-transitional.dtd.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,1201 @@ + + + + + +%HTMLlat1; + + +%HTMLsymbol; + + +%HTMLspecial; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/catalog/xhtml-lat1.ent --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/catalog/xhtml-lat1.ent Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,196 @@ 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/catalog/xhtml-special.ent --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/catalog/xhtml-special.ent Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,80 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/catalog/xhtml-symbol.ent --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/catalog/xhtml-symbol.ent Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,237 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/catalog/xhtml1-transitional.dtd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/catalog/xhtml1-transitional.dtd Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,1201 @@ + + + + + +%HTMLlat1; + + +%HTMLsymbol; + + +%HTMLspecial; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,29 @@ +K 25 +svn:wc:ra_dav:version-url +V 87 +/svn/!svn/ver/2621/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/harvester +END +HarvesterThreaded.java +K 25 +svn:wc:ra_dav:version-url +V 110 +/svn/!svn/ver/2873/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/harvester/HarvesterThreaded.java +END +DocHarvesterThreaded.java +K 25 +svn:wc:ra_dav:version-url +V 113 +/svn/!svn/ver/2873/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/harvester/DocHarvesterThreaded.java +END +MyResolver.java +K 25 +svn:wc:ra_dav:version-url +V 103 +/svn/!svn/ver/2873/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/harvester/MyResolver.java +END +OCRHarvesterThreaded.java +K 25 +svn:wc:ra_dav:version-url +V 113 +/svn/!svn/ver/2873/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/harvester/OCRHarvesterThreaded.java +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,177 @@ +10 + 
+dir +2621 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/harvester +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-16T19:13:23.990503Z +2621 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +CLI +dir + +DocHarvesterThreaded.java +file +2873 + + + +2010-07-01T14:20:21.000000Z +f0712a7f0aceb96161041a7be6f7e0ba +2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +17082 + +HarvesterThreaded.java +file +2873 + + + +2010-06-29T07:35:30.000000Z +cf6f725d6f13d99bf3eb602a3a28d5bc +2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +12910 + +MyResolver.java +file +2873 + + + +2010-06-30T12:56:38.000000Z +4af3c09063cb95e17534deb29856499c +2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +1823 + +OCRHarvesterThreaded.java +file +2873 + + + +2010-09-10T11:41:21.000000Z +a55051234f105ec27be3c87c836acc33 +2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +5008 + +processors +dir + diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/prop-base/DocHarvesterThreaded.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/prop-base/DocHarvesterThreaded.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/prop-base/HarvesterThreaded.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/prop-base/HarvesterThreaded.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,9 @@ +K 14 +svn:executable +V 1 +* +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r 
dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/prop-base/MyResolver.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/prop-base/MyResolver.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/prop-base/OCRHarvesterThreaded.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/prop-base/OCRHarvesterThreaded.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,9 @@ +K 14 +svn:executable +V 1 +* +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/text-base/DocHarvesterThreaded.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/text-base/DocHarvesterThreaded.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,577 @@ +package de.mpiwg.dwinter.fulltextIndexer.harvester; + +/* Harveste jeweils ein komplettes Buch in einen Eintrag + * + * */ +import de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread; + +import de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer; + +import de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers; + +import java.io.BufferedReader; + +import java.io.File; + +import java.io.FileNotFoundException; + +import java.io.BufferedInputStream; +import java.io.BufferedWriter; +import java.io.ByteArrayOutputStream; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.StringWriter; + +import java.io.IOException; + +import java.io.PrintStream; + +import java.util.ArrayList; + +import java.util.Arrays; + +import 
java.util.Date; + +import java.util.HashMap; + +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.parsers.SAXParser; +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerConfigurationException; +import javax.xml.transform.TransformerException; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMResult; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; +import javax.xml.transform.stream.StreamSource; + +import org.apache.commons.io.IOUtils; +import org.apache.lucene.analysis.de.GermanAnalyzer; + +import org.apache.lucene.analysis.fr.FrenchAnalyzer; + +import org.apache.lucene.analysis.standard.StandardAnalyzer; + +import org.apache.lucene.index.CorruptIndexException; + +import org.apache.lucene.store.LockObtainFailedException; + +import org.apache.lucene.util.Version; +import org.apache.ws.commons.serialize.XMLWriterImpl; + +import org.jdom.Document; + +import org.jdom.Element; + +import org.jdom.JDOMException; + +import org.jdom.input.SAXBuilder; +import org.jdom.xpath.XPath; +import org.w3c.dom.DocumentFragment; +import org.xml.sax.SAXException; + +import com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl; + +public class DocHarvesterThreaded { + private static final boolean DEBUG = false; + private static final int MAXFILES = 3; + + //private static final String PREFIX = "/tmp/composed/files"; + private static final String PREFIX = "/Volumes/data/composed/files"; + private static final String COMPOSEDFN = "doc.xml"; + private static final boolean CREATE_NEW = false; + + protected static ArrayList fileTypesToIndex = new ArrayList( + Arrays.asList(new String[] { "xml" })); + + protected static ArrayList 
excludeFolders = new ArrayList( + Arrays.asList(new String[] { "OCR" })); + protected static boolean indexMetaPriority = false; + + private static String specialMode = ""; + protected static int maxThread = 30; + protected File docDir; + protected File index_dir; + protected HashMap textLanguage = null; + protected HashMap languageToISO = new HashMap(); + protected LanguageAnalyzers languageAnalyzers = new LanguageAnalyzers(); + + private int counter = 0; + protected String languageFileName; + protected ProcessFileThread[] mythreads = new ProcessFileThread[maxThread]; + private int filecount = 0; + + protected String mdProviderUrl = null; + private String preferedLanguage; + protected HashMap supportedLanguageFolder = new HashMap(); + private int completedFiles = 0; // counter for all files completed and indexed + + public DocHarvesterThreaded() { + } + + public DocHarvesterThreaded(File docDir, File index_dir, + String languageFileName, String mdProviderUrl, String lang) + throws CorruptIndexException, LockObtainFailedException, + IOException { + /* 119 */this.docDir = docDir; + /* 120 */this.languageFileName = languageFileName; + /* 121 */this.preferedLanguage = lang; + + /* 133 */this.mdProviderUrl = mdProviderUrl; + + /* 135 */this.index_dir = index_dir; + + /* 137 */for (int i = 0; i < maxThread; ++i) { + /* 139 */this.mythreads[i] = null; + } + + /* 142 */init_languages(); + } + + private void init_languages() { + /* 146 */this.languageToISO.put("German", "de"); + /* 147 */this.languageToISO.put("French", "fr"); + /* 148 */this.languageToISO.put("English", "en"); + /* 149 */this.languageToISO.put("German-f", "de-f"); + + /* 151 */this.supportedLanguageFolder.put("deu", "de"); + /* 152 */this.supportedLanguageFolder.put("deu-f", "de"); + /* 153 */this.supportedLanguageFolder.put("fra", "fr"); + /* 154 */this.supportedLanguageFolder.put("eng", "en"); + /* 155 */this.supportedLanguageFolder.put("lic", "la"); + try { + /* 158 */this.languageAnalyzers.add(new 
LanguageAnalyzer("de", + new GermanAnalyzer(Version.LUCENE_30), this.index_dir)); + /* 160 */this.languageAnalyzers.add(new LanguageAnalyzer("en", + new StandardAnalyzer(Version.LUCENE_30), this.index_dir)); + /* 161 */this.languageAnalyzers.add(new LanguageAnalyzer("fr", + new FrenchAnalyzer(Version.LUCENE_30), this.index_dir)); + /* 162 */this.languageAnalyzers.add(new LanguageAnalyzer("la", + new StandardAnalyzer(Version.LUCENE_30), this.index_dir)); + + /* 164 */this.languageAnalyzers.add(new LanguageAnalyzer("all", + new StandardAnalyzer(Version.LUCENE_30), this.index_dir)); + /* 165 */this.languageAnalyzers.add(new LanguageAnalyzer("morph", + new StandardAnalyzer(Version.LUCENE_30), this.index_dir)); + } catch (CorruptIndexException e) { + /* 167 */e.printStackTrace(); + /* 168 */System.exit(1); + } catch (LockObtainFailedException e) { + /* 170 */e.printStackTrace(); + /* 171 */System.exit(1); + } catch (IOException e) { + /* 173 */e.printStackTrace(); + /* 174 */System.exit(1); + } + } + + public DocHarvesterThreaded(File docDir, File index_dir, + String mdProviderUrl) throws CorruptIndexException, + LockObtainFailedException, IOException { + /* 180 */this(docDir, index_dir, null, mdProviderUrl, null); + } + + protected HashMap loadLanguages() { + /* 187 */File languageFile = new File(this.docDir + "/" + + this.languageFileName); + /* 188 */String languageFilePath = this.docDir + "/" + + this.languageFileName; + /* 189 */HashMap languages = new HashMap(); + /* 190 */boolean relativ = true; + /* 191 */if (this.languageFileName == null) + /* 192 */return null; + /* 193 */if (!languageFile.exists()) { + /* 195 */languageFile = new File(this.languageFileName); + /* 196 */languageFilePath = this.languageFileName; + /* 197 */relativ = false; + /* 198 */if (!languageFile.exists()) + /* 199 */return null; + } + BufferedReader in; + try { + /* 203 */in = new BufferedReader(new FileReader(languageFilePath)); + } catch (FileNotFoundException e) { + /* 205 */return 
null; + } + + /* 208 */String zeile = null; + try { + /* 210 */while ((zeile = in.readLine()) != null) { + /* 211 */String[] splitted = zeile.replace("\"", "").split( + "[,]"); + /* 212 */if (splitted.length == 2) + /* 213 */if (relativ) + /* 214 */languages.put(this.docDir + "/" + splitted[0], + splitted[1]); + else + /* 216 */languages.put(splitted[0], splitted[1]); + } + } catch (IOException e) { + /* 220 */e.printStackTrace(); + /* 221 */return null; + } + + /* 224 */return languages; + } + + public void harvestFromRDF(String rdffilepath) throws InterruptedException, + JDOMException { + /* 228 */Date start = new Date(); + /* 229 */boolean create = true; + try { + /* 240 */System.out.println("Indexing to directory '" + + this.index_dir + "'..."); + /* 241 */ArrayList files = getFileListFromRDF(rdffilepath); + /* 242 */indexDocs(files); + /* 243 */System.out.println("Optimizing..."); + /* 244 */this.languageAnalyzers.optimize(); + /* 245 */this.languageAnalyzers.close(); + + /* 247 */Date end = new Date(); + /* 248 */System.out.println(end.getTime() - start.getTime() + + " total milliseconds"); + } catch (IOException e) { + /* 251 */System.out.println(" caught a " + e.getClass() + + /* 252 */"\n with message: " + e.getMessage()); + } + } + + private ArrayList getFileListFromRDF(String rdffilepath) + throws JDOMException, IOException { + /* 260 */ArrayList ret = new ArrayList(); + /* 261 */SAXBuilder builder = new SAXBuilder(); + + /* 263 */Document doc = builder.build(rdffilepath); + + /* 265 */Element el = doc.getRootElement(); + + /* 267 */XPath xpath = XPath.newInstance("//MPIWG:archive-path"); + /* 268 */xpath.addNamespace("MPIWG", + "http://www.mpiwg-berlin.mpg.de/ns/mpiwg"); + /* 269 */List paths = xpath.selectNodes(el); + /* 270 */for (Element path : paths) { + /* 271 */ret.add(path.getText()); + } + + /* 274 */return ret; + } + + public void harvestFolder() throws InterruptedException { + /* 278 */Date start = new Date(); + /* 279 */boolean create = true; 
+ try { + /* 290 */System.out.println("Indexing to directory '" + + this.index_dir + "'..."); + /* 291 */indexDocs(this.docDir); + /* 292 */System.out.println("Optimizing..."); + /* 293 */this.languageAnalyzers.optimize(); + /* 294 */this.languageAnalyzers.close(); + + /* 296 */Date end = new Date(); + /* 297 */System.out.println(end.getTime() - start.getTime() + + " total milliseconds"); + } catch (IOException e) { + /* 300 */System.out.println(" caught a " + e.getClass() + + /* 301 */"\n with message: " + e.getMessage()); + } + } + + private void indexDocs(ArrayList files) throws IOException, + InterruptedException { + /* 308 */for (String filename : files) { + /* 310 */indexDocs(new File(this.docDir.getAbsolutePath() + + filename)); + if ((DEBUG == true) & (this.filecount > MAXFILES)) + break; + } + } + + void indexDocs(File file) throws IOException, InterruptedException { + /* 317 */if (!file.canRead()) + return; + /* 319 */ + /* 321 */if ((DEBUG == true) && (this.filecount > MAXFILES)) + return; + /* 325 */String[] files = file.list(); + + /* 327 */String folderName = file.getName(); + + boolean notExists = !checkFileExists(file); + boolean createNew = CREATE_NEW || notExists; + // boolean createNew = true; + + boolean fileStillEmpty = true; + if (createNew) { + clearFile(file); // loesche das gesamtfile + } else { + fileStillEmpty = false; // assume that file is not empty, if it already exists + } + + + if ((((files != null) ? 1 : 0) & ((excludeFolders.contains(folderName)) ? 
0 + : 1)) != 0) { + for (int i = 0; i < files.length; ++i) { + File nextFile = new File(file, files[i]); + + if (nextFile.isDirectory()) // directory dann gehe in die + // naechste ebene + indexDocs(nextFile); + + else if (isTextFile(nextFile)) { + + if (createNew) { + fileStillEmpty = false; //datei hat jetzt einen Inhalt + compose(file, nextFile); // fuege das file an das + // gesamtfilean + } + + } + if ((DEBUG == true) && (this.filecount > MAXFILES)) + break; + } + if (createNew) { + if (fileStillEmpty){ + deleteComposedFile(file); // file hat keinen inhalt dann loeschen + } else { + finishFile(file); + } + } + + if (!fileStillEmpty) + processCompleteFile(file); + /* 335 */} else { + /* 342 */System.out.println("not adding " + file); + } + } + + private void finishFile(File folder) { + File cf = getComposedFile(folder); + System.out.println(); + try { + System.out.println("finish file:" + cf.getCanonicalPath()); + FileWriter fw = new FileWriter(cf, true); + + fw.write(""); + fw.close(); + + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + } + + private boolean deleteComposedFile(File folder) { + File cf = getComposedFile(folder); + try { + System.out.println("file deleted, because empty:" + cf.getCanonicalPath()); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + return cf.delete(); + } + + + private void processCompleteFile(File folder) { + System.out.println("Completed File:"+String.valueOf(completedFiles++)); + File cf = getComposedFile(folder); + try { + processFile(cf); + } catch (CorruptIndexException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (LockObtainFailedException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + } + + private boolean checkFileExists(File folder) { + File cf = getComposedFile(folder); + return 
cf.exists(); + + } + + private void clearFile(File folder) { + File cf = getComposedFile(folder); + cf.delete(); + try { + File dir = cf.getParentFile(); + if (false == dir.exists()) { + dir.mkdirs(); + } + + cf.createNewFile(); + + FileWriter fw = new FileWriter(cf); + fw.write(""); + fw.close(); + + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + } + + private void compose(File folder, File file) { + File cf = getComposedFile(folder); + try { + System.out.println("Adding" + file.getCanonicalPath()); + //FileWriter fw = new FileWriter(cf, true); + + FileOutputStream stream = new FileOutputStream(cf,true); + + OutputStreamWriter fw = new OutputStreamWriter(stream, "utf-8"); + + String filteredDocument=""; + try { + filteredDocument = getFilteredFile(file); + } catch (TransformerException e) { + filteredDocument = ""; + }catch (ParserConfigurationException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (SAXException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + fw.append(filteredDocument); + fw.write(""); + fw.close(); + + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + } + + private String getFilteredFile(File file) throws IOException, + TransformerException, ParserConfigurationException, SAXException { + + // String txt = IOUtils.toString(new FileInputStream(file)); + // get rid of the entities + TransformerFactory tf = TransformerFactory.newInstance(); + Transformer t = tf.newTransformer(); + + + //OutputStream output = new ByteArrayOutputStream(); + + //BufferedWriter sw = new BufferedWriter(new OutputStreamWriter(output, "utf-8")); + + DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + dbf.setNamespaceAware(true); + dbf.setValidating(false); + DocumentBuilder db = dbf.newDocumentBuilder(); + db.setEntityResolver(new MyResolver()); + org.w3c.dom.Document doc = db.parse(file); + + StringWriter sw = 
new StringWriter(); + StreamResult sr = new StreamResult(sw); + + org.w3c.dom.Document tgtDoc = db.newDocument(); + DocumentFragment fragment = tgtDoc.createDocumentFragment(); + DOMResult tgtDom = new DOMResult( fragment ); + + t.setOutputProperty(OutputKeys.ENCODING, "utf-8"); + t.transform(new DOMSource(doc), sr); + t.transform(new DOMSource(doc), tgtDom); + + String txt = sw.toString(); + + + + Pattern p = Pattern.compile("(.*)", Pattern.DOTALL); + Matcher m = p.matcher(txt); + if (m.find()) + if (m.groupCount() > 0) { + return m.group(1); + } + return ""; + } + + private File getComposedFile(File folder) { + try { + String path = folder.getCanonicalPath(); + String newPath = PREFIX + path + "/" + COMPOSEDFN; + return new File(newPath); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + return null; + } + + protected void processFile(File file) throws CorruptIndexException, + LockObtainFailedException, IOException { + /* 348 */int freeThread = -1; + /* 349 */while (freeThread == -1) { + /* 351 */freeThread = waitForFreeThread(); + } + + /* 355 */if (this.textLanguage == null) + /* 356 */this.textLanguage = loadLanguages(); + /* 357 */this.mythreads[freeThread] = new ProcessFileThread( + this.languageAnalyzers, file, this.languageFileName, + this.textLanguage, this.mdProviderUrl, this.preferedLanguage, + this.languageToISO, this.supportedLanguageFolder); + /* 358 */this.mythreads[freeThread].start(); + /* 359 */System.out.println("New process started:" + freeThread); + } + + protected int waitForFreeThread() { + /* 367 */for (int i = 0; i < maxThread; ++i) { + /* 369 */if (this.mythreads[i] == null) + /* 370 */return i; + /* 371 */if (!this.mythreads[i].done) + continue; + /* 373 */this.filecount += 1; + /* 374 */System.out.println("filecount:" + this.filecount); + /* 375 */return i; + } + + /* 378 */return -1; + } + + private boolean isTextFile(File file) { + /* 392 */String fn = file.getName(); + + /* 394 */String[] 
splitted = fn.split("[.]"); + + /* 396 */String ext = ""; + + /* 398 */if (splitted.length > 1) { + /* 400 */ext = splitted[(splitted.length - 1)]; + } + boolean ret = fileTypesToIndex.contains(ext); + /* 403 */return ret; + } + +} + +/* + * Location: /private/tmp/fulltextIndexer.jar Qualified Name: + * de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded JD-Core Version: + * 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/text-base/HarvesterThreaded.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/text-base/HarvesterThreaded.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,311 @@ +/* */ package de.mpiwg.dwinter.fulltextIndexer.harvester; +/* */ +/* */ import de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread; +/* */ import de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer; +/* */ import de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers; +/* */ import java.io.BufferedReader; +/* */ import java.io.File; +/* */ import java.io.FileNotFoundException; +/* */ import java.io.FileReader; +/* */ import java.io.IOException; +/* */ import java.io.PrintStream; +/* */ import java.util.ArrayList; +/* */ import java.util.Arrays; +/* */ import java.util.Date; +/* */ import java.util.HashMap; +/* */ import java.util.List; +/* */ import org.apache.lucene.analysis.de.GermanAnalyzer; +/* */ import org.apache.lucene.analysis.fr.FrenchAnalyzer; +/* */ import org.apache.lucene.analysis.standard.StandardAnalyzer; +/* */ import org.apache.lucene.index.CorruptIndexException; +/* */ import org.apache.lucene.store.LockObtainFailedException; +/* */ import org.apache.lucene.util.Version; +/* */ import org.jdom.Document; +/* */ import org.jdom.Element; +/* */ import org.jdom.JDOMException; +/* */ import org.jdom.input.SAXBuilder; +/* */ import org.jdom.xpath.XPath; +/* */ +/* */ public class 
HarvesterThreaded +/* */ { +/* */ private static final boolean DEBUG = false; + private static final int MAXFILES = 100; // only used if DEBUG is true +/* 75 */ protected static ArrayList fileTypesToIndex = new ArrayList(Arrays.asList(new String[] { "xml" })); +/* */ +/* 77 */ protected static ArrayList excludeFolders = new ArrayList(Arrays.asList(new String[] { "OCR" })); +/* 78 */ protected static boolean indexMetaPriority = false; +/* */ +/* 81 */ private static String specialMode = ""; +/* 82 */ protected static int maxThread = 30; +/* */ protected File docDir; +/* */ protected File index_dir; +/* 88 */ protected HashMap textLanguage = null; +/* 89 */ protected HashMap languageToISO = new HashMap(); +/* 90 */ protected LanguageAnalyzers languageAnalyzers = new LanguageAnalyzers(); +/* */ +/* 92 */ private int counter = 0; +/* */ protected String languageFileName; +/* 99 */ protected ProcessFileThread[] mythreads = new ProcessFileThread[maxThread]; +/* 100 */ private int filecount = 0; +/* */ +/* 102 */ protected String mdProviderUrl = null; +/* */ private String preferedLanguage; +/* 106 */ protected HashMap supportedLanguageFolder = new HashMap(); +/* */ +/* */ public HarvesterThreaded() +/* */ { +/* */ } +/* */ +/* */ public HarvesterThreaded(File docDir, File index_dir, String languageFileName, String mdProviderUrl, String lang) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException +/* */ { +/* 119 */ this.docDir = docDir; +/* 120 */ this.languageFileName = languageFileName; +/* 121 */ this.preferedLanguage = lang; +/* */ +/* 133 */ this.mdProviderUrl = mdProviderUrl; +/* */ +/* 135 */ this.index_dir = index_dir; +/* */ +/* 137 */ for (int i = 0; i < maxThread; ++i) +/* */ { +/* 139 */ this.mythreads[i] = null; +/* */ } +/* */ +/* 142 */ init_languages(); +/* */ } +/* */ +/* */ private void init_languages() { +/* 146 */ this.languageToISO.put("German", "de"); +/* 147 */ this.languageToISO.put("French", "fr"); +/* 148 */ 
this.languageToISO.put("English", "en"); +/* 149 */ this.languageToISO.put("German-f", "de-f"); +/* */ +/* 151 */ this.supportedLanguageFolder.put("deu", "de"); +/* 152 */ this.supportedLanguageFolder.put("deu-f", "de"); +/* 153 */ this.supportedLanguageFolder.put("fra", "fr"); +/* 154 */ this.supportedLanguageFolder.put("eng", "en"); +/* 155 */ this.supportedLanguageFolder.put("lic", "la"); +/* */ try +/* */ { +/* 158 */ this.languageAnalyzers.add(new LanguageAnalyzer("de", new GermanAnalyzer(Version.LUCENE_30), this.index_dir)); +/* 159 */ this.languageAnalyzers.add(new LanguageAnalyzer("de-f", new GermanAnalyzer(Version.LUCENE_30), this.index_dir)); +/* 160 */ this.languageAnalyzers.add(new LanguageAnalyzer("en", new StandardAnalyzer(Version.LUCENE_30), this.index_dir)); +/* 161 */ this.languageAnalyzers.add(new LanguageAnalyzer("fr", new FrenchAnalyzer(Version.LUCENE_30), this.index_dir)); +/* 162 */ this.languageAnalyzers.add(new LanguageAnalyzer("la", new StandardAnalyzer(Version.LUCENE_30), this.index_dir)); +/* */ +/* 164 */ this.languageAnalyzers.add(new LanguageAnalyzer("all", new StandardAnalyzer(Version.LUCENE_30), this.index_dir)); +/* 165 */ this.languageAnalyzers.add(new LanguageAnalyzer("morph", new StandardAnalyzer(Version.LUCENE_30), this.index_dir)); +/* */ } catch (CorruptIndexException e) { +/* 167 */ e.printStackTrace(); +/* 168 */ System.exit(1); +/* */ } catch (LockObtainFailedException e) { +/* 170 */ e.printStackTrace(); +/* 171 */ System.exit(1); +/* */ } catch (IOException e) { +/* 173 */ e.printStackTrace(); +/* 174 */ System.exit(1); +/* */ } +/* */ } +/* */ +/* */ public HarvesterThreaded(File docDir, File index_dir, String mdProviderUrl) throws CorruptIndexException, LockObtainFailedException, IOException +/* */ { +/* 180 */ this(docDir, index_dir, null, mdProviderUrl, null); +/* */ } +/* */ +/* */ protected HashMap loadLanguages() +/* */ { +/* 187 */ File languageFile = new File(this.docDir + "/" + this.languageFileName); +/* 188 */ 
String languageFilePath = this.docDir + "/" + this.languageFileName; +/* 189 */ HashMap languages = new HashMap(); +/* 190 */ boolean relativ = true; +/* 191 */ if (this.languageFileName == null) +/* 192 */ return null; +/* 193 */ if (!languageFile.exists()) +/* */ { +/* 195 */ languageFile = new File(this.languageFileName); +/* 196 */ languageFilePath = this.languageFileName; +/* 197 */ relativ = false; +/* 198 */ if (!languageFile.exists()) +/* 199 */ return null; +/* */ } +/* */ BufferedReader in; +/* */ try { +/* 203 */ in = new BufferedReader(new FileReader(languageFilePath)); +/* */ } catch (FileNotFoundException e) { +/* 205 */ return null; +/* */ } +/* */ +/* 208 */ String zeile = null; +/* */ try { +/* 210 */ while ((zeile = in.readLine()) != null) { +/* 211 */ String[] splitted = zeile.replace("\"", "").split("[,]"); +/* 212 */ if (splitted.length == 2) +/* 213 */ if (relativ) +/* 214 */ languages.put(this.docDir + "/" + splitted[0], splitted[1]); +/* */ else +/* 216 */ languages.put(splitted[0], splitted[1]); +/* */ } +/* */ } +/* */ catch (IOException e) { +/* 220 */ e.printStackTrace(); +/* 221 */ return null; +/* */ } +/* */ +/* 224 */ return languages; +/* */ } +/* */ +/* */ public void harvestFromRDF(String rdffilepath) throws InterruptedException, JDOMException { +/* 228 */ Date start = new Date(); +/* 229 */ boolean create = true; +/* */ try +/* */ { +/* 240 */ System.out.println("Indexing to directory '" + this.index_dir + "'..."); +/* 241 */ ArrayList files = getFileListFromRDF(rdffilepath); +/* 242 */ indexDocs(files); +/* 243 */ System.out.println("Optimizing..."); +/* 244 */ this.languageAnalyzers.optimize(); +/* 245 */ this.languageAnalyzers.close(); +/* */ +/* 247 */ Date end = new Date(); +/* 248 */ System.out.println(end.getTime() - start.getTime() + " total milliseconds"); +/* */ } +/* */ catch (IOException e) { +/* 251 */ System.out.println(" caught a " + e.getClass() + +/* 252 */ "\n with message: " + e.getMessage()); +/* */ } +/* */ } 
+/* */ +/* */ private ArrayList getFileListFromRDF(String rdffilepath) +/* */ throws JDOMException, IOException +/* */ { +/* 260 */ ArrayList ret = new ArrayList(); +/* 261 */ SAXBuilder builder = new SAXBuilder(); +/* */ +/* 263 */ Document doc = builder.build(rdffilepath); +/* */ +/* 265 */ Element el = doc.getRootElement(); +/* */ +/* 267 */ XPath xpath = XPath.newInstance("//MPIWG:archive-path"); +/* 268 */ xpath.addNamespace("MPIWG", "http://www.mpiwg-berlin.mpg.de/ns/mpiwg"); +/* 269 */ List paths = xpath.selectNodes(el); +/* 270 */ for (Element path : paths) { +/* 271 */ ret.add(path.getText()); +/* */ } +/* */ +/* 274 */ return ret; +/* */ } +/* */ +/* */ public void harvestFolder() throws InterruptedException { +/* 278 */ Date start = new Date(); +/* 279 */ boolean create = true; +/* */ try +/* */ { +/* 290 */ System.out.println("Indexing to directory '" + this.index_dir + "'..."); +/* 291 */ indexDocs(this.docDir); +/* 292 */ System.out.println("Optimizing..."); +/* 293 */ this.languageAnalyzers.optimize(); +/* 294 */ this.languageAnalyzers.close(); +/* */ +/* 296 */ Date end = new Date(); +/* 297 */ System.out.println(end.getTime() - start.getTime() + " total milliseconds"); +/* */ } +/* */ catch (IOException e) { +/* 300 */ System.out.println(" caught a " + e.getClass() + +/* 301 */ "\n with message: " + e.getMessage()); +/* */ } +/* */ } +/* */ +/* */ private void indexDocs(ArrayList files) +/* */ throws IOException, InterruptedException +/* */ { +/* 308 */ for (String filename : files) +/* */ { +/* 310 */ indexDocs(new File(this.docDir.getAbsolutePath() + filename)); + if ((DEBUG==true) & (this.filecount>MAXFILES)) + break; +/* */ } +/* */ } +/* */ +/* */ void indexDocs(File file) +/* */ throws IOException, InterruptedException +/* */ { +/* 317 */ if (!file.canRead()) +/* */ return; +/* 319 */ if (file.isDirectory()) +/* */ { +/* 321 */ if ((DEBUG==true) && (this.filecount>MAXFILES)) + return; +/* 325 */ String[] files = file.list(); +/* */ +/* 327 */ 
String folderName = file.getName(); +/* 328 */ if ((((files != null) ? 1 : 0) & ((excludeFolders.contains(folderName)) ? 0 : 1)) != 0) +/* 329 */ for (int i = 0; i < files.length; ++i) +/* */ { +/* 332 */ indexDocs(new File(file, files[i])); + if ((DEBUG==true) && (this.filecount>MAXFILES)) + break; +/* */ } +/* */ } +/* 335 */ else if (isTextFile(file)) +/* */ { +/* 338 */ processFile(file); +/* */ } +/* */ else +/* */ { +/* 342 */ System.out.println("not adding " + file); +/* */ } +/* */ } +/* */ +/* */ protected void processFile(File file) throws CorruptIndexException, LockObtainFailedException, IOException +/* */ { +/* 348 */ int freeThread = -1; +/* 349 */ while (freeThread == -1) +/* */ { +/* 351 */ freeThread = waitForFreeThread(); +/* */ } +/* */ +/* 355 */ if (this.textLanguage == null) +/* 356 */ this.textLanguage = loadLanguages(); +/* 357 */ this.mythreads[freeThread] = new ProcessFileThread(this.languageAnalyzers, file, this.languageFileName, this.textLanguage, this.mdProviderUrl, this.preferedLanguage, this.languageToISO, this.supportedLanguageFolder); +/* 358 */ this.mythreads[freeThread].start(); +/* 359 */ System.out.println("New process started:" + freeThread); +/* */ } +/* */ +/* */ protected int waitForFreeThread() +/* */ { +/* 367 */ for (int i = 0; i < maxThread; ++i) +/* */ { +/* 369 */ if (this.mythreads[i] == null) +/* 370 */ return i; +/* 371 */ if (!this.mythreads[i].done) +/* */ continue; +/* 373 */ this.filecount += 1; +/* 374 */ System.out.println("filecount:" + this.filecount); +/* 375 */ return i; +/* */ } +/* */ +/* 378 */ return -1; +/* */ } +/* */ +/* */ private boolean isTextFile(File file) +/* */ { +/* 392 */ String fn = file.getName(); +/* */ +/* 394 */ String[] splitted = fn.split("[.]"); +/* */ +/* 396 */ String ext = ""; +/* */ +/* 398 */ if (splitted.length > 1) +/* */ { +/* 400 */ ext = splitted[(splitted.length - 1)]; +/* */ } +/* */ +/* 403 */ return fileTypesToIndex.contains(ext); +/* */ } +/* */ } + +/* Location: 
/private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/text-base/MyResolver.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/text-base/MyResolver.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,45 @@ +package de.mpiwg.dwinter.fulltextIndexer.harvester; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; + +import org.xml.sax.EntityResolver; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; + +public class MyResolver implements EntityResolver { + + public InputSource resolveEntity(String publicId, String systemId) + throws SAXException, IOException { + /* Serves the bundled XHTML DTD/entity files by public id; everything else resolves via systemId. publicId may be null (no public identifier supplied), hence the literal-first equals below. */ + // System.out.println("public:"+publicId); + // System.out.println("static"+systemId); + //File f = new File( + // "/Users/dwinter/Documents/Projekte/data-mining/developmentEclipse/fulltextIndexer/catalog/xhtml1-transitional.dtd"); + + + //if (!f.exists()) { + // System.err.println("Cant't find xhtml-dtd: MyResolver"); + // return null; + //} + //if (publicId.equals("-//W3C//DTD XHTML 1.0 Transitional//EN")) + // return new InputSource(f.getAbsolutePath()); + + if ("-//W3C//DTD XHTML 1.0 Transitional//EN".equals(publicId)){ + InputStream res = getClass().getResourceAsStream("/de/mpiwg/dwinter/fulltextIndexer/catalog/xhtml1-transitional.dtd"); + return new InputSource(res); + } else if ("-//W3C//ENTITIES Latin 1 for XHTML//EN".equals(publicId)){ + InputStream res = getClass().getResourceAsStream("/de/mpiwg/dwinter/fulltextIndexer/catalog/xhtml-lat1.ent"); + return new InputSource(res); + } else if ("-//W3C//ENTITIES Symbols for XHTML//EN".equals(publicId)){ + InputStream res = 
getClass().getResourceAsStream("/de/mpiwg/dwinter/fulltextIndexer/catalog/xhtml-symbol.ent"); + return new InputSource(res); + } else if ("-//W3C//ENTITIES Special for XHTML//EN".equals(publicId)){ + InputStream res = getClass().getResourceAsStream("/de/mpiwg/dwinter/fulltextIndexer/catalog/xhtml-special.ent"); + return new InputSource(res); + } + return new InputSource(systemId); + } + +} diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/text-base/OCRHarvesterThreaded.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/.svn/text-base/OCRHarvesterThreaded.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,102 @@ + +/* */ package de.mpiwg.dwinter.fulltextIndexer.harvester; +/* */ +/* */ import de.mpiwg.dwinter.fulltextIndexer.harvester.processors.OCRProcessFileThread; +/* */ import de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread; +/* */ import de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer; +/* */ import de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers; +/* */ import java.io.File; +/* */ import java.io.IOException; +/* */ import java.io.PrintStream; +/* */ import java.util.HashMap; +/* */ import org.apache.lucene.analysis.de.GermanAnalyzer; +/* */ import org.apache.lucene.analysis.fr.FrenchAnalyzer; +/* */ import org.apache.lucene.analysis.standard.StandardAnalyzer; +/* */ import org.apache.lucene.index.CorruptIndexException; +/* */ import org.apache.lucene.store.LockObtainFailedException; +/* */ import org.apache.lucene.util.Version; +/* */ +/* */ public class OCRHarvesterThreaded extends HarvesterThreaded +/* */ { +/* */ private String preferedLanguage; +/* */ +/* */ public OCRHarvesterThreaded() +/* */ { +/* */ } +/* */ +/* */ public OCRHarvesterThreaded(File docDir, File index_dir, String languageFileName, String mdProviderUrl, String lang) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException +/* 
*/ { +/* 41 */ this.index_dir = index_dir; +/* 42 */ this.languageFileName = languageFileName; +/* 43 */ this.docDir = docDir; +/* 44 */ this.preferedLanguage = lang; +/* */ +/* 46 */ this.mdProviderUrl = mdProviderUrl; +/* 47 */ for (int i = 0; i < maxThread; ++i) +/* */ { +/* 49 */ this.mythreads[i] = null; +/* */ } +/* */ +/* 52 */ init_languages(); +/* */ } +/* */ +/* */ private void init_languages() { +/* 56 */ this.languageToISO.put("German", "de"); +/* 57 */ this.languageToISO.put("French", "fr"); +/* 58 */ this.languageToISO.put("English", "en"); +/* 59 */ this.languageToISO.put("German-f", "de-f"); +/* */ +/* 61 */ this.supportedLanguageFolder.put("deu", "de"); +/* 62 */ this.supportedLanguageFolder.put("deu-f", "de"); +/* 63 */ this.supportedLanguageFolder.put("fra", "fr"); +/* 64 */ this.supportedLanguageFolder.put("eng", "en"); +/* 65 */ this.supportedLanguageFolder.put("lic", "la"); +/* */ try +/* */ { +/* 68 */ this.languageAnalyzers.add(new LanguageAnalyzer("de", new GermanAnalyzer(Version.LUCENE_30), this.index_dir)); +/* 69 */ this.languageAnalyzers.add(new LanguageAnalyzer("de-f", new GermanAnalyzer(Version.LUCENE_30), this.index_dir)); +/* 70 */ this.languageAnalyzers.add(new LanguageAnalyzer("en", new StandardAnalyzer(Version.LUCENE_30), this.index_dir)); +/* 71 */ this.languageAnalyzers.add(new LanguageAnalyzer("fr", new FrenchAnalyzer(Version.LUCENE_30), this.index_dir)); +/* 72 */ this.languageAnalyzers.add(new LanguageAnalyzer("la", new StandardAnalyzer(Version.LUCENE_30), this.index_dir)); +/* */ +/* 74 */ this.languageAnalyzers.add(new LanguageAnalyzer("all", new StandardAnalyzer(Version.LUCENE_30), this.index_dir)); +/* 75 */ this.languageAnalyzers.add(new LanguageAnalyzer("morph", new StandardAnalyzer(Version.LUCENE_30), this.index_dir)); +/* */ } catch (CorruptIndexException e) { +/* 77 */ e.printStackTrace(); +/* 78 */ System.exit(1); +/* */ } catch (LockObtainFailedException e) { +/* 80 */ e.printStackTrace(); +/* 81 */ 
System.exit(1); +/* */ } catch (IOException e) { +/* 83 */ e.printStackTrace(); +/* 84 */ System.exit(1); +/* */ } +/* */ } +/* */ +/* */ public OCRHarvesterThreaded(File docDir, File index_dir, String mdProviderUrl, String preferedLanguage) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException +/* */ { +/* 92 */ this(docDir, index_dir, null, mdProviderUrl, preferedLanguage); +/* */ } +/* */ +/* */ protected void processFile(File file) throws CorruptIndexException, LockObtainFailedException, IOException +/* */ { +/* 97 */ int freeThread = -1; +/* 98 */ while (freeThread == -1) +/* */ { +/* 100 */ freeThread = waitForFreeThread(); +/* */ } +/* */ +/* 104 */ if (this.textLanguage == null) +/* 105 */ this.textLanguage = loadLanguages(); +/* 106 */ this.mythreads[freeThread] = new OCRProcessFileThread(this.languageAnalyzers, file, this.languageFileName, this.textLanguage, this.mdProviderUrl, this.preferedLanguage, this.languageToISO, this.supportedLanguageFolder); +/* 107 */ this.mythreads[freeThread].start(); +/* 108 */ System.out.println("New process started:" + freeThread); +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,53 @@ +K 25 +svn:wc:ra_dav:version-url +V 91 +/svn/!svn/ver/2621/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI +END +OCRHarvesterCLIMD.java +K 25 +svn:wc:ra_dav:version-url +V 114 +/svn/!svn/ver/2621/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/OCRHarvesterCLIMD.java +END +HarvesterCLI.java +K 25 +svn:wc:ra_dav:version-url +V 109 
+/svn/!svn/ver/2621/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/HarvesterCLI.java +END +DocHarvesterCLIMD.java +K 25 +svn:wc:ra_dav:version-url +V 114 +/svn/!svn/ver/2662/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/DocHarvesterCLIMD.java +END +DocHarvesterCLIRDFMD.java +K 25 +svn:wc:ra_dav:version-url +V 117 +/svn/!svn/ver/2873/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/DocHarvesterCLIRDFMD.java +END +OCRHarvesterCLI.java +K 25 +svn:wc:ra_dav:version-url +V 112 +/svn/!svn/ver/2621/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/OCRHarvesterCLI.java +END +OCRHarvesterCLIRDFMD.java +K 25 +svn:wc:ra_dav:version-url +V 117 +/svn/!svn/ver/2621/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/OCRHarvesterCLIRDFMD.java +END +HarvesterCLIMD.java +K 25 +svn:wc:ra_dav:version-url +V 111 +/svn/!svn/ver/2621/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/HarvesterCLIMD.java +END +HarvesterCLIRDFMD.java +K 25 +svn:wc:ra_dav:version-url +V 114 +/svn/!svn/ver/2621/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/HarvesterCLIRDFMD.java +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,307 @@ +10 + +dir +2621 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-16T19:13:23.990503Z +2621 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +DocHarvesterCLIMD.java +file +2662 + + + +2010-06-17T07:34:42.000000Z +2da2065e8dd45904d988c260473bd265 +2010-06-28T15:47:27.673424Z +2662 +dwinter +has-props + + 
+ + + + + + + + + + + + + + + + + + +3372 + +DocHarvesterCLIRDFMD.java +file +2873 + + + +2010-06-29T07:26:23.000000Z +46d89597c31ebc12ae92cedb94bed54c +2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +3759 + +HarvesterCLI.java +file + + + + +2010-06-16T07:59:06.000000Z +554a4414b4061dbabf33734f32f078b4 +2010-06-16T19:13:23.990503Z +2621 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +3198 + +HarvesterCLIMD.java +file + + + + +2010-06-16T14:51:22.000000Z +74c36b7ff6a908d3cbb45407b2a5c831 +2010-06-16T19:13:23.990503Z +2621 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +3345 + +HarvesterCLIRDFMD.java +file + + + + +2010-06-16T19:00:08.000000Z +82b5ceb95a5480c37d4a90144c918c28 +2010-06-16T19:13:23.990503Z +2621 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +3528 + +OCRHarvesterCLI.java +file + + + + +2010-06-16T14:51:22.000000Z +2de468cb40c9a9e14bf1c1d5c81b7601 +2010-06-16T19:13:23.990503Z +2621 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +3209 + +OCRHarvesterCLIMD.java +file + + + + +2010-06-16T14:51:22.000000Z +cb6796372a4897a29a5dc1e297f65df1 +2010-06-16T19:13:23.990503Z +2621 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +3375 + +OCRHarvesterCLIRDFMD.java +file + + + + +2010-06-16T18:56:41.000000Z +ca4d1f97d5257bce7695f8081a37a340 +2010-06-16T19:13:23.990503Z +2621 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +3474 + diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/DocHarvesterCLIMD.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/DocHarvesterCLIMD.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/DocHarvesterCLIRDFMD.java.svn-base --- /dev/null 
Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/DocHarvesterCLIRDFMD.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/HarvesterCLI.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/HarvesterCLI.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,9 @@ +K 14 +svn:executable +V 1 +* +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/HarvesterCLIMD.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/HarvesterCLIMD.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,9 @@ +K 14 +svn:executable +V 1 +* +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/HarvesterCLIRDFMD.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/HarvesterCLIRDFMD.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/OCRHarvesterCLI.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/OCRHarvesterCLI.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,9 @@ +K 14 +svn:executable +V 1 +* +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/OCRHarvesterCLIMD.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/OCRHarvesterCLIMD.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,9 @@ +K 14 +svn:executable +V 1 +* +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/OCRHarvesterCLIRDFMD.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/prop-base/OCRHarvesterCLIRDFMD.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,5 @@ +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/DocHarvesterCLIMD.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/DocHarvesterCLIMD.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,69 @@ +/* */ package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI; +/* */ +/* */ import de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded; +/* */ import java.io.File; +/* */ import java.io.IOException; +/* */ import java.io.PrintStream; +/* */ import org.apache.lucene.index.CorruptIndexException; +/* */ import org.apache.lucene.store.LockObtainFailedException; +/* */ +/* */ public class DocHarvesterCLIMD +/* */ { +/* */ public static void main(String[] args) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException +/* */ { +/* 20 */ String usage = "java de.mpiwg.itgroup.fulltext.harvester.DocHarvesterCLIMD --lang=\njava de.mpiwg.itgroup.fulltext.harvester.DocHarvesterCLIMD --langfile=\njava de.mpiwg.itgroup.fulltext.harvester.DocHarvesterCLIMD "; +/* */ +/* 23 */ if ((args.length != 4) && (args.length != 3)) { +/* 24 */ System.err.println("Usage: " + usage); +/* 25 */ System.exit(1); +/* */ } +/* */ +/* 33 */ File docDir = new File(args[0]); +/* 34 */ if ((!docDir.exists()) || (!docDir.canRead())) { 
+/* 35 */ System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); +/* 36 */ System.exit(1); +/* */ } +/* */ +/* 39 */ File index_dir = new File(args[1]); +/* */ +/* 41 */ if (args.length == 3) { +/* 42 */ doTheHarvest(docDir, index_dir, args[2], null); +/* */ } +/* */ else { +/* 45 */ String[] options = args[3].split("="); +/* 46 */ if (options.length != 2) { +/* 47 */ System.err.println("wrong options:" + args[3]); +/* 48 */ System.exit(1); +/* */ } +/* */ +/* 51 */ if (options[0].equals("--lang")) { +/* 52 */ doTheHarvestLanguage(docDir, index_dir, args[2], options[1]); +/* 53 */ } else if (options[0].equals("--langfile")) { +/* 54 */ doTheHarvest(docDir, index_dir, args[2], options[1]); +/* */ } else { +/* 56 */ System.err.println("wrong options:" + options[0]); +/* 57 */ System.exit(1); +/* */ } +/* */ } +/* */ } +/* */ +/* */ protected static void doTheHarvestLanguage(File docDir, File index_dir, String mdProviderUrl, String lang) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException +/* */ { +/* 65 */ DocHarvesterThreaded harvester = new DocHarvesterThreaded(docDir, index_dir, null, mdProviderUrl, lang); +/* 66 */ harvester.harvestFolder(); +/* */ } +/* */ +/* */ protected static void doTheHarvest(File docDir, File index_dir, String mdProviderUrl, String languageFile) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException +/* */ { +/* 72 */ DocHarvesterThreaded harvester = new DocHarvesterThreaded(docDir, index_dir, languageFile, mdProviderUrl, null); +/* 73 */ harvester.harvestFolder(); +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIMD + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea 
src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/DocHarvesterCLIRDFMD.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/DocHarvesterCLIRDFMD.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,108 @@ +package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI; + +import java.io.File; +import java.io.IOException; + +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.store.LockObtainFailedException; +import org.jdom.JDOMException; + +import de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded; + + + +public class DocHarvesterCLIRDFMD { + + /** Index all text files under a directory. + * @throws IOException + * @throws LockObtainFailedException + * @throws CorruptIndexException + * @throws InterruptedException */ + public static void main(String[] args) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException { + String usage = "java de.mpiwg.itgroup.fulltext.harvester.HarvesterCLIRDF --lang=\n" + + "java de.mpiwg.itgroup.fulltext.harvester.HarvesterCLIRDF --langfile=\n"; + + + if (args.length != 4 & args.length != 5) { + System.err.println("Usage: " + usage); + System.exit(1); + } + +// if (INDEX_DIR.exists()) { +// System.out.println("Cannot save index to '" +INDEX_DIR+ "' directory, please delete it first"); +// System.exit(1); +// } + + File docDir = new File(args[1]); + if ((!docDir.exists()) || (!docDir.canRead())) { + System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); + System.exit(1); + } + + final File rdfFile = new File(args[0]); + if (!rdfFile.exists() || !rdfFile.canRead()) { + System.out.println("RDFFile directory '" +rdfFile.getAbsolutePath()+ "' does not exist or is not readable, please check the path"); + System.exit(1); + } + + final File index_dir= new File(args[2]); + final String mdProviderUrl= 
args[3]; + + if (args.length == 4){ + doTheHarvest(rdfFile, docDir, index_dir, mdProviderUrl,null); + } + else { + String[] options = args[4].split("="); + if (options.length != 2) { /* "--lang=xx".split("=") yields exactly two parts: flag and value */ + System.err.println("wrong options:" + args[4]); + System.exit(1); + } + + if (options[0].equals("--lang")) { + doTheHarvestLanguage(rdfFile,docDir, index_dir, args[3], options[1]); + } else if (options[0].equals("--langfile")) { + doTheHarvest(rdfFile,docDir, index_dir, args[3], options[1]); + } else { + System.err.println("wrong options:" + options[0]); + System.exit(1); + } + } + } + + /** + * @param rdfFile Path to the RDF file (ECHO-rdf-Format) + * @param docDir + * @param index_dir + * @param mdProviderUrl + * @param languageFile + * @throws CorruptIndexException + * @throws LockObtainFailedException + * @throws IOException + * @throws InterruptedException + */ + private static void doTheHarvest(File rdfFile, File docDir,File index_dir, + String mdProviderUrl, String languageFile) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException { + DocHarvesterThreaded harvester = new DocHarvesterThreaded(docDir,index_dir,languageFile,mdProviderUrl,null); + try { + harvester.harvestFromRDF(rdfFile.getAbsolutePath()); + } catch (JDOMException e) { + e.printStackTrace(); + } + + } + + private static void doTheHarvestLanguage(File rdfFile, File docDir,File index_dir, + String mdProviderUrl, String lang) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException { + DocHarvesterThreaded harvester = new DocHarvesterThreaded(docDir,index_dir,null,mdProviderUrl,lang); + try { + harvester.harvestFromRDF(rdfFile.getAbsolutePath()); + } catch (JDOMException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + } + + + +} diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/HarvesterCLI.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/HarvesterCLI.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,69 @@ +/* */ package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI; +/* */ +/* */ import de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded; +/* */ import java.io.File; +/* */ import java.io.IOException; +/* */ import java.io.PrintStream; +/* */ import org.apache.lucene.index.CorruptIndexException; +/* */ import org.apache.lucene.store.LockObtainFailedException; +/* */ +/* */ public class HarvesterCLI +/* */ { +/* */ public static void main(String[] args) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException +/* */ { +/* 20 */ String usage = "java de.mpiwg.itgroup.fulltext.harvester.HarvesterCLI --lang=\njava de.mpiwg.itgroup.fulltext.harvester.HarvesterCLI --langfile=\njava de.mpiwg.itgroup.fulltext.harvester.HarvesterCLI "; +/* */ +/* 23 */ if ((args.length != 3) && (args.length != 2)) { +/* 24 */ System.err.println("Usage: " + usage); +/* 25 */ System.exit(1); +/* */ } +/* */ +/* 33 */ File docDir = new File(args[0]); +/* 34 */ if ((!docDir.exists()) || (!docDir.canRead())) { +/* 35 */ System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); +/* 36 */ System.exit(1); +/* */ } +/* */ +/* 39 */ File index_dir = new File(args[1]); +/* */ +/* 41 */ if (args.length == 2) { +/* 42 */ doTheHarvest(docDir, index_dir, null); +/* */ } +/* */ else { +/* 45 */ String[] options = args[2].split("="); +/* 46 */ if (options.length != 2) { +/* 47 */ System.err.println("wrong options:" + args[2]); +/* 48 */ System.exit(1); +/* */ } +/* */ +/* 51 */ if (options[0].equals("--lang")) { +/* 52 */ doTheHarvestLanguage(docDir, index_dir, options[1]); +/* 53 */ } else if (options[0].equals("--langfile")) { +/* 54 */ doTheHarvest(docDir, index_dir, options[1]); +/* */ } else { +/* 56 */ System.err.println("wrong options:" + 
options[0]); +/* 57 */ System.exit(1); +/* */ } +/* */ } +/* */ } +/* */ +/* */ protected static void doTheHarvestLanguage(File docDir, File index_dir, String lang) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException +/* */ { +/* 65 */ HarvesterThreaded harvester = new HarvesterThreaded(docDir, index_dir, null, null, lang); +/* 66 */ harvester.harvestFolder(); +/* */ } +/* */ +/* */ protected static void doTheHarvest(File docDir, File index_dir, String languageFile) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException +/* */ { +/* 72 */ HarvesterThreaded harvester = new HarvesterThreaded(docDir, index_dir, languageFile, null, null); +/* 73 */ harvester.harvestFolder(); +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLI + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/HarvesterCLIMD.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/HarvesterCLIMD.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,69 @@ +/* */ package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI; +/* */ +/* */ import de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded; +/* */ import java.io.File; +/* */ import java.io.IOException; +/* */ import java.io.PrintStream; +/* */ import org.apache.lucene.index.CorruptIndexException; +/* */ import org.apache.lucene.store.LockObtainFailedException; +/* */ +/* */ public class HarvesterCLIMD +/* */ { +/* */ public static void main(String[] args) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException +/* */ { +/* 20 */ String usage = "java de.mpiwg.itgroup.fulltext.harvester.HarvesterCLIMD --lang=\njava 
de.mpiwg.itgroup.fulltext.harvester.HarvesterCLIMD --langfile=\njava de.mpiwg.itgroup.fulltext.harvester.HarvesterCLIMD "; +/* */ +/* 23 */ if ((args.length != 4) && (args.length != 3)) { +/* 24 */ System.err.println("Usage: " + usage); +/* 25 */ System.exit(1); +/* */ } +/* */ +/* 33 */ File docDir = new File(args[0]); +/* 34 */ if ((!docDir.exists()) || (!docDir.canRead())) { +/* 35 */ System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); +/* 36 */ System.exit(1); +/* */ } +/* */ +/* 39 */ File index_dir = new File(args[1]); +/* */ +/* 41 */ if (args.length == 3) { +/* 42 */ doTheHarvest(docDir, index_dir, args[2], null); +/* */ } +/* */ else { +/* 45 */ String[] options = args[3].split("="); +/* 46 */ if (options.length != 2) { /* "--lang=xx".split("=") yields exactly two parts: flag and value */ +/* 47 */ System.err.println("wrong options:" + args[3]); +/* 48 */ System.exit(1); +/* */ } +/* */ +/* 51 */ if (options[0].equals("--lang")) { +/* 52 */ doTheHarvestLanguage(docDir, index_dir, args[2], options[1]); +/* 53 */ } else if (options[0].equals("--langfile")) { +/* 54 */ doTheHarvest(docDir, index_dir, args[2], options[1]); +/* */ } else { +/* 56 */ System.err.println("wrong options:" + options[0]); +/* 57 */ System.exit(1); +/* */ } +/* */ } +/* */ } +/* */ +/* */ protected static void doTheHarvestLanguage(File docDir, File index_dir, String mdProviderUrl, String lang) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException +/* */ { +/* 65 */ HarvesterThreaded harvester = new HarvesterThreaded(docDir, index_dir, null, mdProviderUrl, lang); +/* 66 */ harvester.harvestFolder(); +/* */ } +/* */ +/* */ protected static void doTheHarvest(File docDir, File index_dir, String mdProviderUrl, String languageFile) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException +/* */ { +/* 72 */ HarvesterThreaded harvester = new HarvesterThreaded(docDir, index_dir, languageFile, 
mdProviderUrl, null); +/* 73 */ harvester.harvestFolder(); +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIMD + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/HarvesterCLIRDFMD.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/HarvesterCLIRDFMD.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,98 @@ +package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI; + +import java.io.File; +import java.io.IOException; + +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.store.LockObtainFailedException; +import org.jdom.JDOMException; + +import de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded; +import de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded; + + + +public class HarvesterCLIRDFMD { + + /** Index all text files under a directory. 
+ * @throws IOException + * @throws LockObtainFailedException + * @throws CorruptIndexException + * @throws InterruptedException */ + public static void main(String[] args) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException { + String usage = "java de.mpiwg.itgroup.fulltext.harvester.HarvesterCLIRDF --lang=\n" + + "java de.mpiwg.itgroup.fulltext.harvester.HarvesterCLIRDF --langfile=\n"; + + + if (args.length != 4 & args.length != 5) { + System.err.println("Usage: " + usage); + System.exit(1); + } + +// if (INDEX_DIR.exists()) { +// System.out.println("Cannot save index to '" +INDEX_DIR+ "' directory, please delete it first"); +// System.exit(1); +// } + + File docDir = new File(args[1]); + if ((!docDir.exists()) || (!docDir.canRead())) { + System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); + System.exit(1); + } + + final File rdfFile = new File(args[0]); + if (!rdfFile.exists() || !rdfFile.canRead()) { + System.out.println("RDFFile directory '" +rdfFile.getAbsolutePath()+ "' does not exist or is not readable, please check the path"); + System.exit(1); + } + + final File index_dir= new File(args[2]); + final String mdProviderUrl= args[3]; + + if (args.length == 4){ + doTheHarvest(rdfFile, docDir, index_dir, mdProviderUrl,null); + } + else { + String[] options = args[4].split("="); + if (options.length != 3) { + System.err.println("wrong options:" + args[4]); + System.exit(1); + } + + if (options[0].equals("--lang")) { + doTheHarvestLanguage(rdfFile,docDir, index_dir, args[3], options[1]); + } else if (options[0].equals("--langfile")) { + doTheHarvest(rdfFile,docDir, index_dir, args[3], options[1]); + } else { + System.err.println("wrong options:" + options[0]); + System.exit(1); + } + } + } + + private static void doTheHarvest(File rdfFile, File docDir,File index_dir, + String mdProviderUrl, String languageFile) throws CorruptIndexException, 
LockObtainFailedException, IOException, InterruptedException { + HarvesterThreaded harvester = new HarvesterThreaded(docDir,index_dir,languageFile,mdProviderUrl,null); + try { + harvester.harvestFromRDF(rdfFile.getAbsolutePath()); + } catch (JDOMException e) { + e.printStackTrace(); + } + + } + + private static void doTheHarvestLanguage(File rdfFile, File docDir,File index_dir, + String mdProviderUrl, String lang) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException { + HarvesterThreaded harvester = new HarvesterThreaded(docDir,index_dir,null,mdProviderUrl,lang); + try { + harvester.harvestFromRDF(rdfFile.getAbsolutePath()); + } catch (JDOMException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + } + + + +} diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/OCRHarvesterCLI.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/OCRHarvesterCLI.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,68 @@ +/* */ package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI; +/* */ +/* */ import de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded; +/* */ import java.io.File; +/* */ import java.io.IOException; +/* */ import java.io.PrintStream; +/* */ import org.apache.lucene.index.CorruptIndexException; +/* */ import org.apache.lucene.store.LockObtainFailedException; +/* */ +/* */ public class OCRHarvesterCLI +/* */ { +/* */ public static void main(String[] args) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException +/* */ { +/* 21 */ String usage = "java de.mpiwg.itgroup.fulltext.harvester.OCRHarvesterCLI --lang=\njava de.mpiwg.itgroup.fulltext.harvester.OCRHarvesterCLI --langfile=\njava de.mpiwg.itgroup.fulltext.harvester.OCRHarvesterCLI "; +/* */ +/* 24 */ if ((args.length != 3) && (args.length != 2)) { +/* 25 */ 
System.err.println("Usage: " + usage); +/* 26 */ System.exit(1); +/* */ } +/* */ +/* 34 */ File docDir = new File(args[0]); +/* 35 */ if ((!docDir.exists()) || (!docDir.canRead())) { +/* 36 */ System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); +/* 37 */ System.exit(1); +/* */ } +/* */ +/* 40 */ File index_dir = new File(args[1]); +/* */ +/* 42 */ if (args.length == 2) { +/* 43 */ doTheHarvest(docDir, index_dir, null); +/* */ } +/* */ else { +/* 46 */ String[] options = args[2].split("="); +/* 47 */ if (options.length != 2) { +/* 48 */ System.err.println("wrong options:" + args[2]); +/* 49 */ System.exit(1); +/* */ } +/* */ +/* 52 */ if (options[0].equals("--lang")) { +/* 53 */ doTheHarvestLanguage(docDir, index_dir, options[1]); +/* 54 */ } else if (options[0].equals("--langfile")) { +/* 55 */ doTheHarvest(docDir, index_dir, options[1]); +/* */ } else { +/* 57 */ System.err.println("wrong options:" + options[0]); +/* 58 */ System.exit(1); +/* */ } +/* */ } +/* */ } +/* */ +/* */ protected static void doTheHarvestLanguage(File docDir, File index_dir, String lang) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException +/* */ { +/* 65 */ OCRHarvesterThreaded harvester = new OCRHarvesterThreaded(docDir, index_dir, null, null, lang); +/* 66 */ harvester.harvestFolder(); +/* */ } +/* */ +/* */ protected static void doTheHarvest(File docDir, File index_dir, String languageFile) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException +/* */ { +/* 72 */ OCRHarvesterThreaded harvester = new OCRHarvesterThreaded(docDir, index_dir, languageFile, null); +/* 73 */ harvester.harvestFolder(); +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLI + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r 
dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/OCRHarvesterCLIMD.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/OCRHarvesterCLIMD.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,69 @@ +/* */ package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI; +/* */ +/* */ import de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded; +/* */ import java.io.File; +/* */ import java.io.IOException; +/* */ import java.io.PrintStream; +/* */ import org.apache.lucene.index.CorruptIndexException; +/* */ import org.apache.lucene.store.LockObtainFailedException; +/* */ +/* */ public class OCRHarvesterCLIMD +/* */ { +/* */ public static void main(String[] args) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException +/* */ { +/* 20 */ String usage = "java de.mpiwg.itgroup.fulltext.harvester.OCRHarvesterCLIMD --lang=\njava de.mpiwg.itgroup.fulltext.harvester.OCRHarvesterCLIMD --langfile=\njava de.mpiwg.itgroup.fulltext.harvester.OCRHarvesterCLIMD "; +/* */ +/* 23 */ if ((args.length != 4) && (args.length != 3)) { +/* 24 */ System.err.println("Usage: " + usage); +/* 25 */ System.exit(1); +/* */ } +/* */ +/* 33 */ File docDir = new File(args[0]); +/* 34 */ if ((!docDir.exists()) || (!docDir.canRead())) { +/* 35 */ System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); +/* 36 */ System.exit(1); +/* */ } +/* */ +/* 39 */ File index_dir = new File(args[1]); +/* */ +/* 41 */ if (args.length == 3) { +/* 42 */ doTheHarvest(docDir, index_dir, args[2], null); +/* */ } +/* */ else { +/* 45 */ String[] options = args[3].split("="); +/* 46 */ if (options.length != 3) { +/* 47 */ System.err.println("wrong options:" + args[3]); +/* 48 */ System.exit(1); +/* */ } +/* */ +/* 51 */ if (options[0].equals("--lang")) { +/* 52 */ doTheHarvestLanguage(docDir, 
index_dir, args[2], options[1]); +/* 53 */ } else if (options[0].equals("--langfile")) { +/* 54 */ doTheHarvest(docDir, index_dir, args[2], options[1]); +/* */ } else { +/* 56 */ System.err.println("wrong options:" + options[0]); +/* 57 */ System.exit(1); +/* */ } +/* */ } +/* */ } +/* */ +/* */ protected static void doTheHarvestLanguage(File docDir, File index_dir, String mdProviderUrl, String lang) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException +/* */ { +/* 65 */ OCRHarvesterThreaded harvester = new OCRHarvesterThreaded(docDir, index_dir, null, mdProviderUrl, lang); +/* 66 */ harvester.harvestFolder(); +/* */ } +/* */ +/* */ protected static void doTheHarvest(File docDir, File index_dir, String mdProviderUrl, String languageFile) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException +/* */ { +/* 72 */ OCRHarvesterThreaded harvester = new OCRHarvesterThreaded(docDir, index_dir, languageFile, mdProviderUrl, null); +/* 73 */ harvester.harvestFolder(); +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIMD + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/OCRHarvesterCLIRDFMD.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/.svn/text-base/OCRHarvesterCLIRDFMD.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,97 @@ +package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI; + +import java.io.File; +import java.io.IOException; + +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.store.LockObtainFailedException; +import org.jdom.JDOMException; + +import de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded; + + + +public class OCRHarvesterCLIRDFMD { + + /** Index 
all text files under a directory. + * @throws IOException + * @throws LockObtainFailedException + * @throws CorruptIndexException + * @throws InterruptedException */ + public static void main(String[] args) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException { + String usage = "java de.mpiwg.itgroup.fulltext.harvester.HarvesterCLIRDF --lang=\n" + + "java de.mpiwg.itgroup.fulltext.harvester.HarvesterCLIRDF --langfile=\n"; + + + if (args.length != 4 & args.length != 5) { + System.err.println("Usage: " + usage); + System.exit(1); + } + +// if (INDEX_DIR.exists()) { +// System.out.println("Cannot save index to '" +INDEX_DIR+ "' directory, please delete it first"); +// System.exit(1); +// } + + File docDir = new File(args[1]); + if ((!docDir.exists()) || (!docDir.canRead())) { + System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); + System.exit(1); + } + + final File rdfFile = new File(args[0]); + if (!rdfFile.exists() || !rdfFile.canRead()) { + System.out.println("RDFFile directory '" +rdfFile.getAbsolutePath()+ "' does not exist or is not readable, please check the path"); + System.exit(1); + } + + final File index_dir= new File(args[2]); + final String mdProviderUrl= args[3]; + + if (args.length == 4){ + doTheHarvest(rdfFile, docDir, index_dir, mdProviderUrl,null); + } + else { + String[] options = args[4].split("="); + if (options.length != 3) { + System.err.println("wrong options:" + args[4]); + System.exit(1); + } + + if (options[0].equals("--lang")) { + doTheHarvestLanguage(rdfFile,docDir, index_dir, args[3], options[1]); + } else if (options[0].equals("--langfile")) { + doTheHarvest(rdfFile,docDir, index_dir, args[3], options[1]); + } else { + System.err.println("wrong options:" + options[0]); + System.exit(1); + } + } + } + + private static void doTheHarvest(File rdfFile, File docDir,File index_dir, + String mdProviderUrl, String languageFile) 
throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException { + OCRHarvesterThreaded harvester = new OCRHarvesterThreaded(docDir,index_dir,languageFile,mdProviderUrl,null); + try { + harvester.harvestFromRDF(rdfFile.getAbsolutePath()); + } catch (JDOMException e) { + e.printStackTrace(); + } + + } + + private static void doTheHarvestLanguage(File rdfFile, File docDir,File index_dir, + String mdProviderUrl, String lang) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException { + OCRHarvesterThreaded harvester = new OCRHarvesterThreaded(docDir,index_dir,null,mdProviderUrl,lang); + try { + harvester.harvestFromRDF(rdfFile.getAbsolutePath()); + } catch (JDOMException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + } + + + +} diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/DocHarvesterCLIMD.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/DocHarvesterCLIMD.java Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,69 @@ +/* */ package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI; +/* */ +/* */ import de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded; +/* */ import java.io.File; +/* */ import java.io.IOException; +/* */ import java.io.PrintStream; +/* */ import org.apache.lucene.index.CorruptIndexException; +/* */ import org.apache.lucene.store.LockObtainFailedException; +/* */ +/* */ public class DocHarvesterCLIMD +/* */ { +/* */ public static void main(String[] args) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException +/* */ { +/* 20 */ String usage = "java de.mpiwg.itgroup.fulltext.harvester.DocHarvesterCLIMD --lang=\njava de.mpiwg.itgroup.fulltext.harvester.DocHarvesterCLIMD --langfile=\njava de.mpiwg.itgroup.fulltext.harvester.DocHarvesterCLIMD "; +/* */ +/* 23 */ if ((args.length != 4) && (args.length != 3)) { +/* 24 */ 
System.err.println("Usage: " + usage); +/* 25 */ System.exit(1); +/* */ } +/* */ +/* 33 */ File docDir = new File(args[0]); +/* 34 */ if ((!docDir.exists()) || (!docDir.canRead())) { +/* 35 */ System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); +/* 36 */ System.exit(1); +/* */ } +/* */ +/* 39 */ File index_dir = new File(args[1]); +/* */ +/* 41 */ if (args.length == 3) { +/* 42 */ doTheHarvest(docDir, index_dir, args[2], null); +/* */ } +/* */ else { +/* 45 */ String[] options = args[3].split("="); +/* 46 */ if (options.length != 2) { +/* 47 */ System.err.println("wrong options:" + args[3]); +/* 48 */ System.exit(1); +/* */ } +/* */ +/* 51 */ if (options[0].equals("--lang")) { +/* 52 */ doTheHarvestLanguage(docDir, index_dir, args[2], options[1]); +/* 53 */ } else if (options[0].equals("--langfile")) { +/* 54 */ doTheHarvest(docDir, index_dir, args[2], options[1]); +/* */ } else { +/* 56 */ System.err.println("wrong options:" + options[0]); +/* 57 */ System.exit(1); +/* */ } +/* */ } +/* */ } +/* */ +/* */ protected static void doTheHarvestLanguage(File docDir, File index_dir, String mdProviderUrl, String lang) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException +/* */ { +/* 65 */ DocHarvesterThreaded harvester = new DocHarvesterThreaded(docDir, index_dir, null, mdProviderUrl, lang); +/* 66 */ harvester.harvestFolder(); +/* */ } +/* */ +/* */ protected static void doTheHarvest(File docDir, File index_dir, String mdProviderUrl, String languageFile) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException +/* */ { +/* 72 */ DocHarvesterThreaded harvester = new DocHarvesterThreaded(docDir, index_dir, languageFile, mdProviderUrl, null); +/* 73 */ harvester.harvestFolder(); +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: 
de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIMD + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/DocHarvesterCLIRDFMD.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/DocHarvesterCLIRDFMD.java Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,108 @@ +package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI; + +import java.io.File; +import java.io.IOException; + +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.store.LockObtainFailedException; +import org.jdom.JDOMException; + +import de.mpiwg.dwinter.fulltextIndexer.harvester.DocHarvesterThreaded; + + + +public class DocHarvesterCLIRDFMD { + + /** Index all text files under a directory. + * @throws IOException + * @throws LockObtainFailedException + * @throws CorruptIndexException + * @throws InterruptedException */ + public static void main(String[] args) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException { + String usage = "java de.mpiwg.itgroup.fulltext.harvester.HarvesterCLIRDF --lang=\n" + + "java de.mpiwg.itgroup.fulltext.harvester.HarvesterCLIRDF --langfile=\n"; + + + if (args.length != 4 & args.length != 5) { + System.err.println("Usage: " + usage); + System.exit(1); + } + +// if (INDEX_DIR.exists()) { +// System.out.println("Cannot save index to '" +INDEX_DIR+ "' directory, please delete it first"); +// System.exit(1); +// } + + File docDir = new File(args[1]); + if ((!docDir.exists()) || (!docDir.canRead())) { + System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); + System.exit(1); + } + + final File rdfFile = new File(args[0]); + if (!rdfFile.exists() || !rdfFile.canRead()) { + System.out.println("RDFFile directory '" +rdfFile.getAbsolutePath()+ "' does not exist or is not readable, please check 
the path"); + System.exit(1); + } + + final File index_dir= new File(args[2]); + final String mdProviderUrl= args[3]; + + if (args.length == 4){ + doTheHarvest(rdfFile, docDir, index_dir, mdProviderUrl,null); + } + else { + String[] options = args[4].split("="); + if (options.length != 3) { + System.err.println("wrong options:" + args[4]); + System.exit(1); + } + + if (options[0].equals("--lang")) { + doTheHarvestLanguage(rdfFile,docDir, index_dir, args[3], options[1]); + } else if (options[0].equals("--langfile")) { + doTheHarvest(rdfFile,docDir, index_dir, args[3], options[1]); + } else { + System.err.println("wrong options:" + options[0]); + System.exit(1); + } + } + } + + /** + * @param rdfFile Pfad to the RDF file (ECHO-rdf-Format) + * @param docDir + * @param index_dir + * @param mdProviderUrl + * @param languageFile + * @throws CorruptIndexException + * @throws LockObtainFailedException + * @throws IOException + * @throws InterruptedException + */ + private static void doTheHarvest(File rdfFile, File docDir,File index_dir, + String mdProviderUrl, String languageFile) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException { + DocHarvesterThreaded harvester = new DocHarvesterThreaded(docDir,index_dir,languageFile,mdProviderUrl,null); + try { + harvester.harvestFromRDF(rdfFile.getAbsolutePath()); + } catch (JDOMException e) { + e.printStackTrace(); + } + + } + + private static void doTheHarvestLanguage(File rdfFile, File docDir,File index_dir, + String mdProviderUrl, String lang) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException { + DocHarvesterThreaded harvester = new DocHarvesterThreaded(docDir,index_dir,null,mdProviderUrl,lang); + try { + harvester.harvestFromRDF(rdfFile.getAbsolutePath()); + } catch (JDOMException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + } + + + +} diff -r 000000000000 -r dc7622afcfea 
src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/HarvesterCLI.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/HarvesterCLI.java Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,69 @@ +/* */ package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI; +/* */ +/* */ import de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded; +/* */ import java.io.File; +/* */ import java.io.IOException; +/* */ import java.io.PrintStream; +/* */ import org.apache.lucene.index.CorruptIndexException; +/* */ import org.apache.lucene.store.LockObtainFailedException; +/* */ +/* */ public class HarvesterCLI +/* */ { +/* */ public static void main(String[] args) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException +/* */ { +/* 20 */ String usage = "java de.mpiwg.itgroup.fulltext.harvester.HarvesterCLI --lang=\njava de.mpiwg.itgroup.fulltext.harvester.HarvesterCLI --langfile=\njava de.mpiwg.itgroup.fulltext.harvester.HarvesterCLI "; +/* */ +/* 23 */ if ((args.length != 3) && (args.length != 2)) { +/* 24 */ System.err.println("Usage: " + usage); +/* 25 */ System.exit(1); +/* */ } +/* */ +/* 33 */ File docDir = new File(args[0]); +/* 34 */ if ((!docDir.exists()) || (!docDir.canRead())) { +/* 35 */ System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); +/* 36 */ System.exit(1); +/* */ } +/* */ +/* 39 */ File index_dir = new File(args[1]); +/* */ +/* 41 */ if (args.length == 2) { +/* 42 */ doTheHarvest(docDir, index_dir, null); +/* */ } +/* */ else { +/* 45 */ String[] options = args[2].split("="); +/* 46 */ if (options.length != 2) { +/* 47 */ System.err.println("wrong options:" + args[2]); +/* 48 */ System.exit(1); +/* */ } +/* */ +/* 51 */ if (options[0].equals("--lang")) { +/* 52 */ doTheHarvestLanguage(docDir, index_dir, options[1]); +/* 53 */ } else if (options[0].equals("--langfile")) { +/* 54 */ 
doTheHarvest(docDir, index_dir, options[1]); +/* */ } else { +/* 56 */ System.err.println("wrong options:" + options[0]); +/* 57 */ System.exit(1); +/* */ } +/* */ } +/* */ } +/* */ +/* */ protected static void doTheHarvestLanguage(File docDir, File index_dir, String lang) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException +/* */ { +/* 65 */ HarvesterThreaded harvester = new HarvesterThreaded(docDir, index_dir, null, null, lang); +/* 66 */ harvester.harvestFolder(); +/* */ } +/* */ +/* */ protected static void doTheHarvest(File docDir, File index_dir, String languageFile) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException +/* */ { +/* 72 */ HarvesterThreaded harvester = new HarvesterThreaded(docDir, index_dir, languageFile, null, null); +/* 73 */ harvester.harvestFolder(); +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLI + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/HarvesterCLIMD.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/HarvesterCLIMD.java Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,69 @@ +/* */ package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI; +/* */ +/* */ import de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded; +/* */ import java.io.File; +/* */ import java.io.IOException; +/* */ import java.io.PrintStream; +/* */ import org.apache.lucene.index.CorruptIndexException; +/* */ import org.apache.lucene.store.LockObtainFailedException; +/* */ +/* */ public class HarvesterCLIMD +/* */ { +/* */ public static void main(String[] args) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException +/* */ { +/* 20 */ String usage = "java 
de.mpiwg.itgroup.fulltext.harvester.HarvesterCLIMD --lang=\njava de.mpiwg.itgroup.fulltext.harvester.HarvesterCLIMD --langfile=\njava de.mpiwg.itgroup.fulltext.harvester.HarvesterCLIMD "; +/* */ +/* 23 */ if ((args.length != 4) && (args.length != 3)) { +/* 24 */ System.err.println("Usage: " + usage); +/* 25 */ System.exit(1); +/* */ } +/* */ +/* 33 */ File docDir = new File(args[0]); +/* 34 */ if ((!docDir.exists()) || (!docDir.canRead())) { +/* 35 */ System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); +/* 36 */ System.exit(1); +/* */ } +/* */ +/* 39 */ File index_dir = new File(args[1]); +/* */ +/* 41 */ if (args.length == 3) { +/* 42 */ doTheHarvest(docDir, index_dir, args[2], null); +/* */ } +/* */ else { +/* 45 */ String[] options = args[3].split("="); +/* 46 */ if (options.length != 3) { +/* 47 */ System.err.println("wrong options:" + args[3]); +/* 48 */ System.exit(1); +/* */ } +/* */ +/* 51 */ if (options[0].equals("--lang")) { +/* 52 */ doTheHarvestLanguage(docDir, index_dir, args[2], options[1]); +/* 53 */ } else if (options[0].equals("--langfile")) { +/* 54 */ doTheHarvest(docDir, index_dir, args[2], options[1]); +/* */ } else { +/* 56 */ System.err.println("wrong options:" + options[0]); +/* 57 */ System.exit(1); +/* */ } +/* */ } +/* */ } +/* */ +/* */ protected static void doTheHarvestLanguage(File docDir, File index_dir, String mdProviderUrl, String lang) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException +/* */ { +/* 65 */ HarvesterThreaded harvester = new HarvesterThreaded(docDir, index_dir, null, mdProviderUrl, lang); +/* 66 */ harvester.harvestFolder(); +/* */ } +/* */ +/* */ protected static void doTheHarvest(File docDir, File index_dir, String mdProviderUrl, String languageFile) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException +/* */ { +/* 72 */ HarvesterThreaded 
harvester = new HarvesterThreaded(docDir, index_dir, languageFile, mdProviderUrl, null); +/* 73 */ harvester.harvestFolder(); +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.HarvesterCLIMD + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/HarvesterCLIRDFMD.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/HarvesterCLIRDFMD.java Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,98 @@ +package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI; + +import java.io.File; +import java.io.IOException; + +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.store.LockObtainFailedException; +import org.jdom.JDOMException; + +import de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded; +import de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded; + + + +public class HarvesterCLIRDFMD { + + /** Index all text files under a directory. 
+ * @throws IOException + * @throws LockObtainFailedException + * @throws CorruptIndexException + * @throws InterruptedException */ + public static void main(String[] args) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException { + String usage = "java de.mpiwg.itgroup.fulltext.harvester.HarvesterCLIRDF --lang=\n" + + "java de.mpiwg.itgroup.fulltext.harvester.HarvesterCLIRDF --langfile=\n"; + + + if (args.length != 4 & args.length != 5) { + System.err.println("Usage: " + usage); + System.exit(1); + } + +// if (INDEX_DIR.exists()) { +// System.out.println("Cannot save index to '" +INDEX_DIR+ "' directory, please delete it first"); +// System.exit(1); +// } + + File docDir = new File(args[1]); + if ((!docDir.exists()) || (!docDir.canRead())) { + System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); + System.exit(1); + } + + final File rdfFile = new File(args[0]); + if (!rdfFile.exists() || !rdfFile.canRead()) { + System.out.println("RDFFile directory '" +rdfFile.getAbsolutePath()+ "' does not exist or is not readable, please check the path"); + System.exit(1); + } + + final File index_dir= new File(args[2]); + final String mdProviderUrl= args[3]; + + if (args.length == 4){ + doTheHarvest(rdfFile, docDir, index_dir, mdProviderUrl,null); + } + else { + String[] options = args[4].split("="); + if (options.length != 3) { + System.err.println("wrong options:" + args[4]); + System.exit(1); + } + + if (options[0].equals("--lang")) { + doTheHarvestLanguage(rdfFile,docDir, index_dir, args[3], options[1]); + } else if (options[0].equals("--langfile")) { + doTheHarvest(rdfFile,docDir, index_dir, args[3], options[1]); + } else { + System.err.println("wrong options:" + options[0]); + System.exit(1); + } + } + } + + private static void doTheHarvest(File rdfFile, File docDir,File index_dir, + String mdProviderUrl, String languageFile) throws CorruptIndexException, 
LockObtainFailedException, IOException, InterruptedException { + HarvesterThreaded harvester = new HarvesterThreaded(docDir,index_dir,languageFile,mdProviderUrl,null); + try { + harvester.harvestFromRDF(rdfFile.getAbsolutePath()); + } catch (JDOMException e) { + e.printStackTrace(); + } + + } + + private static void doTheHarvestLanguage(File rdfFile, File docDir,File index_dir, + String mdProviderUrl, String lang) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException { + HarvesterThreaded harvester = new HarvesterThreaded(docDir,index_dir,null,mdProviderUrl,lang); + try { + harvester.harvestFromRDF(rdfFile.getAbsolutePath()); + } catch (JDOMException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + } + + + +} diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/OCRHarvesterCLI.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/OCRHarvesterCLI.java Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,68 @@ +/* */ package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI; +/* */ +/* */ import de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded; +/* */ import java.io.File; +/* */ import java.io.IOException; +/* */ import java.io.PrintStream; +/* */ import org.apache.lucene.index.CorruptIndexException; +/* */ import org.apache.lucene.store.LockObtainFailedException; +/* */ +/* */ public class OCRHarvesterCLI +/* */ { +/* */ public static void main(String[] args) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException +/* */ { +/* 21 */ String usage = "java de.mpiwg.itgroup.fulltext.harvester.OCRHarvesterCLI --lang=\njava de.mpiwg.itgroup.fulltext.harvester.OCRHarvesterCLI --langfile=\njava de.mpiwg.itgroup.fulltext.harvester.OCRHarvesterCLI "; +/* */ +/* 24 */ if ((args.length != 3) && (args.length != 2)) { +/* 25 */ System.err.println("Usage: " + usage); +/* 26 */ System.exit(1); 
+/* */ } +/* */ +/* 34 */ File docDir = new File(args[0]); +/* 35 */ if ((!docDir.exists()) || (!docDir.canRead())) { +/* 36 */ System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); +/* 37 */ System.exit(1); +/* */ } +/* */ +/* 40 */ File index_dir = new File(args[1]); +/* */ +/* 42 */ if (args.length == 2) { +/* 43 */ doTheHarvest(docDir, index_dir, null); +/* */ } +/* */ else { +/* 46 */ String[] options = args[2].split("="); +/* 47 */ if (options.length != 2) { +/* 48 */ System.err.println("wrong options:" + args[2]); +/* 49 */ System.exit(1); +/* */ } +/* */ +/* 52 */ if (options[0].equals("--lang")) { +/* 53 */ doTheHarvestLanguage(docDir, index_dir, options[1]); +/* 54 */ } else if (options[0].equals("--langfile")) { +/* 55 */ doTheHarvest(docDir, index_dir, options[1]); +/* */ } else { +/* 57 */ System.err.println("wrong options:" + options[0]); +/* 58 */ System.exit(1); +/* */ } +/* */ } +/* */ } +/* */ +/* */ protected static void doTheHarvestLanguage(File docDir, File index_dir, String lang) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException +/* */ { +/* 65 */ OCRHarvesterThreaded harvester = new OCRHarvesterThreaded(docDir, index_dir, null, null, lang); +/* 66 */ harvester.harvestFolder(); +/* */ } +/* */ +/* */ protected static void doTheHarvest(File docDir, File index_dir, String languageFile) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException +/* */ { +/* 72 */ OCRHarvesterThreaded harvester = new OCRHarvesterThreaded(docDir, index_dir, languageFile, null); +/* 73 */ harvester.harvestFolder(); +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLI + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea 
src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/OCRHarvesterCLIMD.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/OCRHarvesterCLIMD.java Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,69 @@ +/* */ package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI; +/* */ +/* */ import de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded; +/* */ import java.io.File; +/* */ import java.io.IOException; +/* */ import java.io.PrintStream; +/* */ import org.apache.lucene.index.CorruptIndexException; +/* */ import org.apache.lucene.store.LockObtainFailedException; +/* */ +/* */ public class OCRHarvesterCLIMD +/* */ { +/* */ public static void main(String[] args) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException +/* */ { +/* 20 */ String usage = "java de.mpiwg.itgroup.fulltext.harvester.OCRHarvesterCLIMD --lang=\njava de.mpiwg.itgroup.fulltext.harvester.OCRHarvesterCLIMD --langfile=\njava de.mpiwg.itgroup.fulltext.harvester.OCRHarvesterCLIMD "; +/* */ +/* 23 */ if ((args.length != 4) && (args.length != 3)) { +/* 24 */ System.err.println("Usage: " + usage); +/* 25 */ System.exit(1); +/* */ } +/* */ +/* 33 */ File docDir = new File(args[0]); +/* 34 */ if ((!docDir.exists()) || (!docDir.canRead())) { +/* 35 */ System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); +/* 36 */ System.exit(1); +/* */ } +/* */ +/* 39 */ File index_dir = new File(args[1]); +/* */ +/* 41 */ if (args.length == 3) { +/* 42 */ doTheHarvest(docDir, index_dir, args[2], null); +/* */ } +/* */ else { +/* 45 */ String[] options = args[3].split("="); +/* 46 */ if (options.length != 3) { +/* 47 */ System.err.println("wrong options:" + args[3]); +/* 48 */ System.exit(1); +/* */ } +/* */ +/* 51 */ if (options[0].equals("--lang")) { +/* 52 */ doTheHarvestLanguage(docDir, index_dir, args[2], options[1]); +/* 53 */ } else if 
(options[0].equals("--langfile")) { +/* 54 */ doTheHarvest(docDir, index_dir, args[2], options[1]); +/* */ } else { +/* 56 */ System.err.println("wrong options:" + options[0]); +/* 57 */ System.exit(1); +/* */ } +/* */ } +/* */ } +/* */ +/* */ protected static void doTheHarvestLanguage(File docDir, File index_dir, String mdProviderUrl, String lang) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException +/* */ { +/* 65 */ OCRHarvesterThreaded harvester = new OCRHarvesterThreaded(docDir, index_dir, null, mdProviderUrl, lang); +/* 66 */ harvester.harvestFolder(); +/* */ } +/* */ +/* */ protected static void doTheHarvest(File docDir, File index_dir, String mdProviderUrl, String languageFile) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException +/* */ { +/* 72 */ OCRHarvesterThreaded harvester = new OCRHarvesterThreaded(docDir, index_dir, languageFile, mdProviderUrl, null); +/* 73 */ harvester.harvestFolder(); +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.fulltextIndexer.harvester.CLI.OCRHarvesterCLIMD + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/OCRHarvesterCLIRDFMD.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/CLI/OCRHarvesterCLIRDFMD.java Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,97 @@ +package de.mpiwg.dwinter.fulltextIndexer.harvester.CLI; + +import java.io.File; +import java.io.IOException; + +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.store.LockObtainFailedException; +import org.jdom.JDOMException; + +import de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded; + + + +public class OCRHarvesterCLIRDFMD { + + /** Index all text files under a directory. 
+ * @throws IOException + * @throws LockObtainFailedException + * @throws CorruptIndexException + * @throws InterruptedException */ + public static void main(String[] args) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException { + String usage = "java de.mpiwg.itgroup.fulltext.harvester.HarvesterCLIRDF --lang=\n" + + "java de.mpiwg.itgroup.fulltext.harvester.HarvesterCLIRDF --langfile=\n"; + + + if (args.length != 4 & args.length != 5) { + System.err.println("Usage: " + usage); + System.exit(1); + } + +// if (INDEX_DIR.exists()) { +// System.out.println("Cannot save index to '" +INDEX_DIR+ "' directory, please delete it first"); +// System.exit(1); +// } + + File docDir = new File(args[1]); + if ((!docDir.exists()) || (!docDir.canRead())) { + System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); + System.exit(1); + } + + final File rdfFile = new File(args[0]); + if (!rdfFile.exists() || !rdfFile.canRead()) { + System.out.println("RDFFile directory '" +rdfFile.getAbsolutePath()+ "' does not exist or is not readable, please check the path"); + System.exit(1); + } + + final File index_dir= new File(args[2]); + final String mdProviderUrl= args[3]; + + if (args.length == 4){ + doTheHarvest(rdfFile, docDir, index_dir, mdProviderUrl,null); + } + else { + String[] options = args[4].split("="); + if (options.length != 3) { + System.err.println("wrong options:" + args[4]); + System.exit(1); + } + + if (options[0].equals("--lang")) { + doTheHarvestLanguage(rdfFile,docDir, index_dir, args[3], options[1]); + } else if (options[0].equals("--langfile")) { + doTheHarvest(rdfFile,docDir, index_dir, args[3], options[1]); + } else { + System.err.println("wrong options:" + options[0]); + System.exit(1); + } + } + } + + private static void doTheHarvest(File rdfFile, File docDir,File index_dir, + String mdProviderUrl, String languageFile) throws CorruptIndexException, 
LockObtainFailedException, IOException, InterruptedException { + OCRHarvesterThreaded harvester = new OCRHarvesterThreaded(docDir,index_dir,languageFile,mdProviderUrl,null); + try { + harvester.harvestFromRDF(rdfFile.getAbsolutePath()); + } catch (JDOMException e) { + e.printStackTrace(); + } + + } + + private static void doTheHarvestLanguage(File rdfFile, File docDir,File index_dir, + String mdProviderUrl, String lang) throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException { + OCRHarvesterThreaded harvester = new OCRHarvesterThreaded(docDir,index_dir,null,mdProviderUrl,lang); + try { + harvester.harvestFromRDF(rdfFile.getAbsolutePath()); + } catch (JDOMException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + } + + + +} diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/DocHarvesterThreaded.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/DocHarvesterThreaded.java Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,577 @@ +package de.mpiwg.dwinter.fulltextIndexer.harvester; + +/* Harveste jeweils ein komplettes Buch in einen Eintrag + * + * */ +import de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread; + +import de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer; + +import de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers; + +import java.io.BufferedReader; + +import java.io.File; + +import java.io.FileNotFoundException; + +import java.io.BufferedInputStream; +import java.io.BufferedWriter; +import java.io.ByteArrayOutputStream; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.StringWriter; + +import java.io.IOException; + +import java.io.PrintStream; + +import java.util.ArrayList; + +import java.util.Arrays; + 
+import java.util.Date; + +import java.util.HashMap; + +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.parsers.SAXParser; +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerConfigurationException; +import javax.xml.transform.TransformerException; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMResult; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; +import javax.xml.transform.stream.StreamSource; + +import org.apache.commons.io.IOUtils; +import org.apache.lucene.analysis.de.GermanAnalyzer; + +import org.apache.lucene.analysis.fr.FrenchAnalyzer; + +import org.apache.lucene.analysis.standard.StandardAnalyzer; + +import org.apache.lucene.index.CorruptIndexException; + +import org.apache.lucene.store.LockObtainFailedException; + +import org.apache.lucene.util.Version; +import org.apache.ws.commons.serialize.XMLWriterImpl; + +import org.jdom.Document; + +import org.jdom.Element; + +import org.jdom.JDOMException; + +import org.jdom.input.SAXBuilder; +import org.jdom.xpath.XPath; +import org.w3c.dom.DocumentFragment; +import org.xml.sax.SAXException; + +import com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl; + +public class DocHarvesterThreaded { + private static final boolean DEBUG = false; + private static final int MAXFILES = 3; + + //private static final String PREFIX = "/tmp/composed/files"; + private static final String PREFIX = "/Volumes/data/composed/files"; + private static final String COMPOSEDFN = "doc.xml"; + private static final boolean CREATE_NEW = false; + + protected static ArrayList fileTypesToIndex = new ArrayList( + Arrays.asList(new String[] { "xml" })); + + protected static 
/**
 * Creates a harvester for the given document and index directories.
 *
 * Stores the arguments in the corresponding fields, empties every worker
 * slot and then registers the languages via {@code init_languages()}.
 *
 * @param docDir           root folder of the documents
 * @param index_dir        directory handed to the language analyzers
 * @param languageFileName name of the language mapping file, may be {@code null}
 * @param mdProviderUrl    metadata provider URL, may be {@code null}
 * @param lang             preferred language, may be {@code null}
 */
public DocHarvesterThreaded(File docDir, File index_dir,
        String languageFileName, String mdProviderUrl, String lang)
        throws CorruptIndexException, LockObtainFailedException,
        IOException {
    this.index_dir = index_dir;
    this.docDir = docDir;
    this.mdProviderUrl = mdProviderUrl;
    this.languageFileName = languageFileName;
    this.preferedLanguage = lang;

    // Start with every worker slot unoccupied.
    int slot = 0;
    while (slot < maxThread) {
        this.mythreads[slot] = null;
        slot++;
    }

    init_languages();
}
*/this.languageAnalyzers.add(new LanguageAnalyzer("de", + new GermanAnalyzer(Version.LUCENE_30), this.index_dir)); + /* 160 */this.languageAnalyzers.add(new LanguageAnalyzer("en", + new StandardAnalyzer(Version.LUCENE_30), this.index_dir)); + /* 161 */this.languageAnalyzers.add(new LanguageAnalyzer("fr", + new FrenchAnalyzer(Version.LUCENE_30), this.index_dir)); + /* 162 */this.languageAnalyzers.add(new LanguageAnalyzer("la", + new StandardAnalyzer(Version.LUCENE_30), this.index_dir)); + + /* 164 */this.languageAnalyzers.add(new LanguageAnalyzer("all", + new StandardAnalyzer(Version.LUCENE_30), this.index_dir)); + /* 165 */this.languageAnalyzers.add(new LanguageAnalyzer("morph", + new StandardAnalyzer(Version.LUCENE_30), this.index_dir)); + } catch (CorruptIndexException e) { + /* 167 */e.printStackTrace(); + /* 168 */System.exit(1); + } catch (LockObtainFailedException e) { + /* 170 */e.printStackTrace(); + /* 171 */System.exit(1); + } catch (IOException e) { + /* 173 */e.printStackTrace(); + /* 174 */System.exit(1); + } + } + + public DocHarvesterThreaded(File docDir, File index_dir, + String mdProviderUrl) throws CorruptIndexException, + LockObtainFailedException, IOException { + /* 180 */this(docDir, index_dir, null, mdProviderUrl, null); + } + + protected HashMap loadLanguages() { + /* 187 */File languageFile = new File(this.docDir + "/" + + this.languageFileName); + /* 188 */String languageFilePath = this.docDir + "/" + + this.languageFileName; + /* 189 */HashMap languages = new HashMap(); + /* 190 */boolean relativ = true; + /* 191 */if (this.languageFileName == null) + /* 192 */return null; + /* 193 */if (!languageFile.exists()) { + /* 195 */languageFile = new File(this.languageFileName); + /* 196 */languageFilePath = this.languageFileName; + /* 197 */relativ = false; + /* 198 */if (!languageFile.exists()) + /* 199 */return null; + } + BufferedReader in; + try { + /* 203 */in = new BufferedReader(new FileReader(languageFilePath)); + } catch 
(FileNotFoundException e) { + /* 205 */return null; + } + + /* 208 */String zeile = null; + try { + /* 210 */while ((zeile = in.readLine()) != null) { + /* 211 */String[] splitted = zeile.replace("\"", "").split( + "[,]"); + /* 212 */if (splitted.length == 2) + /* 213 */if (relativ) + /* 214 */languages.put(this.docDir + "/" + splitted[0], + splitted[1]); + else + /* 216 */languages.put(splitted[0], splitted[1]); + } + } catch (IOException e) { + /* 220 */e.printStackTrace(); + /* 221 */return null; + } + + /* 224 */return languages; + } + + public void harvestFromRDF(String rdffilepath) throws InterruptedException, + JDOMException { + /* 228 */Date start = new Date(); + /* 229 */boolean create = true; + try { + /* 240 */System.out.println("Indexing to directory '" + + this.index_dir + "'..."); + /* 241 */ArrayList files = getFileListFromRDF(rdffilepath); + /* 242 */indexDocs(files); + /* 243 */System.out.println("Optimizing..."); + /* 244 */this.languageAnalyzers.optimize(); + /* 245 */this.languageAnalyzers.close(); + + /* 247 */Date end = new Date(); + /* 248 */System.out.println(end.getTime() - start.getTime() + + " total milliseconds"); + } catch (IOException e) { + /* 251 */System.out.println(" caught a " + e.getClass() + + /* 252 */"\n with message: " + e.getMessage()); + } + } + + private ArrayList getFileListFromRDF(String rdffilepath) + throws JDOMException, IOException { + /* 260 */ArrayList ret = new ArrayList(); + /* 261 */SAXBuilder builder = new SAXBuilder(); + + /* 263 */Document doc = builder.build(rdffilepath); + + /* 265 */Element el = doc.getRootElement(); + + /* 267 */XPath xpath = XPath.newInstance("//MPIWG:archive-path"); + /* 268 */xpath.addNamespace("MPIWG", + "http://www.mpiwg-berlin.mpg.de/ns/mpiwg"); + /* 269 */List paths = xpath.selectNodes(el); + /* 270 */for (Element path : paths) { + /* 271 */ret.add(path.getText()); + } + + /* 274 */return ret; + } + + public void harvestFolder() throws InterruptedException { + /* 278 */Date start = 
new Date(); + /* 279 */boolean create = true; + try { + /* 290 */System.out.println("Indexing to directory '" + + this.index_dir + "'..."); + /* 291 */indexDocs(this.docDir); + /* 292 */System.out.println("Optimizing..."); + /* 293 */this.languageAnalyzers.optimize(); + /* 294 */this.languageAnalyzers.close(); + + /* 296 */Date end = new Date(); + /* 297 */System.out.println(end.getTime() - start.getTime() + + " total milliseconds"); + } catch (IOException e) { + /* 300 */System.out.println(" caught a " + e.getClass() + + /* 301 */"\n with message: " + e.getMessage()); + } + } + + private void indexDocs(ArrayList files) throws IOException, + InterruptedException { + /* 308 */for (String filename : files) { + /* 310 */indexDocs(new File(this.docDir.getAbsolutePath() + + filename)); + if ((DEBUG == true) & (this.filecount > MAXFILES)) + break; + } + } + + void indexDocs(File file) throws IOException, InterruptedException { + /* 317 */if (!file.canRead()) + return; + /* 319 */ + /* 321 */if ((DEBUG == true) && (this.filecount > MAXFILES)) + return; + /* 325 */String[] files = file.list(); + + /* 327 */String folderName = file.getName(); + + boolean notExists = !checkFileExists(file); + boolean createNew = CREATE_NEW || notExists; + // boolean createNew = true; + + boolean fileStillEmpty = true; + if (createNew) { + clearFile(file); // loesche das gesamtfile + } else { + fileStillEmpty = false; // assume that file is not empty, if it already exists + } + + + if ((((files != null) ? 1 : 0) & ((excludeFolders.contains(folderName)) ? 
0 + : 1)) != 0) { + for (int i = 0; i < files.length; ++i) { + File nextFile = new File(file, files[i]); + + if (nextFile.isDirectory()) // directory dann gehe in die + // naechste ebene + indexDocs(nextFile); + + else if (isTextFile(nextFile)) { + + if (createNew) { + fileStillEmpty = false; //datei hat jetzt einen Inhalt + compose(file, nextFile); // fuege das file an das + // gesamtfilean + } + + } + if ((DEBUG == true) && (this.filecount > MAXFILES)) + break; + } + if (createNew) { + if (fileStillEmpty){ + deleteComposedFile(file); // file hat keinen inhalt dann loeschen + } else { + finishFile(file); + } + } + + if (!fileStillEmpty) + processCompleteFile(file); + /* 335 */} else { + /* 342 */System.out.println("not adding " + file); + } + } + + private void finishFile(File folder) { + File cf = getComposedFile(folder); + System.out.println(); + try { + System.out.println("finish file:" + cf.getCanonicalPath()); + FileWriter fw = new FileWriter(cf, true); + + fw.write(""); + fw.close(); + + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + } + + private boolean deleteComposedFile(File folder) { + File cf = getComposedFile(folder); + try { + System.out.println("file deleted, because empty:" + cf.getCanonicalPath()); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + return cf.delete(); + } + + + private void processCompleteFile(File folder) { + System.out.println("Completed File:"+String.valueOf(completedFiles++)); + File cf = getComposedFile(folder); + try { + processFile(cf); + } catch (CorruptIndexException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (LockObtainFailedException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + } + + private boolean checkFileExists(File folder) { + File cf = getComposedFile(folder); + return 
cf.exists(); + + } + + private void clearFile(File folder) { + File cf = getComposedFile(folder); + cf.delete(); + try { + File dir = cf.getParentFile(); + if (false == dir.exists()) { + dir.mkdirs(); + } + + cf.createNewFile(); + + FileWriter fw = new FileWriter(cf); + fw.write(""); + fw.close(); + + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + } + + private void compose(File folder, File file) { + File cf = getComposedFile(folder); + try { + System.out.println("Adding" + file.getCanonicalPath()); + //FileWriter fw = new FileWriter(cf, true); + + FileOutputStream stream = new FileOutputStream(cf,true); + + OutputStreamWriter fw = new OutputStreamWriter(stream, "utf-8"); + + String filteredDocument=""; + try { + filteredDocument = getFilteredFile(file); + } catch (TransformerException e) { + filteredDocument = ""; + }catch (ParserConfigurationException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (SAXException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + fw.append(filteredDocument); + fw.write(""); + fw.close(); + + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + } + + private String getFilteredFile(File file) throws IOException, + TransformerException, ParserConfigurationException, SAXException { + + // String txt = IOUtils.toString(new FileInputStream(file)); + // get rid of the entities + TransformerFactory tf = TransformerFactory.newInstance(); + Transformer t = tf.newTransformer(); + + + //OutputStream output = new ByteArrayOutputStream(); + + //BufferedWriter sw = new BufferedWriter(new OutputStreamWriter(output, "utf-8")); + + DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + dbf.setNamespaceAware(true); + dbf.setValidating(false); + DocumentBuilder db = dbf.newDocumentBuilder(); + db.setEntityResolver(new MyResolver()); + org.w3c.dom.Document doc = db.parse(file); + + StringWriter sw = 
new StringWriter(); + StreamResult sr = new StreamResult(sw); + + org.w3c.dom.Document tgtDoc = db.newDocument(); + DocumentFragment fragment = tgtDoc.createDocumentFragment(); + DOMResult tgtDom = new DOMResult( fragment ); + + t.setOutputProperty(OutputKeys.ENCODING, "utf-8"); + t.transform(new DOMSource(doc), sr); + t.transform(new DOMSource(doc), tgtDom); + + String txt = sw.toString(); + + + + Pattern p = Pattern.compile("(.*)", Pattern.DOTALL); + Matcher m = p.matcher(txt); + if (m.find()) + if (m.groupCount() > 0) { + return m.group(1); + } + return ""; + } + + private File getComposedFile(File folder) { + try { + String path = folder.getCanonicalPath(); + String newPath = PREFIX + path + "/" + COMPOSEDFN; + return new File(newPath); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + return null; + } + + protected void processFile(File file) throws CorruptIndexException, + LockObtainFailedException, IOException { + /* 348 */int freeThread = -1; + /* 349 */while (freeThread == -1) { + /* 351 */freeThread = waitForFreeThread(); + } + + /* 355 */if (this.textLanguage == null) + /* 356 */this.textLanguage = loadLanguages(); + /* 357 */this.mythreads[freeThread] = new ProcessFileThread( + this.languageAnalyzers, file, this.languageFileName, + this.textLanguage, this.mdProviderUrl, this.preferedLanguage, + this.languageToISO, this.supportedLanguageFolder); + /* 358 */this.mythreads[freeThread].start(); + /* 359 */System.out.println("New process started:" + freeThread); + } + + protected int waitForFreeThread() { + /* 367 */for (int i = 0; i < maxThread; ++i) { + /* 369 */if (this.mythreads[i] == null) + /* 370 */return i; + /* 371 */if (!this.mythreads[i].done) + continue; + /* 373 */this.filecount += 1; + /* 374 */System.out.println("filecount:" + this.filecount); + /* 375 */return i; + } + + /* 378 */return -1; + } + + private boolean isTextFile(File file) { + /* 392 */String fn = file.getName(); + + /* 394 */String[] 
splitted = fn.split("[.]"); + + /* 396 */String ext = ""; + + /* 398 */if (splitted.length > 1) { + /* 400 */ext = splitted[(splitted.length - 1)]; + } + boolean ret = fileTypesToIndex.contains(ext); + /* 403 */return ret; + } + +} + +/* + * Location: /private/tmp/fulltextIndexer.jar Qualified Name: + * de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded JD-Core Version: + * 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/HarvesterThreaded.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/HarvesterThreaded.java Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,311 @@ +/* */ package de.mpiwg.dwinter.fulltextIndexer.harvester; +/* */ +/* */ import de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread; +/* */ import de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer; +/* */ import de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers; +/* */ import java.io.BufferedReader; +/* */ import java.io.File; +/* */ import java.io.FileNotFoundException; +/* */ import java.io.FileReader; +/* */ import java.io.IOException; +/* */ import java.io.PrintStream; +/* */ import java.util.ArrayList; +/* */ import java.util.Arrays; +/* */ import java.util.Date; +/* */ import java.util.HashMap; +/* */ import java.util.List; +/* */ import org.apache.lucene.analysis.de.GermanAnalyzer; +/* */ import org.apache.lucene.analysis.fr.FrenchAnalyzer; +/* */ import org.apache.lucene.analysis.standard.StandardAnalyzer; +/* */ import org.apache.lucene.index.CorruptIndexException; +/* */ import org.apache.lucene.store.LockObtainFailedException; +/* */ import org.apache.lucene.util.Version; +/* */ import org.jdom.Document; +/* */ import org.jdom.Element; +/* */ import org.jdom.JDOMException; +/* */ import org.jdom.input.SAXBuilder; +/* */ import org.jdom.xpath.XPath; +/* */ +/* */ public class HarvesterThreaded +/* */ { +/* */ private static final 
boolean DEBUG = false; + private static final int MAXFILES = 100; // only used if DEBUG is true +/* 75 */ protected static ArrayList fileTypesToIndex = new ArrayList(Arrays.asList(new String[] { "xml" })); +/* */ +/* 77 */ protected static ArrayList excludeFolders = new ArrayList(Arrays.asList(new String[] { "OCR" })); +/* 78 */ protected static boolean indexMetaPriority = false; +/* */ +/* 81 */ private static String specialMode = ""; +/* 82 */ protected static int maxThread = 30; +/* */ protected File docDir; +/* */ protected File index_dir; +/* 88 */ protected HashMap textLanguage = null; +/* 89 */ protected HashMap languageToISO = new HashMap(); +/* 90 */ protected LanguageAnalyzers languageAnalyzers = new LanguageAnalyzers(); +/* */ +/* 92 */ private int counter = 0; +/* */ protected String languageFileName; +/* 99 */ protected ProcessFileThread[] mythreads = new ProcessFileThread[maxThread]; +/* 100 */ private int filecount = 0; +/* */ +/* 102 */ protected String mdProviderUrl = null; +/* */ private String preferedLanguage; +/* 106 */ protected HashMap supportedLanguageFolder = new HashMap(); +/* */ +/* */ public HarvesterThreaded() +/* */ { +/* */ } +/* */ +/* */ public HarvesterThreaded(File docDir, File index_dir, String languageFileName, String mdProviderUrl, String lang) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException +/* */ { +/* 119 */ this.docDir = docDir; +/* 120 */ this.languageFileName = languageFileName; +/* 121 */ this.preferedLanguage = lang; +/* */ +/* 133 */ this.mdProviderUrl = mdProviderUrl; +/* */ +/* 135 */ this.index_dir = index_dir; +/* */ +/* 137 */ for (int i = 0; i < maxThread; ++i) +/* */ { +/* 139 */ this.mythreads[i] = null; +/* */ } +/* */ +/* 142 */ init_languages(); +/* */ } +/* */ +/* */ private void init_languages() { +/* 146 */ this.languageToISO.put("German", "de"); +/* 147 */ this.languageToISO.put("French", "fr"); +/* 148 */ this.languageToISO.put("English", "en"); +/* 149 */ 
this.languageToISO.put("German-f", "de-f"); +/* */ +/* 151 */ this.supportedLanguageFolder.put("deu", "de"); +/* 152 */ this.supportedLanguageFolder.put("deu-f", "de"); +/* 153 */ this.supportedLanguageFolder.put("fra", "fr"); +/* 154 */ this.supportedLanguageFolder.put("eng", "en"); +/* 155 */ this.supportedLanguageFolder.put("lic", "la"); +/* */ try +/* */ { +/* 158 */ this.languageAnalyzers.add(new LanguageAnalyzer("de", new GermanAnalyzer(Version.LUCENE_30), this.index_dir)); +/* 159 */ this.languageAnalyzers.add(new LanguageAnalyzer("de-f", new GermanAnalyzer(Version.LUCENE_30), this.index_dir)); +/* 160 */ this.languageAnalyzers.add(new LanguageAnalyzer("en", new StandardAnalyzer(Version.LUCENE_30), this.index_dir)); +/* 161 */ this.languageAnalyzers.add(new LanguageAnalyzer("fr", new FrenchAnalyzer(Version.LUCENE_30), this.index_dir)); +/* 162 */ this.languageAnalyzers.add(new LanguageAnalyzer("la", new StandardAnalyzer(Version.LUCENE_30), this.index_dir)); +/* */ +/* 164 */ this.languageAnalyzers.add(new LanguageAnalyzer("all", new StandardAnalyzer(Version.LUCENE_30), this.index_dir)); +/* 165 */ this.languageAnalyzers.add(new LanguageAnalyzer("morph", new StandardAnalyzer(Version.LUCENE_30), this.index_dir)); +/* */ } catch (CorruptIndexException e) { +/* 167 */ e.printStackTrace(); +/* 168 */ System.exit(1); +/* */ } catch (LockObtainFailedException e) { +/* 170 */ e.printStackTrace(); +/* 171 */ System.exit(1); +/* */ } catch (IOException e) { +/* 173 */ e.printStackTrace(); +/* 174 */ System.exit(1); +/* */ } +/* */ } +/* */ +/* */ public HarvesterThreaded(File docDir, File index_dir, String mdProviderUrl) throws CorruptIndexException, LockObtainFailedException, IOException +/* */ { +/* 180 */ this(docDir, index_dir, null, mdProviderUrl, null); +/* */ } +/* */ +/* */ protected HashMap loadLanguages() +/* */ { +/* 187 */ File languageFile = new File(this.docDir + "/" + this.languageFileName); +/* 188 */ String languageFilePath = this.docDir + "/" + 
this.languageFileName; +/* 189 */ HashMap languages = new HashMap(); +/* 190 */ boolean relativ = true; +/* 191 */ if (this.languageFileName == null) +/* 192 */ return null; +/* 193 */ if (!languageFile.exists()) +/* */ { +/* 195 */ languageFile = new File(this.languageFileName); +/* 196 */ languageFilePath = this.languageFileName; +/* 197 */ relativ = false; +/* 198 */ if (!languageFile.exists()) +/* 199 */ return null; +/* */ } +/* */ BufferedReader in; +/* */ try { +/* 203 */ in = new BufferedReader(new FileReader(languageFilePath)); +/* */ } catch (FileNotFoundException e) { +/* 205 */ return null; +/* */ } +/* */ +/* 208 */ String zeile = null; +/* */ try { +/* 210 */ while ((zeile = in.readLine()) != null) { +/* 211 */ String[] splitted = zeile.replace("\"", "").split("[,]"); +/* 212 */ if (splitted.length == 2) +/* 213 */ if (relativ) +/* 214 */ languages.put(this.docDir + "/" + splitted[0], splitted[1]); +/* */ else +/* 216 */ languages.put(splitted[0], splitted[1]); +/* */ } +/* */ } +/* */ catch (IOException e) { +/* 220 */ e.printStackTrace(); +/* 221 */ return null; +/* */ } +/* */ +/* 224 */ return languages; +/* */ } +/* */ +/* */ public void harvestFromRDF(String rdffilepath) throws InterruptedException, JDOMException { +/* 228 */ Date start = new Date(); +/* 229 */ boolean create = true; +/* */ try +/* */ { +/* 240 */ System.out.println("Indexing to directory '" + this.index_dir + "'..."); +/* 241 */ ArrayList files = getFileListFromRDF(rdffilepath); +/* 242 */ indexDocs(files); +/* 243 */ System.out.println("Optimizing..."); +/* 244 */ this.languageAnalyzers.optimize(); +/* 245 */ this.languageAnalyzers.close(); +/* */ +/* 247 */ Date end = new Date(); +/* 248 */ System.out.println(end.getTime() - start.getTime() + " total milliseconds"); +/* */ } +/* */ catch (IOException e) { +/* 251 */ System.out.println(" caught a " + e.getClass() + +/* 252 */ "\n with message: " + e.getMessage()); +/* */ } +/* */ } +/* */ +/* */ private ArrayList 
getFileListFromRDF(String rdffilepath) +/* */ throws JDOMException, IOException +/* */ { +/* 260 */ ArrayList ret = new ArrayList(); +/* 261 */ SAXBuilder builder = new SAXBuilder(); +/* */ +/* 263 */ Document doc = builder.build(rdffilepath); +/* */ +/* 265 */ Element el = doc.getRootElement(); +/* */ +/* 267 */ XPath xpath = XPath.newInstance("//MPIWG:archive-path"); +/* 268 */ xpath.addNamespace("MPIWG", "http://www.mpiwg-berlin.mpg.de/ns/mpiwg"); +/* 269 */ List paths = xpath.selectNodes(el); +/* 270 */ for (Element path : paths) { +/* 271 */ ret.add(path.getText()); +/* */ } +/* */ +/* 274 */ return ret; +/* */ } +/* */ +/* */ public void harvestFolder() throws InterruptedException { +/* 278 */ Date start = new Date(); +/* 279 */ boolean create = true; +/* */ try +/* */ { +/* 290 */ System.out.println("Indexing to directory '" + this.index_dir + "'..."); +/* 291 */ indexDocs(this.docDir); +/* 292 */ System.out.println("Optimizing..."); +/* 293 */ this.languageAnalyzers.optimize(); +/* 294 */ this.languageAnalyzers.close(); +/* */ +/* 296 */ Date end = new Date(); +/* 297 */ System.out.println(end.getTime() - start.getTime() + " total milliseconds"); +/* */ } +/* */ catch (IOException e) { +/* 300 */ System.out.println(" caught a " + e.getClass() + +/* 301 */ "\n with message: " + e.getMessage()); +/* */ } +/* */ } +/* */ +/* */ private void indexDocs(ArrayList files) +/* */ throws IOException, InterruptedException +/* */ { +/* 308 */ for (String filename : files) +/* */ { +/* 310 */ indexDocs(new File(this.docDir.getAbsolutePath() + filename)); + if ((DEBUG==true) & (this.filecount>MAXFILES)) + break; +/* */ } +/* */ } +/* */ +/* */ void indexDocs(File file) +/* */ throws IOException, InterruptedException +/* */ { +/* 317 */ if (!file.canRead()) +/* */ return; +/* 319 */ if (file.isDirectory()) +/* */ { +/* 321 */ if ((DEBUG==true) && (this.filecount>MAXFILES)) + return; +/* 325 */ String[] files = file.list(); +/* */ +/* 327 */ String folderName = 
file.getName(); +/* 328 */ if ((((files != null) ? 1 : 0) & ((excludeFolders.contains(folderName)) ? 0 : 1)) != 0) +/* 329 */ for (int i = 0; i < files.length; ++i) +/* */ { +/* 332 */ indexDocs(new File(file, files[i])); + if ((DEBUG==true) && (this.filecount>MAXFILES)) + break; +/* */ } +/* */ } +/* 335 */ else if (isTextFile(file)) +/* */ { +/* 338 */ processFile(file); +/* */ } +/* */ else +/* */ { +/* 342 */ System.out.println("not adding " + file); +/* */ } +/* */ } +/* */ +/* */ protected void processFile(File file) throws CorruptIndexException, LockObtainFailedException, IOException +/* */ { +/* 348 */ int freeThread = -1; +/* 349 */ while (freeThread == -1) +/* */ { +/* 351 */ freeThread = waitForFreeThread(); +/* */ } +/* */ +/* 355 */ if (this.textLanguage == null) +/* 356 */ this.textLanguage = loadLanguages(); +/* 357 */ this.mythreads[freeThread] = new ProcessFileThread(this.languageAnalyzers, file, this.languageFileName, this.textLanguage, this.mdProviderUrl, this.preferedLanguage, this.languageToISO, this.supportedLanguageFolder); +/* 358 */ this.mythreads[freeThread].start(); +/* 359 */ System.out.println("New process started:" + freeThread); +/* */ } +/* */ +/* */ protected int waitForFreeThread() +/* */ { +/* 367 */ for (int i = 0; i < maxThread; ++i) +/* */ { +/* 369 */ if (this.mythreads[i] == null) +/* 370 */ return i; +/* 371 */ if (!this.mythreads[i].done) +/* */ continue; +/* 373 */ this.filecount += 1; +/* 374 */ System.out.println("filecount:" + this.filecount); +/* 375 */ return i; +/* */ } +/* */ +/* 378 */ return -1; +/* */ } +/* */ +/* */ private boolean isTextFile(File file) +/* */ { +/* 392 */ String fn = file.getName(); +/* */ +/* 394 */ String[] splitted = fn.split("[.]"); +/* */ +/* 396 */ String ext = ""; +/* */ +/* 398 */ if (splitted.length > 1) +/* */ { +/* 400 */ ext = splitted[(splitted.length - 1)]; +/* */ } +/* */ +/* 403 */ return fileTypesToIndex.contains(ext); +/* */ } +/* */ } + +/* Location: 
/private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.fulltextIndexer.harvester.HarvesterThreaded + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/MyResolver.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/MyResolver.java Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,45 @@ +package de.mpiwg.dwinter.fulltextIndexer.harvester; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; + +import org.xml.sax.EntityResolver; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; + +public class MyResolver implements EntityResolver { + + public InputSource resolveEntity(String publicId, String systemId) + throws SAXException, IOException { + // TODO Auto-generated method stub + // System.out.println("public:"+publicId); + // System.out.println("static"+systemId); + //File f = new File( + // "/Users/dwinter/Documents/Projekte/data-mining/developmentEclipse/fulltextIndexer/catalog/xhtml1-transitional.dtd"); + + + //if (!f.exists()) { + // System.err.println("Cant't find xhtml-dtd: MyResolver"); + // return null; + //} + //if (publicId.equals("-//W3C//DTD XHTML 1.0 Transitional//EN")) + // return new InputSource(f.getAbsolutePath()); + + if (publicId.equals("-//W3C//DTD XHTML 1.0 Transitional//EN")){ + InputStream res = getClass().getResourceAsStream("/de/mpiwg/dwinter/fulltextIndexer/catalog/xhtml1-transitional.dtd"); + return new InputSource(res); + } else if (publicId.equals("-//W3C//ENTITIES Latin 1 for XHTML//EN")){ + InputStream res = getClass().getResourceAsStream("/de/mpiwg/dwinter/fulltextIndexer/catalog/xhtml-lat1.ent"); + return new InputSource(res); + } else if (publicId.equals("-//W3C//ENTITIES Symbols for XHTML//EN")){ + InputStream res = getClass().getResourceAsStream("/de/mpiwg/dwinter/fulltextIndexer/catalog/xhtml-symbol.ent"); + return new InputSource(res); + } 
else if (publicId.equals("-//W3C//ENTITIES Special for XHTML//EN")){ + InputStream res = getClass().getResourceAsStream("/de/mpiwg/dwinter/fulltextIndexer/catalog/xhtml-special.ent"); + return new InputSource(res); + } + return new InputSource(systemId); + } + +} diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/OCRHarvesterThreaded.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/OCRHarvesterThreaded.java Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,102 @@ + +/* */ package de.mpiwg.dwinter.fulltextIndexer.harvester; +/* */ +/* */ import de.mpiwg.dwinter.fulltextIndexer.harvester.processors.OCRProcessFileThread; +/* */ import de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread; +/* */ import de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer; +/* */ import de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers; +/* */ import java.io.File; +/* */ import java.io.IOException; +/* */ import java.io.PrintStream; +/* */ import java.util.HashMap; +/* */ import org.apache.lucene.analysis.de.GermanAnalyzer; +/* */ import org.apache.lucene.analysis.fr.FrenchAnalyzer; +/* */ import org.apache.lucene.analysis.standard.StandardAnalyzer; +/* */ import org.apache.lucene.index.CorruptIndexException; +/* */ import org.apache.lucene.store.LockObtainFailedException; +/* */ import org.apache.lucene.util.Version; +/* */ +/* */ public class OCRHarvesterThreaded extends HarvesterThreaded +/* */ { +/* */ private String preferedLanguage; +/* */ +/* */ public OCRHarvesterThreaded() +/* */ { +/* */ } +/* */ +/* */ public OCRHarvesterThreaded(File docDir, File index_dir, String languageFileName, String mdProviderUrl, String lang) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException +/* */ { +/* 41 */ this.index_dir = index_dir; +/* 42 */ this.languageFileName = languageFileName; +/* 43 */ this.docDir = docDir; +/* 44 */ this.preferedLanguage = lang; +/* */ +/* 
46 */ this.mdProviderUrl = mdProviderUrl; +/* 47 */ for (int i = 0; i < maxThread; ++i) +/* */ { +/* 49 */ this.mythreads[i] = null; +/* */ } +/* */ +/* 52 */ init_languages(); +/* */ } +/* */ +/* */ private void init_languages() { +/* 56 */ this.languageToISO.put("German", "de"); +/* 57 */ this.languageToISO.put("French", "fr"); +/* 58 */ this.languageToISO.put("English", "en"); +/* 59 */ this.languageToISO.put("German-f", "de-f"); +/* */ +/* 61 */ this.supportedLanguageFolder.put("deu", "de"); +/* 62 */ this.supportedLanguageFolder.put("deu-f", "de"); +/* 63 */ this.supportedLanguageFolder.put("fra", "fr"); +/* 64 */ this.supportedLanguageFolder.put("eng", "en"); +/* 65 */ this.supportedLanguageFolder.put("lic", "la"); +/* */ try +/* */ { +/* 68 */ this.languageAnalyzers.add(new LanguageAnalyzer("de", new GermanAnalyzer(Version.LUCENE_30), this.index_dir)); +/* 69 */ this.languageAnalyzers.add(new LanguageAnalyzer("de-f", new GermanAnalyzer(Version.LUCENE_30), this.index_dir)); +/* 70 */ this.languageAnalyzers.add(new LanguageAnalyzer("en", new StandardAnalyzer(Version.LUCENE_30), this.index_dir)); +/* 71 */ this.languageAnalyzers.add(new LanguageAnalyzer("fr", new FrenchAnalyzer(Version.LUCENE_30), this.index_dir)); +/* 72 */ this.languageAnalyzers.add(new LanguageAnalyzer("la", new StandardAnalyzer(Version.LUCENE_30), this.index_dir)); +/* */ +/* 74 */ this.languageAnalyzers.add(new LanguageAnalyzer("all", new StandardAnalyzer(Version.LUCENE_30), this.index_dir)); +/* 75 */ this.languageAnalyzers.add(new LanguageAnalyzer("morph", new StandardAnalyzer(Version.LUCENE_30), this.index_dir)); +/* */ } catch (CorruptIndexException e) { +/* 77 */ e.printStackTrace(); +/* 78 */ System.exit(1); +/* */ } catch (LockObtainFailedException e) { +/* 80 */ e.printStackTrace(); +/* 81 */ System.exit(1); +/* */ } catch (IOException e) { +/* 83 */ e.printStackTrace(); +/* 84 */ System.exit(1); +/* */ } +/* */ } +/* */ +/* */ public OCRHarvesterThreaded(File docDir, File 
index_dir, String mdProviderUrl, String preferedLanguage) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException +/* */ { +/* 92 */ this(docDir, index_dir, null, mdProviderUrl, preferedLanguage); +/* */ } +/* */ +/* */ protected void processFile(File file) throws CorruptIndexException, LockObtainFailedException, IOException +/* */ { +/* 97 */ int freeThread = -1; +/* 98 */ while (freeThread == -1) +/* */ { +/* 100 */ freeThread = waitForFreeThread(); +/* */ } +/* */ +/* 104 */ if (this.textLanguage == null) +/* 105 */ this.textLanguage = loadLanguages(); +/* 106 */ this.mythreads[freeThread] = new OCRProcessFileThread(this.languageAnalyzers, file, this.languageFileName, this.textLanguage, this.mdProviderUrl, this.preferedLanguage, this.languageToISO, this.supportedLanguageFolder); +/* 107 */ this.mythreads[freeThread].start(); +/* 108 */ System.out.println("New process started:" + freeThread); +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.fulltextIndexer.harvester.OCRHarvesterThreaded + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,17 @@ +K 25 +svn:wc:ra_dav:version-url +V 98 +/svn/!svn/ver/2621/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/harvester/processors +END +OCRProcessFileThread.java +K 25 +svn:wc:ra_dav:version-url +V 124 +/svn/!svn/ver/2873/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/OCRProcessFileThread.java +END +ProcessFileThread.java +K 25 +svn:wc:ra_dav:version-url +V 121 +/svn/!svn/ver/2873/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/ProcessFileThread.java +END diff -r 000000000000 -r dc7622afcfea 
src/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,103 @@ +10 + +dir +2621 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/harvester/processors +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-16T19:13:23.990503Z +2621 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +OCRProcessFileThread.java +file +2873 + + + +2010-06-29T08:21:59.000000Z +4db0c65eec60a04ef427bf6d511a7e72 +2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +3809 + +ProcessFileThread.java +file +2873 + + + +2010-06-30T09:27:13.000000Z +07c03789a672827105efd181f9c17e0b +2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +21837 + diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/prop-base/OCRProcessFileThread.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/prop-base/OCRProcessFileThread.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,9 @@ +K 14 +svn:executable +V 1 +* +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/prop-base/ProcessFileThread.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/prop-base/ProcessFileThread.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,9 @@ +K 14 +svn:executable +V 1 +* +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea 
src/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/text-base/OCRProcessFileThread.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/text-base/OCRProcessFileThread.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,82 @@ +/* */ package de.mpiwg.dwinter.fulltextIndexer.harvester.processors; +/* */ +/* */ import de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument; +/* */ import de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument.OCRLine; +/* */ import de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument; +/* */ import de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers; +/* */ import de.mpiwg.dwinter.lucencetools.documents.OcropusLineDocument; +/* */ import java.io.File; +/* */ import java.io.FileNotFoundException; +/* */ import java.io.FileReader; +/* */ import java.io.IOException; +/* */ import java.io.PrintStream; +/* */ import java.io.Reader; +/* */ import java.io.UnsupportedEncodingException; +/* */ import java.util.HashMap; +/* */ import javax.xml.parsers.ParserConfigurationException; +/* */ import javax.xml.parsers.SAXParser; +/* */ import javax.xml.parsers.SAXParserFactory; +/* */ import org.apache.lucene.index.CorruptIndexException; +/* */ import org.xml.sax.InputSource; +/* */ import org.xml.sax.SAXException; +/* */ +/* */ public class OCRProcessFileThread extends ProcessFileThread +/* */ { +/* */ public OCRProcessFileThread(LanguageAnalyzers languageAnalyzers2, File file, String lfn, HashMap tl, String mdProviderUrl, String preferedLanguage, HashMap languageToISO, HashMap supportedLanguageFolder) +/* */ { +/* 43 */ super(languageAnalyzers2, file, lfn, tl, mdProviderUrl, preferedLanguage, languageToISO, supportedLanguageFolder); +/* */ } +/* */ +/* */ protected Boolean addDocument(File file, String lang, String dcMetaData, String textId) +/* */ throws CorruptIndexException, IOException, FileNotFoundException, UnsupportedEncodingException, 
ParserConfigurationException, SAXException +/* */ { +/* 52 */ ParseOcrDocument ch = new ParseOcrDocument(); +/* */ +/* 59 */ SAXParserFactory factory = SAXParserFactory.newInstance(); +/* 60 */ factory.setNamespaceAware(true); +/* 61 */ factory.setValidating(false); +/* */ +/* 64 */ SAXParser parser = factory.newSAXParser(); +/* */ try +/* */ { +/* 67 */ Reader reader = new FileReader(file); +/* 68 */ InputSource input = new InputSource(reader); +/* */ +/* 73 */ parser.parse(input, ch); +/* */ } +/* */ catch (SAXException e) +/* */ { +/* 78 */ e.printStackTrace(); +/* 79 */ return Boolean.valueOf(false); +/* */ } +/* */ catch (IOException e) { +/* 82 */ e.printStackTrace(); +/* */ try { +/* 84 */ sleep(1L); +/* 85 */ System.out.println("retry"); +/* 86 */ addDocument(file, lang, dcMetaData, textId); +/* */ } +/* */ catch (InterruptedException e1) { +/* 89 */ e1.printStackTrace(); +/* */ } +/* 91 */ return Boolean.valueOf(false); +/* */ } +/* */ +/* 94 */ OCRDocument doc = ch.ocrDocument; +/* */ +/* 96 */ for (OCRDocument.OCRLine line : doc.OCRLines) +/* */ { +/* 98 */ if (dcMetaData == null) +/* 99 */ this.languageAnalyzers.addDocument(OcropusLineDocument.Document(file, absPathToTextId(file),lang, line, doc.pageDimension, textId), lang); +/* */ else { +/* 101 */ this.languageAnalyzers.addDocument(OcropusLineDocument.Document(file, absPathToTextId(file),lang, line, doc.pageDimension, dcMetaData, textId), lang); +/* */ } +/* */ } +/* 104 */ return Boolean.valueOf(true); +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.fulltextIndexer.harvester.processors.OCRProcessFileThread + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/text-base/ProcessFileThread.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/.svn/text-base/ProcessFileThread.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,548 @@ +/* */ package de.mpiwg.dwinter.fulltextIndexer.harvester.processors; +/* */ +/* */ import java.io.BufferedReader; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.io.Reader; +import java.io.UnsupportedEncodingException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Date; +import java.util.HashMap; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.xml.parsers.ParserConfigurationException; + +import org.apache.lucene.document.DateTools; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TopScoreDocCollector; +import org.apache.lucene.store.LockObtainFailedException; +import org.apache.xmlrpc.XmlRpcException; +import org.apache.xmlrpc.client.XmlRpcClient; +import org.apache.xmlrpc.client.XmlRpcClientConfigImpl; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import org.xml.sax.XMLReader; + +import com.sun.org.apache.xerces.internal.parsers.SAXParser; + +import de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta; +import de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers; +import de.mpiwg.dwinter.lucencetools.documents.FileDocument; +/* */ +/* */ public class ProcessFileThread extends Thread +/* */ { +/* */ private static final String TEXTIDFROMPATH_REGEXP = ".*(/(permanent|experimental)/.*)"; +/* */ private static final int DELETED_WRONG_LANGUAGE = 1; +/* */ private static final int DELETED_OLD_VERSION = 2; +/* */ private static final int NEW_FILE = 0; +/* */ private static final int 
FILE_EXISTS = -1; +/* */ protected File docDir; +/* */ protected File index_dir; +/* 86 */ protected ArrayList fileTypesToIndex = new ArrayList(Arrays.asList(new String[] { "xml" })); +/* 87 */ protected ArrayList excludeFolders = new ArrayList(Arrays.asList(new String[] { "OCR" })); +/* 88 */ private HashMap textLanguage = null; +/* */ +/* 90 */ protected HashMap languageToISO = new HashMap(); +/* 91 */ protected LanguageAnalyzers languageAnalyzers = new LanguageAnalyzers(); +/* 92 */ protected HashMap supportedLanguageFolder = new HashMap(); +/* 93 */ private int counter = 0; +/* */ protected String languageFileName; +/* 95 */ protected boolean indexMetaPriority = false; +/* 96 */ protected boolean deduceFromFolderPriority = true; +/* */ +/* 101 */ private String specialMode = ""; +/* 102 */ public boolean done = false; +/* */ private File processThisFile; +/* 105 */ private String mode = "new"; // if mode is not add, then only modified files and new files will be added. +/* */ private String mdProviderUrl; +/* 107 */ private String preferedLanguage = null; +/* */ +/* */ public ProcessFileThread(File docDir, File index_dir, String languageFileName, File processThisFile, String mdProviderUrl, HashMap languageToISO, HashMap supportedLanguageFolder) throws CorruptIndexException, LockObtainFailedException, IOException { +/* 110 */ this.docDir = docDir; +/* 111 */ this.languageFileName = languageFileName; +/* */ +/* 114 */ this.index_dir = index_dir; +/* 115 */ this.processThisFile = processThisFile; +/* 116 */ this.mdProviderUrl = mdProviderUrl; +/* 117 */ this.languageToISO = languageToISO; +/* 118 */ this.supportedLanguageFolder = supportedLanguageFolder; +/* */ } +/* */ +/* */ public ProcessFileThread(LanguageAnalyzers languageAnalyzers2, File file, String lfn, HashMap tl, String mdProviderUrl, String preferedLanguage, HashMap languageToISO, HashMap supportedLanguageFolder) +/* */ { +/* 123 */ this.languageAnalyzers = languageAnalyzers2; +/* 124 */ 
this.processThisFile = file; +/* 125 */ this.textLanguage = tl; +/* */ +/* 128 */ this.languageFileName = lfn; +/* 129 */ this.preferedLanguage = preferedLanguage; +/* 130 */ this.mdProviderUrl = mdProviderUrl; +/* 131 */ this.languageToISO = languageToISO; +/* 132 */ this.supportedLanguageFolder = supportedLanguageFolder; +/* */ } +/* */ +/* */ public void run() +/* */ { +/* */ try +/* */ { +/* 140 */ processFile(this.processThisFile); +/* */ } +/* */ catch (CorruptIndexException e) { +/* 143 */ e.printStackTrace(); +/* */ } +/* */ catch (FileNotFoundException e) { +/* 146 */ e.printStackTrace(); +/* */ } +/* */ catch (UnsupportedEncodingException e) { +/* 149 */ e.printStackTrace(); +/* */ } +/* */ catch (IOException e) { +/* 152 */ e.printStackTrace(); +/* */ } +/* */ catch (InterruptedException e) { +/* 155 */ e.printStackTrace(); +/* */ } +/* 157 */ this.done = true; +/* */ } +/* */ +/* */ private String getLanguageOfText(String textId, File file) throws IOException { +/* 161 */ String lang = null; +/* */ +/* 163 */ if (this.deduceFromFolderPriority) +/* */ { +/* 165 */ lang = deduceFromFolderName(file); +/* 166 */ if (lang != null) { +/* 167 */ return lang; +/* */ } +/* */ } +/* 170 */ if ((this.languageFileName == null | this.indexMetaPriority)) { +/* 171 */ lang = getLanguageFromIndexMeta(file); +/* */ +/* 177 */ if ((lang != null) && +/* 178 */ (lang.equals(""))) { +/* 179 */ System.out.println("Language for " + file.getAbsolutePath() + " is " + lang); +/* 180 */ return lang; +/* */ } +/* */ } +/* 183 */ if (this.languageFileName != null) +/* */ { +/* 185 */ if (this.textLanguage == null) +/* 186 */ this.textLanguage = loadLanguages(); +/* 187 */ if (this.textLanguage == null) +/* */ { +/* 189 */ System.out.println("NO LANGUAGE FILES LOADED"); +/* */ } +/* */ else +/* */ { +/* 198 */ String language = (String)this.textLanguage.get(textId); +/* 199 */ lang = (String)this.languageToISO.get(language); +/* 200 */ if (lang != null) +/* */ { +/* 202 */ 
System.out.println("GOT language from language file:" + lang); +/* 203 */ return lang; +/* */ } +/* */ } +/* */ +/* */ } +/* */ +/* 209 */ lang = deduceFromFolderName(file); +/* 210 */ if (lang != null) +/* */ { +/* 212 */ System.out.println("Langugage deduced from Folder:" + lang); +/* 213 */ return lang; +/* */ } +/* */ +/* 216 */ if ((this.preferedLanguage != null) && (!this.preferedLanguage.equals(""))) { +/* 217 */ System.out.println("no language identified from Metadata: prefered language " + this.preferedLanguage + "will be used:" + file.getAbsolutePath()); +/* 218 */ return this.preferedLanguage; +/* */ } +/* */ +/* 221 */ System.out.println("no language identified: language will be generic all:" + file.getAbsolutePath()); +/* 222 */ return "all"; +/* */ } +/* */ +/* */ private String deduceFromFolderName(File file) { +/* 226 */ File parent = file.getParentFile(); +/* 227 */ String name = parent.getName(); +/* 228 */ String lang = null; +/* 229 */ if (this.supportedLanguageFolder.containsKey(name)) +/* */ { +/* 231 */ lang = (String)this.supportedLanguageFolder.get(name); +/* */ } +/* 233 */ return lang; +/* */ } +/* */ +/* */ private String getLanguageFromIndexMeta(File file) +/* */ throws IOException +/* */ { +/* 244 */ file = new File("/mpiwg/online/" + absPathToTextId(file.getAbsolutePath())); +/* */ +/* 246 */ File pf = file.getParentFile().getParentFile().getParentFile(); +/* 247 */ File indexMeta = new File(pf, "index.meta"); +/* */ +/* 249 */ if (!indexMeta.exists()) +/* */ { +/* 251 */ File pf2 = pf.getParentFile(); +/* 252 */ indexMeta = new File(pf2, "index.meta"); +/* 253 */ if (!indexMeta.exists()) +/* 254 */ return null; +/* */ } +/* 256 */ XMLReader parser = new SAXParser(); +/* 257 */ ParseIndexMeta ch = new ParseIndexMeta(); +/* 258 */ parser.setContentHandler(ch); +/* */ try { +/* 260 */ Reader reader = new FileReader(indexMeta); +/* 261 */ InputSource input = new InputSource(reader); +/* 262 */ parser.parse(input); +/* */ } +/* */ catch 
(SAXException e) +/* */ { +/* 266 */ e.printStackTrace(); +/* */ } +/* */ +/* 269 */ String lang = ch.lang; +/* */ +/* 272 */ return lang; +/* */ } +/* */ +/* */ private String getDCFromIndexMeta(String textId) +/* */ throws IOException, XmlRpcException +/* */ { +/* 301 */ XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl(); +/* 302 */ URL url = new URL(this.mdProviderUrl); +/* 303 */ config.setServerURL(url); +/* 304 */ XmlRpcClient client = new XmlRpcClient(); +/* 305 */ client.setConfig(config); +/* */ +/* 307 */ Object[] params = { textId }; +/* 308 */ Object returnVals = client.execute("getDCFormatted", params); +/* */ +/* 311 */ return (String)returnVals; +/* */ } +/* */ +/* */ protected HashMap loadLanguages() +/* */ { +/* 320 */ File languageFile = new File(this.docDir + "/" + this.languageFileName); +/* 321 */ String languageFilePath = this.docDir + "/" + this.languageFileName; +/* 322 */ HashMap languages = new HashMap(); +/* 323 */ boolean relativ = true; +/* */ +/* 325 */ if (!languageFile.exists()) +/* */ { +/* 327 */ languageFile = new File(this.languageFileName); +/* 328 */ languageFilePath = this.languageFileName; +/* 329 */ relativ = false; +/* 330 */ if (!languageFile.exists()) +/* 331 */ return null; +/* */ } +/* */ BufferedReader in; +/* */ try { +/* 335 */ in = new BufferedReader(new FileReader(languageFilePath)); +/* */ } catch (FileNotFoundException e) { +/* 337 */ return null; +/* */ } +/* */ +/* 340 */ String zeile = null; +/* */ try { +/* 342 */ while ((zeile = in.readLine()) != null) { +/* 343 */ String[] splitted = zeile.replace("\"", "").split("[,]"); +/* 344 */ if (splitted.length == 2) +/* 345 */ if (relativ) +/* 346 */ languages.put(this.docDir + "/" + splitted[0], splitted[1]); +/* */ else +/* 348 */ languages.put(splitted[0], splitted[1]); +/* */ } +/* */ } +/* */ catch (IOException e) { +/* 352 */ e.printStackTrace(); +/* 353 */ return null; +/* */ } +/* */ +/* 356 */ return languages; +/* */ } +/* */ +/* */ public void 
harvestFolder() +/* */ throws InterruptedException +/* */ { +/* 362 */ Date start = new Date(); +/* 363 */ boolean create = true; +/* */ try +/* */ { +/* 374 */ System.out.println("Indexing to directory '" + this.index_dir + "'..."); +/* 375 */ indexDocs(this.docDir); +/* 376 */ System.out.println("Optimizing..."); +/* 377 */ this.languageAnalyzers.optimize(); +/* 378 */ this.languageAnalyzers.close(); +/* */ +/* 380 */ Date end = new Date(); +/* 381 */ System.out.println(end.getTime() - start.getTime() + " total milliseconds"); +/* */ } +/* */ catch (IOException e) { +/* 384 */ System.out.println(" caught a " + e.getClass() + +/* 385 */ "\n with message: " + e.getMessage()); +/* */ } +/* */ } +/* */ +/* */ void indexDocs(File file) +/* */ throws IOException, InterruptedException +/* */ { +/* 392 */ if (!file.canRead()) +/* */ return; +/* 394 */ if (file.isDirectory()) +/* */ { +/* 396 */ if (this.counter > 100000) +/* */ { +/* 398 */ return; +/* */ } +/* 400 */ String[] files = file.list(); +/* */ +/* 402 */ String folderName = file.getName(); +/* 403 */ if ((((files != null) ? 1 : 0) & ((this.excludeFolders.contains(folderName)) ? 
0 : 1)) != 0) { +/* 404 */ for (int i = 0; i < files.length; ++i) +/* 405 */ indexDocs(new File(file, files[i])); +/* */ } +/* */ } +/* 408 */ else if (isTextFile(file)) { +/* 409 */ processFile(file); +/* */ } +/* */ else { +/* 412 */ System.out.println("not adding " + file); +/* */ } +/* */ } +/* */ +/* */ private void processFile(File file) +/* */ throws IOException, CorruptIndexException, InterruptedException, FileNotFoundException, UnsupportedEncodingException +/* */ { +/* 423 */ String textId = getTextId(file); +/* 424 */ System.out.println("file:" + this.counter); +/* 425 */ System.out.println("textId:" + textId); +/* */ +/* 427 */ String lang = getLanguageOfText(textId, file); +/* 428 */ String dcMetaData = null; +/* 429 */ if (this.mdProviderUrl != null) +/* */ try { +/* 431 */ dcMetaData = getDCFromIndexMeta(textId); +/* */ } catch (XmlRpcException e2) { +/* 433 */ dcMetaData = null; +/* */ } +/* */ int docNr; +/* */ +/* 437 */ if (this.mode == "add") +/* 438 */ docNr = 0; +/* */ else +/* 440 */ docNr = checkFileAndRemoveOldFile(file.getCanonicalPath(), lang, true, file.lastModified()); +/* 441 */ if (lang == null) { +/* 442 */ System.out.println("not adding " + file); +/* */ } +/* 444 */ else if (docNr == -1) { +/* 445 */ System.out.println(" OLD FILE:" + file); +/* 446 */ } else if (docNr >= 0) +/* */ { +/* 448 */ System.out.println("adding " + file + " lang: " + lang); +/* */ try +/* */ { +/* 451 */ Boolean ret = addDocument(file, lang, dcMetaData, textId); +/* 452 */ if (ret.booleanValue()) +/* 453 */ this.counter += 1; +/* */ } catch (IOException e) { +/* 455 */ System.out.println("got an IO eception adding the document - wait a bit"); +/* 456 */ Thread.sleep(10000L); +/* 457 */ System.out.println("Try again"); +/* */ try { +/* 459 */ Boolean ret = addDocument(file, lang, dcMetaData, textId); +/* 460 */ if (ret.booleanValue()) +/* 461 */ this.counter += 1; +/* */ } catch (IOException e1) { +/* 463 */ System.out.println("Couldn't do:" + 
file.getName()); +/* */ } +/* */ catch (ParserConfigurationException e2) { +/* 466 */ e.printStackTrace(); +/* */ } +/* */ catch (SAXException e2) { +/* 469 */ e.printStackTrace(); +/* */ } +/* */ } +/* */ catch (ParserConfigurationException e) { +/* 473 */ e.printStackTrace(); +/* */ } +/* */ catch (SAXException e) { +/* 476 */ e.printStackTrace(); +/* */ } +/* */ +/* */ } +/* */ else +/* */ { +/* 482 */ System.out.println(" UPDATE FILE:" + file + " lang: " + lang); +/* */ +/* 484 */ this.counter += 1; +/* */ try { +/* 486 */ addDocument(file, lang, dcMetaData, textId); +/* */ } +/* */ catch (ParserConfigurationException e) { +/* 489 */ e.printStackTrace(); +/* */ } +/* */ catch (SAXException e) { +/* 492 */ e.printStackTrace(); +/* */ } +/* */ } +/* */ } +/* */ +/* */ protected Boolean addDocument(File file, String lang, String dcMetaData, String textId) +/* */ throws CorruptIndexException, IOException, FileNotFoundException, UnsupportedEncodingException, ParserConfigurationException, SAXException +/* */ { +/* 509 */ if (dcMetaData != null) { +/* 510 */ this.languageAnalyzers.addDocument(FileDocument.Document(file, absPathToTextId(file),lang, dcMetaData, textId), lang); +/* 511 */ this.languageAnalyzers.addDocument(FileDocument.Document(file, absPathToTextId(file),"all", dcMetaData, textId), "all"); +/* */ } +/* */ else +/* */ { +/* 515 */ this.languageAnalyzers.addDocument(FileDocument.Document(file, absPathToTextId(file),lang, textId), lang); +/* 516 */ this.languageAnalyzers.addDocument(FileDocument.Document(file, absPathToTextId(file),"all", textId), "all"); +/* */ } +/* 518 */ return Boolean.valueOf(true); +/* */ } +/* */ +/* */ private String getTextId(File file) +/* */ { +/* */ try +/* */ { +/* 529 */ File parent = file.getParentFile(); +/* */ +/* 531 */ if (parent.getName().equals("text")) +/* 532 */ return absPathToTextId(parent.getParentFile().getAbsolutePath()); +/* 533 */ if (parent.getParentFile().getName().equals("text")) +/* 534 */ return 
absPathToTextId(parent.getParentFile().getParentFile().getAbsolutePath()); +/* 535 */ if (parent.getParentFile().getParentFile().getName().equals("text")) { +/* 536 */ return absPathToTextId(parent.getParentFile().getParentFile().getParentFile().getAbsolutePath()); +/* */ } +/* 538 */ return null; +/* */ } +/* */ catch (RuntimeException e) { +/* 541 */ e.printStackTrace(); +/* 542 */ }return null; +/* */ } +/* */ + protected String absPathToTextId(File file) + /* */ { + try { + return absPathToTextId(file.getCanonicalPath()); + } catch (IOException e) { + + e.printStackTrace(); + return ""; + } + } + +/* */ protected String absPathToTextId(String absolutePath) +/* */ { +/* 555 */ if (this.specialMode.equals("vlp")) +/* */ { +/* 557 */ String[] splitted = absolutePath.split("lit"); +/* 558 */ return splitted[1]; +/* */ } +/* */ +/* 562 */ Pattern p = Pattern.compile(TEXTIDFROMPATH_REGEXP); +/* 563 */ Matcher m = p.matcher(absolutePath); +/* 564 */ m.matches(); +/* 565 */ if (m.groupCount() > 0) { +/* 566 */ return m.group(1); +/* */ } +/* 568 */ System.err.println("correctPath: not a mpiwg path / no changes done" + absolutePath); +/* 569 */ return absolutePath; +/* */ } +/* */ +/* */ private int checkFileAndRemoveOldFile(String filePath, String lang, boolean deleteWrongLanguage, long fileModDate) +/* */ throws CorruptIndexException, IOException +/* */ { +/* 577 */ lang = checkSupportedLanguages(lang); +/* 578 */ System.out.println("lang converted+" + lang); +/* 579 */ //TermQuery query = new TermQuery(new Term("path", filePath)); + TermQuery query = new TermQuery(new Term("cleanedPath", absPathToTextId(filePath))); +/* */ +/* 582 */ HashMap results = this.languageAnalyzers.search(query); +/* */ +/* 584 */ if (results == null) { +/* 585 */ return 0; +/* */ } +/* 587 */ for (String resultLang : results.keySet()) +/* */ { +/* 589 */ TopScoreDocCollector collector = (TopScoreDocCollector)results.get(resultLang); +/* */ +/* 591 */ if ((collector == null) || 
(collector.getTotalHits() <= 0)) +/* */ continue; +/* 593 */ if ((!resultLang.equals(lang)) && (deleteWrongLanguage) && (!resultLang.equals("morph"))) +/* */ { +/* 595 */ this.languageAnalyzers.deleteDocuments(query); +/* */ +/* 603 */ System.out.println("language changed:" + filePath); +/* 604 */ return 1; +/* */ } +/* */ +/* 607 */ if (!resultLang.equals(lang)) +/* */ continue; +/* 609 */ TopDocs docs = collector.topDocs(); +/* */ ScoreDoc[] arrayOfScoreDoc; +/* 610 */ if ((arrayOfScoreDoc = docs.scoreDocs).length == 0) continue; ScoreDoc doc = arrayOfScoreDoc[0]; +/* 611 */ String modDate = this.languageAnalyzers.getAnalyzer(resultLang).reader.document(doc.doc).getField("modified").stringValue(); +/* */ +/* 613 */ String fileDate = DateTools.timeToString(fileModDate, DateTools.Resolution.MINUTE); +/* 614 */ if (!fileDate.equals(modDate)) +/* */ { +/* 618 */ System.out.println("new file:" + filePath); +/* 619 */ this.languageAnalyzers.deleteDocuments(query); +/* 620 */ return 2; +/* */ } +/* */ +/* 623 */ return -1; +/* */ } +/* */ +/* 631 */ return 0; +/* */ } +/* */ +/* */ private String checkSupportedLanguages(String lang) +/* */ { +/* 643 */ if (this.languageAnalyzers.getAnalyzer(lang) == null) +/* 644 */ return "all"; +/* 645 */ return lang; +/* */ } +/* */ +/* */ public void setIndexMetaPriority(boolean prio) +/* */ { +/* 650 */ this.indexMetaPriority = prio; +/* */ } +/* */ +/* */ public boolean getIndexMetaPriority() { +/* 654 */ return this.indexMetaPriority; +/* */ } +/* */ +/* */ private boolean isTextFile(File file) +/* */ { +/* 659 */ String fn = file.getName(); +/* */ +/* 661 */ String[] splitted = fn.split("[.]"); +/* */ +/* 663 */ String ext = ""; +/* */ +/* 665 */ if (splitted.length > 1) +/* */ { +/* 667 */ ext = splitted[(splitted.length - 1)]; +/* */ } +/* */ +/* 670 */ return this.fileTypesToIndex.contains(ext); +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: 
de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/OCRProcessFileThread.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/OCRProcessFileThread.java Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,82 @@ +/* */ package de.mpiwg.dwinter.fulltextIndexer.harvester.processors; +/* */ +/* */ import de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument; +/* */ import de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument.OCRLine; +/* */ import de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument; +/* */ import de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers; +/* */ import de.mpiwg.dwinter.lucencetools.documents.OcropusLineDocument; +/* */ import java.io.File; +/* */ import java.io.FileNotFoundException; +/* */ import java.io.FileReader; +/* */ import java.io.IOException; +/* */ import java.io.PrintStream; +/* */ import java.io.Reader; +/* */ import java.io.UnsupportedEncodingException; +/* */ import java.util.HashMap; +/* */ import javax.xml.parsers.ParserConfigurationException; +/* */ import javax.xml.parsers.SAXParser; +/* */ import javax.xml.parsers.SAXParserFactory; +/* */ import org.apache.lucene.index.CorruptIndexException; +/* */ import org.xml.sax.InputSource; +/* */ import org.xml.sax.SAXException; +/* */ +/* */ public class OCRProcessFileThread extends ProcessFileThread +/* */ { +/* */ public OCRProcessFileThread(LanguageAnalyzers languageAnalyzers2, File file, String lfn, HashMap tl, String mdProviderUrl, String preferedLanguage, HashMap languageToISO, HashMap supportedLanguageFolder) +/* */ { +/* 43 */ super(languageAnalyzers2, file, lfn, tl, mdProviderUrl, preferedLanguage, languageToISO, supportedLanguageFolder); +/* */ } +/* */ +/* */ protected Boolean addDocument(File file, String lang, String dcMetaData, String 
textId) +/* */ throws CorruptIndexException, IOException, FileNotFoundException, UnsupportedEncodingException, ParserConfigurationException, SAXException +/* */ { +/* 52 */ ParseOcrDocument ch = new ParseOcrDocument(); +/* */ +/* 59 */ SAXParserFactory factory = SAXParserFactory.newInstance(); +/* 60 */ factory.setNamespaceAware(true); +/* 61 */ factory.setValidating(false); +/* */ +/* 64 */ SAXParser parser = factory.newSAXParser(); +/* */ try +/* */ { +/* 67 */ Reader reader = new FileReader(file); +/* 68 */ InputSource input = new InputSource(reader); +/* */ +/* 73 */ parser.parse(input, ch); +/* */ } +/* */ catch (SAXException e) +/* */ { +/* 78 */ e.printStackTrace(); +/* 79 */ return Boolean.valueOf(false); +/* */ } +/* */ catch (IOException e) { +/* 82 */ e.printStackTrace(); +/* */ try { +/* 84 */ sleep(1L); +/* 85 */ System.out.println("retry"); +/* 86 */ addDocument(file, lang, dcMetaData, textId); +/* */ } +/* */ catch (InterruptedException e1) { +/* 89 */ e1.printStackTrace(); +/* */ } +/* 91 */ return Boolean.valueOf(false); +/* */ } +/* */ +/* 94 */ OCRDocument doc = ch.ocrDocument; +/* */ +/* 96 */ for (OCRDocument.OCRLine line : doc.OCRLines) +/* */ { +/* 98 */ if (dcMetaData == null) +/* 99 */ this.languageAnalyzers.addDocument(OcropusLineDocument.Document(file, absPathToTextId(file),lang, line, doc.pageDimension, textId), lang); +/* */ else { +/* 101 */ this.languageAnalyzers.addDocument(OcropusLineDocument.Document(file, absPathToTextId(file),lang, line, doc.pageDimension, dcMetaData, textId), lang); +/* */ } +/* */ } +/* 104 */ return Boolean.valueOf(true); +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.fulltextIndexer.harvester.processors.OCRProcessFileThread + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/ProcessFileThread.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/src/de/mpiwg/dwinter/fulltextIndexer/harvester/processors/ProcessFileThread.java Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,548 @@ +/* */ package de.mpiwg.dwinter.fulltextIndexer.harvester.processors; +/* */ +/* */ import java.io.BufferedReader; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.io.Reader; +import java.io.UnsupportedEncodingException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Date; +import java.util.HashMap; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.xml.parsers.ParserConfigurationException; + +import org.apache.lucene.document.DateTools; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TopScoreDocCollector; +import org.apache.lucene.store.LockObtainFailedException; +import org.apache.xmlrpc.XmlRpcException; +import org.apache.xmlrpc.client.XmlRpcClient; +import org.apache.xmlrpc.client.XmlRpcClientConfigImpl; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import org.xml.sax.XMLReader; + +import com.sun.org.apache.xerces.internal.parsers.SAXParser; + +import de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta; +import de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers; +import de.mpiwg.dwinter.lucencetools.documents.FileDocument; +/* */ +/* */ public class ProcessFileThread extends Thread +/* */ { +/* */ private static final String TEXTIDFROMPATH_REGEXP = ".*(/(permanent|experimental)/.*)"; +/* */ private static final int DELETED_WRONG_LANGUAGE = 1; +/* */ private static final int DELETED_OLD_VERSION = 2; +/* */ private static final int NEW_FILE = 0; +/* */ private static final int FILE_EXISTS = -1; +/* 
*/ protected File docDir; +/* */ protected File index_dir; +/* 86 */ protected ArrayList fileTypesToIndex = new ArrayList(Arrays.asList(new String[] { "xml" })); +/* 87 */ protected ArrayList excludeFolders = new ArrayList(Arrays.asList(new String[] { "OCR" })); +/* 88 */ private HashMap textLanguage = null; +/* */ +/* 90 */ protected HashMap languageToISO = new HashMap(); +/* 91 */ protected LanguageAnalyzers languageAnalyzers = new LanguageAnalyzers(); +/* 92 */ protected HashMap supportedLanguageFolder = new HashMap(); +/* 93 */ private int counter = 0; +/* */ protected String languageFileName; +/* 95 */ protected boolean indexMetaPriority = false; +/* 96 */ protected boolean deduceFromFolderPriority = true; +/* */ +/* 101 */ private String specialMode = ""; +/* 102 */ public boolean done = false; +/* */ private File processThisFile; +/* 105 */ private String mode = "new"; // if mode is not add, then only modified files and new files will be added. +/* */ private String mdProviderUrl; +/* 107 */ private String preferedLanguage = null; +/* */ +/* */ public ProcessFileThread(File docDir, File index_dir, String languageFileName, File processThisFile, String mdProviderUrl, HashMap languageToISO, HashMap supportedLanguageFolder) throws CorruptIndexException, LockObtainFailedException, IOException { +/* 110 */ this.docDir = docDir; +/* 111 */ this.languageFileName = languageFileName; +/* */ +/* 114 */ this.index_dir = index_dir; +/* 115 */ this.processThisFile = processThisFile; +/* 116 */ this.mdProviderUrl = mdProviderUrl; +/* 117 */ this.languageToISO = languageToISO; +/* 118 */ this.supportedLanguageFolder = supportedLanguageFolder; +/* */ } +/* */ +/* */ public ProcessFileThread(LanguageAnalyzers languageAnalyzers2, File file, String lfn, HashMap tl, String mdProviderUrl, String preferedLanguage, HashMap languageToISO, HashMap supportedLanguageFolder) +/* */ { +/* 123 */ this.languageAnalyzers = languageAnalyzers2; +/* 124 */ this.processThisFile = file; +/* 125 
*/ this.textLanguage = tl; +/* */ +/* 128 */ this.languageFileName = lfn; +/* 129 */ this.preferedLanguage = preferedLanguage; +/* 130 */ this.mdProviderUrl = mdProviderUrl; +/* 131 */ this.languageToISO = languageToISO; +/* 132 */ this.supportedLanguageFolder = supportedLanguageFolder; +/* */ } +/* */ +/* */ public void run() +/* */ { +/* */ try +/* */ { +/* 140 */ processFile(this.processThisFile); +/* */ } +/* */ catch (CorruptIndexException e) { +/* 143 */ e.printStackTrace(); +/* */ } +/* */ catch (FileNotFoundException e) { +/* 146 */ e.printStackTrace(); +/* */ } +/* */ catch (UnsupportedEncodingException e) { +/* 149 */ e.printStackTrace(); +/* */ } +/* */ catch (IOException e) { +/* 152 */ e.printStackTrace(); +/* */ } +/* */ catch (InterruptedException e) { +/* 155 */ e.printStackTrace(); +/* */ } +/* 157 */ this.done = true; +/* */ } +/* */ +/* */ private String getLanguageOfText(String textId, File file) throws IOException { +/* 161 */ String lang = null; +/* */ +/* 163 */ if (this.deduceFromFolderPriority) +/* */ { +/* 165 */ lang = deduceFromFolderName(file); +/* 166 */ if (lang != null) { +/* 167 */ return lang; +/* */ } +/* */ } +/* 170 */ if ((this.languageFileName == null | this.indexMetaPriority)) { +/* 171 */ lang = getLanguageFromIndexMeta(file); +/* */ +/* 177 */ if ((lang != null) && +/* 178 */ (lang.equals(""))) { +/* 179 */ System.out.println("Language for " + file.getAbsolutePath() + " is " + lang); +/* 180 */ return lang; +/* */ } +/* */ } +/* 183 */ if (this.languageFileName != null) +/* */ { +/* 185 */ if (this.textLanguage == null) +/* 186 */ this.textLanguage = loadLanguages(); +/* 187 */ if (this.textLanguage == null) +/* */ { +/* 189 */ System.out.println("NO LANGUAGE FILES LOADED"); +/* */ } +/* */ else +/* */ { +/* 198 */ String language = (String)this.textLanguage.get(textId); +/* 199 */ lang = (String)this.languageToISO.get(language); +/* 200 */ if (lang != null) +/* */ { +/* 202 */ System.out.println("GOT language from language 
file:" + lang); +/* 203 */ return lang; +/* */ } +/* */ } +/* */ +/* */ } +/* */ +/* 209 */ lang = deduceFromFolderName(file); +/* 210 */ if (lang != null) +/* */ { +/* 212 */ System.out.println("Langugage deduced from Folder:" + lang); +/* 213 */ return lang; +/* */ } +/* */ +/* 216 */ if ((this.preferedLanguage != null) && (!this.preferedLanguage.equals(""))) { +/* 217 */ System.out.println("no language identified from Metadata: prefered language " + this.preferedLanguage + "will be used:" + file.getAbsolutePath()); +/* 218 */ return this.preferedLanguage; +/* */ } +/* */ +/* 221 */ System.out.println("no language identified: language will be generic all:" + file.getAbsolutePath()); +/* 222 */ return "all"; +/* */ } +/* */ +/* */ private String deduceFromFolderName(File file) { +/* 226 */ File parent = file.getParentFile(); +/* 227 */ String name = parent.getName(); +/* 228 */ String lang = null; +/* 229 */ if (this.supportedLanguageFolder.containsKey(name)) +/* */ { +/* 231 */ lang = (String)this.supportedLanguageFolder.get(name); +/* */ } +/* 233 */ return lang; +/* */ } +/* */ +/* */ private String getLanguageFromIndexMeta(File file) +/* */ throws IOException +/* */ { +/* 244 */ file = new File("/mpiwg/online/" + absPathToTextId(file.getAbsolutePath())); +/* */ +/* 246 */ File pf = file.getParentFile().getParentFile().getParentFile(); +/* 247 */ File indexMeta = new File(pf, "index.meta"); +/* */ +/* 249 */ if (!indexMeta.exists()) +/* */ { +/* 251 */ File pf2 = pf.getParentFile(); +/* 252 */ indexMeta = new File(pf2, "index.meta"); +/* 253 */ if (!indexMeta.exists()) +/* 254 */ return null; +/* */ } +/* 256 */ XMLReader parser = new SAXParser(); +/* 257 */ ParseIndexMeta ch = new ParseIndexMeta(); +/* 258 */ parser.setContentHandler(ch); +/* */ try { +/* 260 */ Reader reader = new FileReader(indexMeta); +/* 261 */ InputSource input = new InputSource(reader); +/* 262 */ parser.parse(input); +/* */ } +/* */ catch (SAXException e) +/* */ { +/* 266 */ 
e.printStackTrace(); +/* */ } +/* */ +/* 269 */ String lang = ch.lang; +/* */ +/* 272 */ return lang; +/* */ } +/* */ +/* */ private String getDCFromIndexMeta(String textId) +/* */ throws IOException, XmlRpcException +/* */ { +/* 301 */ XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl(); +/* 302 */ URL url = new URL(this.mdProviderUrl); +/* 303 */ config.setServerURL(url); +/* 304 */ XmlRpcClient client = new XmlRpcClient(); +/* 305 */ client.setConfig(config); +/* */ +/* 307 */ Object[] params = { textId }; +/* 308 */ Object returnVals = client.execute("getDCFormatted", params); +/* */ +/* 311 */ return (String)returnVals; +/* */ } +/* */ +/* */ protected HashMap loadLanguages() +/* */ { +/* 320 */ File languageFile = new File(this.docDir + "/" + this.languageFileName); +/* 321 */ String languageFilePath = this.docDir + "/" + this.languageFileName; +/* 322 */ HashMap languages = new HashMap(); +/* 323 */ boolean relativ = true; +/* */ +/* 325 */ if (!languageFile.exists()) +/* */ { +/* 327 */ languageFile = new File(this.languageFileName); +/* 328 */ languageFilePath = this.languageFileName; +/* 329 */ relativ = false; +/* 330 */ if (!languageFile.exists()) +/* 331 */ return null; +/* */ } +/* */ BufferedReader in; +/* */ try { +/* 335 */ in = new BufferedReader(new FileReader(languageFilePath)); +/* */ } catch (FileNotFoundException e) { +/* 337 */ return null; +/* */ } +/* */ +/* 340 */ String zeile = null; +/* */ try { +/* 342 */ while ((zeile = in.readLine()) != null) { +/* 343 */ String[] splitted = zeile.replace("\"", "").split("[,]"); +/* 344 */ if (splitted.length == 2) +/* 345 */ if (relativ) +/* 346 */ languages.put(this.docDir + "/" + splitted[0], splitted[1]); +/* */ else +/* 348 */ languages.put(splitted[0], splitted[1]); +/* */ } +/* */ } +/* */ catch (IOException e) { +/* 352 */ e.printStackTrace(); +/* 353 */ return null; +/* */ } +/* */ +/* 356 */ return languages; +/* */ } +/* */ +/* */ public void harvestFolder() +/* */ throws 
InterruptedException +/* */ { +/* 362 */ Date start = new Date(); +/* 363 */ boolean create = true; +/* */ try +/* */ { +/* 374 */ System.out.println("Indexing to directory '" + this.index_dir + "'..."); +/* 375 */ indexDocs(this.docDir); +/* 376 */ System.out.println("Optimizing..."); +/* 377 */ this.languageAnalyzers.optimize(); +/* 378 */ this.languageAnalyzers.close(); +/* */ +/* 380 */ Date end = new Date(); +/* 381 */ System.out.println(end.getTime() - start.getTime() + " total milliseconds"); +/* */ } +/* */ catch (IOException e) { +/* 384 */ System.out.println(" caught a " + e.getClass() + +/* 385 */ "\n with message: " + e.getMessage()); +/* */ } +/* */ } +/* */ +/* */ void indexDocs(File file) +/* */ throws IOException, InterruptedException +/* */ { +/* 392 */ if (!file.canRead()) +/* */ return; +/* 394 */ if (file.isDirectory()) +/* */ { +/* 396 */ if (this.counter > 100000) +/* */ { +/* 398 */ return; +/* */ } +/* 400 */ String[] files = file.list(); +/* */ +/* 402 */ String folderName = file.getName(); +/* 403 */ if ((((files != null) ? 1 : 0) & ((this.excludeFolders.contains(folderName)) ? 
0 : 1)) != 0) { +/* 404 */ for (int i = 0; i < files.length; ++i) +/* 405 */ indexDocs(new File(file, files[i])); +/* */ } +/* */ } +/* 408 */ else if (isTextFile(file)) { +/* 409 */ processFile(file); +/* */ } +/* */ else { +/* 412 */ System.out.println("not adding " + file); +/* */ } +/* */ } +/* */ +/* */ private void processFile(File file) +/* */ throws IOException, CorruptIndexException, InterruptedException, FileNotFoundException, UnsupportedEncodingException +/* */ { +/* 423 */ String textId = getTextId(file); +/* 424 */ System.out.println("file:" + this.counter); +/* 425 */ System.out.println("textId:" + textId); +/* */ +/* 427 */ String lang = getLanguageOfText(textId, file); +/* 428 */ String dcMetaData = null; +/* 429 */ if (this.mdProviderUrl != null) +/* */ try { +/* 431 */ dcMetaData = getDCFromIndexMeta(textId); +/* */ } catch (XmlRpcException e2) { +/* 433 */ dcMetaData = null; +/* */ } +/* */ int docNr; +/* */ +/* 437 */ if (this.mode == "add") +/* 438 */ docNr = 0; +/* */ else +/* 440 */ docNr = checkFileAndRemoveOldFile(file.getCanonicalPath(), lang, true, file.lastModified()); +/* 441 */ if (lang == null) { +/* 442 */ System.out.println("not adding " + file); +/* */ } +/* 444 */ else if (docNr == -1) { +/* 445 */ System.out.println(" OLD FILE:" + file); +/* 446 */ } else if (docNr >= 0) +/* */ { +/* 448 */ System.out.println("adding " + file + " lang: " + lang); +/* */ try +/* */ { +/* 451 */ Boolean ret = addDocument(file, lang, dcMetaData, textId); +/* 452 */ if (ret.booleanValue()) +/* 453 */ this.counter += 1; +/* */ } catch (IOException e) { +/* 455 */ System.out.println("got an IO eception adding the document - wait a bit"); +/* 456 */ Thread.sleep(10000L); +/* 457 */ System.out.println("Try again"); +/* */ try { +/* 459 */ Boolean ret = addDocument(file, lang, dcMetaData, textId); +/* 460 */ if (ret.booleanValue()) +/* 461 */ this.counter += 1; +/* */ } catch (IOException e1) { +/* 463 */ System.out.println("Couldn't do:" + 
file.getName()); +/* */ } +/* */ catch (ParserConfigurationException e2) { +/* 466 */ e.printStackTrace(); +/* */ } +/* */ catch (SAXException e2) { +/* 469 */ e.printStackTrace(); +/* */ } +/* */ } +/* */ catch (ParserConfigurationException e) { +/* 473 */ e.printStackTrace(); +/* */ } +/* */ catch (SAXException e) { +/* 476 */ e.printStackTrace(); +/* */ } +/* */ +/* */ } +/* */ else +/* */ { +/* 482 */ System.out.println(" UPDATE FILE:" + file + " lang: " + lang); +/* */ +/* 484 */ this.counter += 1; +/* */ try { +/* 486 */ addDocument(file, lang, dcMetaData, textId); +/* */ } +/* */ catch (ParserConfigurationException e) { +/* 489 */ e.printStackTrace(); +/* */ } +/* */ catch (SAXException e) { +/* 492 */ e.printStackTrace(); +/* */ } +/* */ } +/* */ } +/* */ +/* */ protected Boolean addDocument(File file, String lang, String dcMetaData, String textId) +/* */ throws CorruptIndexException, IOException, FileNotFoundException, UnsupportedEncodingException, ParserConfigurationException, SAXException +/* */ { +/* 509 */ if (dcMetaData != null) { +/* 510 */ this.languageAnalyzers.addDocument(FileDocument.Document(file, absPathToTextId(file),lang, dcMetaData, textId), lang); +/* 511 */ this.languageAnalyzers.addDocument(FileDocument.Document(file, absPathToTextId(file),"all", dcMetaData, textId), "all"); +/* */ } +/* */ else +/* */ { +/* 515 */ this.languageAnalyzers.addDocument(FileDocument.Document(file, absPathToTextId(file),lang, textId), lang); +/* 516 */ this.languageAnalyzers.addDocument(FileDocument.Document(file, absPathToTextId(file),"all", textId), "all"); +/* */ } +/* 518 */ return Boolean.valueOf(true); +/* */ } +/* */ +/* */ private String getTextId(File file) +/* */ { +/* */ try +/* */ { +/* 529 */ File parent = file.getParentFile(); +/* */ +/* 531 */ if (parent.getName().equals("text")) +/* 532 */ return absPathToTextId(parent.getParentFile().getAbsolutePath()); +/* 533 */ if (parent.getParentFile().getName().equals("text")) +/* 534 */ return 
absPathToTextId(parent.getParentFile().getParentFile().getAbsolutePath()); +/* 535 */ if (parent.getParentFile().getParentFile().getName().equals("text")) { +/* 536 */ return absPathToTextId(parent.getParentFile().getParentFile().getParentFile().getAbsolutePath()); +/* */ } +/* 538 */ return null; +/* */ } +/* */ catch (RuntimeException e) { +/* 541 */ e.printStackTrace(); +/* 542 */ }return null; +/* */ } +/* */ + protected String absPathToTextId(File file) + /* */ { + try { + return absPathToTextId(file.getCanonicalPath()); + } catch (IOException e) { + + e.printStackTrace(); + return ""; + } + } + +/* */ protected String absPathToTextId(String absolutePath) +/* */ { +/* 555 */ if (this.specialMode.equals("vlp")) +/* */ { +/* 557 */ String[] splitted = absolutePath.split("lit"); +/* 558 */ return splitted[1]; +/* */ } +/* */ +/* 562 */ Pattern p = Pattern.compile(TEXTIDFROMPATH_REGEXP); +/* 563 */ Matcher m = p.matcher(absolutePath); +/* 564 */ m.matches(); +/* 565 */ if (m.groupCount() > 0) { +/* 566 */ return m.group(1); +/* */ } +/* 568 */ System.err.println("correctPath: not a mpiwg path / no changes done" + absolutePath); +/* 569 */ return absolutePath; +/* */ } +/* */ +/* */ private int checkFileAndRemoveOldFile(String filePath, String lang, boolean deleteWrongLanguage, long fileModDate) +/* */ throws CorruptIndexException, IOException +/* */ { +/* 577 */ lang = checkSupportedLanguages(lang); +/* 578 */ System.out.println("lang converted+" + lang); +/* 579 */ //TermQuery query = new TermQuery(new Term("path", filePath)); + TermQuery query = new TermQuery(new Term("cleanedPath", absPathToTextId(filePath))); +/* */ +/* 582 */ HashMap results = this.languageAnalyzers.search(query); +/* */ +/* 584 */ if (results == null) { +/* 585 */ return 0; +/* */ } +/* 587 */ for (String resultLang : results.keySet()) +/* */ { +/* 589 */ TopScoreDocCollector collector = (TopScoreDocCollector)results.get(resultLang); +/* */ +/* 591 */ if ((collector == null) || 
(collector.getTotalHits() <= 0)) +/* */ continue; +/* 593 */ if ((!resultLang.equals(lang)) && (deleteWrongLanguage) && (!resultLang.equals("morph"))) +/* */ { +/* 595 */ this.languageAnalyzers.deleteDocuments(query); +/* */ +/* 603 */ System.out.println("language changed:" + filePath); +/* 604 */ return 1; +/* */ } +/* */ +/* 607 */ if (!resultLang.equals(lang)) +/* */ continue; +/* 609 */ TopDocs docs = collector.topDocs(); +/* */ ScoreDoc[] arrayOfScoreDoc; +/* 610 */ if ((arrayOfScoreDoc = docs.scoreDocs).length == 0) continue; ScoreDoc doc = arrayOfScoreDoc[0]; +/* 611 */ String modDate = this.languageAnalyzers.getAnalyzer(resultLang).reader.document(doc.doc).getField("modified").stringValue(); +/* */ +/* 613 */ String fileDate = DateTools.timeToString(fileModDate, DateTools.Resolution.MINUTE); +/* 614 */ if (!fileDate.equals(modDate)) +/* */ { +/* 618 */ System.out.println("new file:" + filePath); +/* 619 */ this.languageAnalyzers.deleteDocuments(query); +/* 620 */ return 2; +/* */ } +/* */ +/* 623 */ return -1; +/* */ } +/* */ +/* 631 */ return 0; +/* */ } +/* */ +/* */ private String checkSupportedLanguages(String lang) +/* */ { +/* 643 */ if (this.languageAnalyzers.getAnalyzer(lang) == null) +/* 644 */ return "all"; +/* 645 */ return lang; +/* */ } +/* */ +/* */ public void setIndexMetaPriority(boolean prio) +/* */ { +/* 650 */ this.indexMetaPriority = prio; +/* */ } +/* */ +/* */ public boolean getIndexMetaPriority() { +/* 654 */ return this.indexMetaPriority; +/* */ } +/* */ +/* */ private boolean isTextFile(File file) +/* */ { +/* 659 */ String fn = file.getName(); +/* */ +/* 661 */ String[] splitted = fn.split("[.]"); +/* */ +/* 663 */ String ext = ""; +/* */ +/* 665 */ if (splitted.length > 1) +/* */ { +/* 667 */ ext = splitted[(splitted.length - 1)]; +/* */ } +/* */ +/* 670 */ return this.fileTypesToIndex.contains(ext); +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: 
de.mpiwg.dwinter.fulltextIndexer.harvester.processors.ProcessFileThread + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,17 @@ +K 25 +svn:wc:ra_dav:version-url +V 83 +/svn/!svn/ver/2621/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/utils +END +ParseOcrDocument.java +K 25 +svn:wc:ra_dav:version-url +V 105 +/svn/!svn/ver/2621/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/utils/ParseOcrDocument.java +END +ParseIndexMeta.java +K 25 +svn:wc:ra_dav:version-url +V 103 +/svn/!svn/ver/2621/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/utils/ParseIndexMeta.java +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,103 @@ +10 + +dir +2621 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/src/de/mpiwg/dwinter/fulltextIndexer/utils +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-16T19:13:23.990503Z +2621 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +ParseIndexMeta.java +file + + + + +2010-06-16T07:59:06.000000Z +6cd2b68e0d298ca187de9bf2108c84ad +2010-06-16T19:13:23.990503Z +2621 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +2587 + +ParseOcrDocument.java +file + + + + +2010-06-16T09:33:27.000000Z +d67cf24067aeb4acf816bc2cda3f3ea4 +2010-06-16T19:13:23.990503Z +2621 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +5834 + diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/prop-base/ParseIndexMeta.java.svn-base --- /dev/null Thu Jan 01 
00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/prop-base/ParseIndexMeta.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,9 @@ +K 14 +svn:executable +V 1 +* +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/prop-base/ParseOcrDocument.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/prop-base/ParseOcrDocument.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,9 @@ +K 14 +svn:executable +V 1 +* +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/text-base/ParseIndexMeta.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/text-base/ParseIndexMeta.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,85 @@ +/* */ package de.mpiwg.dwinter.fulltextIndexer.utils; +/* */ +/* */ import java.util.HashMap; +/* */ import org.xml.sax.Attributes; +/* */ import org.xml.sax.ContentHandler; +/* */ import org.xml.sax.Locator; +/* */ import org.xml.sax.SAXException; +/* */ +/* */ public class ParseIndexMeta +/* */ implements ContentHandler +/* */ { +/* 23 */ public HashMap value = new HashMap(); +/* 24 */ private String currentToken = new String(); +/* 25 */ public String lang = new String(); +/* */ +/* */ public void startDocument() +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void endDocument() +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void characters(char[] c, int start, int length) +/* */ throws SAXException +/* */ { +/* 41 */ char[] dest = (char[])null; +/* 42 */ if (!((this.currentToken != null & this.currentToken.equals("lang")))) +/* */ return; +/* 44 */ dest = new char[length]; +/* 45 */ System.arraycopy(c, start, dest, 0, length); +/* 46 */ this.lang = new String(dest); +/* */ } +/* */ +/* */ public void 
ignorableWhitespace(char[] c, int start, int length) +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void processingInstruction(String target, String data) +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void setDocumentLocator(Locator arg1) +/* */ { +/* */ } +/* */ +/* */ public void endElement(String uri, String localName, String name) +/* */ throws SAXException +/* */ { +/* 70 */ this.currentToken = ""; +/* */ } +/* */ +/* */ public void endPrefixMapping(String prefix) +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void skippedEntity(String name) +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void startElement(String uri, String localName, String name, Attributes attrs) +/* */ throws SAXException +/* */ { +/* 88 */ if (name.equals("lang")) +/* 89 */ this.currentToken = "lang"; +/* */ else +/* 91 */ this.currentToken = ""; +/* */ } +/* */ +/* */ public void startPrefixMapping(String prefix, String uri) +/* */ throws SAXException +/* */ { +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/text-base/ParseOcrDocument.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/utils/.svn/text-base/ParseOcrDocument.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,163 @@ +/* */ package de.mpiwg.dwinter.fulltextIndexer.utils; +/* */ +/* */ import de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument; +/* */ import de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument.OCRLine; +/* */ import java.io.File; +/* */ import java.io.IOException; +/* */ import java.io.PrintStream; +/* */ import java.util.ArrayList; +/* */ import org.xml.sax.Attributes; +/* */ import org.xml.sax.InputSource; +/* */ import org.xml.sax.Locator; 
+/* */ import org.xml.sax.SAXException; +/* */ import org.xml.sax.helpers.DefaultHandler; +/* */ +/* */ public class ParseOcrDocument extends DefaultHandler +/* */ { +/* 24 */ public OCRDocument ocrDocument = new OCRDocument(); +/* */ private int lineCounter; +/* */ private OCRDocument.OCRLine currentLine; +/* */ private boolean inLine; +/* */ +/* */ public void startDocument() +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void endDocument() +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void characters(char[] c, int start, int length) +/* */ throws SAXException +/* */ { +/* 38 */ if (this.inLine) +/* 39 */ this.currentLine.content += new String(c, start, length); +/* */ } +/* */ +/* */ public void ignorableWhitespace(char[] c, int start, int length) +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void processingInstruction(String target, String data) +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void setDocumentLocator(Locator arg1) +/* */ { +/* */ } +/* */ +/* */ public void endElement(String uri, String localName, String name) +/* */ throws SAXException +/* */ { +/* 61 */ if ((!name.equals("span")) || +/* 63 */ (!this.inLine)) return; +/* 64 */ this.ocrDocument.OCRLines.add(this.currentLine); +/* 65 */ this.inLine = false; +/* */ } +/* */ +/* */ public void endPrefixMapping(String prefix) +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void skippedEntity(String name) +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void startElement(String uri, String localName, String name, Attributes attrs) +/* */ throws SAXException +/* */ { +/* 83 */ if ((name.equals("div")) && +/* 84 */ (attrs != null)) { +/* 85 */ int length = attrs.getLength(); +/* */ +/* 87 */ for (int i = 0; i < length; ++i) { +/* 88 */ if ((!attrs.getLocalName(i).equals("class")) || +/* 89 */ (!attrs.getValue(i).equals("ocr_page"))) continue; +/* 90 */ doPage(attrs); +/* */ } +/* */ +/* */ } 
+/* */ +/* 96 */ if ((!name.equals("span")) || +/* 97 */ (attrs == null)) return; +/* 98 */ int length = attrs.getLength(); +/* */ +/* 100 */ for (int i = 0; i < length; ++i) { +/* 101 */ if ((!attrs.getLocalName(i).equals("class")) || +/* 102 */ (!attrs.getValue(i).equals("ocr_line"))) continue; +/* 103 */ doLine(attrs); +/* */ } +/* */ } +/* */ +/* */ private void doPage(Attributes attrs) +/* */ { +/* 111 */ int length = attrs.getLength(); +/* */ +/* 113 */ for (int i = 0; i < length; ++i) { +/* 114 */ if (attrs.getLocalName(i).equals("title")) { +/* 115 */ String title = attrs.getValue(i); +/* 116 */ String[] splitted = title.split(" "); +/* 117 */ String dimension = splitted[(splitted.length - 2)] + " " + +/* 118 */ splitted[(splitted.length - 1)]; +/* 119 */ this.ocrDocument.pageDimension = dimension; +/* */ } +/* */ } +/* 122 */ this.lineCounter = 0; +/* */ } +/* */ +/* */ private void doLine(Attributes attrs) +/* */ { +/* */ + OCRDocument doc = new OCRDocument(); + + this.currentLine = doc.new OCRLine(); +/* 128 */ this.inLine = true; +/* 129 */ this.currentLine.content = new String(); +/* */ +/* 131 */ int length = attrs.getLength(); +/* 132 */ this.currentLine.lineNumber = String.valueOf(this.lineCounter); +/* 133 */ this.lineCounter += 1; +/* 134 */ this.currentLine.bbox = "0 0"; +/* */ +/* 137 */ for (int i = 0; i < length; ++i) +/* 138 */ if (attrs.getLocalName(i).equals("title")) { +/* 139 */ String title = attrs.getValue(i); +/* */ +/* 141 */ String dimension = title.replace("bbox ", ""); +/* 142 */ this.currentLine.bbox = dimension; +/* */ } +/* */ } +/* */ +/* */ public void startPrefixMapping(String prefix, String uri) +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public InputSource resolveEntity(String publicId, String systemId) +/* */ throws SAXException, IOException +/* */ { +/* 160 */ File f = new File( +/* 161 */ "/Users/dwinter/text-tools/fulltextsearch/catalog/xhtml1-transitional.dtd"); +/* 162 */ if (!f.exists()) +/* 163 */ f = 
new File( +/* 164 */ "/usr/local/apache-tomcat-6.0.16/webapps/fulltextsearch/catalog/xhtml1-transitional.dtd"); +/* 165 */ if (!f.exists()) { +/* 166 */ System.err.println("Cant't find xhtml-dtd: MyResolver"); +/* 167 */ return null; +/* */ } +/* 169 */ if (publicId.equals("-//W3C//DTD XHTML 1.0 Transitional//EN")) +/* 170 */ return new InputSource(f.getAbsolutePath()); +/* 171 */ return new InputSource(systemId); +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/utils/ParseIndexMeta.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/utils/ParseIndexMeta.java Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,85 @@ +/* */ package de.mpiwg.dwinter.fulltextIndexer.utils; +/* */ +/* */ import java.util.HashMap; +/* */ import org.xml.sax.Attributes; +/* */ import org.xml.sax.ContentHandler; +/* */ import org.xml.sax.Locator; +/* */ import org.xml.sax.SAXException; +/* */ +/* */ public class ParseIndexMeta +/* */ implements ContentHandler +/* */ { +/* 23 */ public HashMap value = new HashMap(); +/* 24 */ private String currentToken = new String(); +/* 25 */ public String lang = new String(); +/* */ +/* */ public void startDocument() +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void endDocument() +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void characters(char[] c, int start, int length) +/* */ throws SAXException +/* */ { +/* 41 */ char[] dest = (char[])null; +/* 42 */ if (!((this.currentToken != null & this.currentToken.equals("lang")))) +/* */ return; +/* 44 */ dest = new char[length]; +/* 45 */ System.arraycopy(c, start, dest, 0, length); +/* 46 */ this.lang = new String(dest); +/* */ } +/* */ +/* */ public void ignorableWhitespace(char[] c, int start, int length) +/* 
*/ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void processingInstruction(String target, String data) +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void setDocumentLocator(Locator arg1) +/* */ { +/* */ } +/* */ +/* */ public void endElement(String uri, String localName, String name) +/* */ throws SAXException +/* */ { +/* 70 */ this.currentToken = ""; +/* */ } +/* */ +/* */ public void endPrefixMapping(String prefix) +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void skippedEntity(String name) +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void startElement(String uri, String localName, String name, Attributes attrs) +/* */ throws SAXException +/* */ { +/* 88 */ if (name.equals("lang")) +/* 89 */ this.currentToken = "lang"; +/* */ else +/* 91 */ this.currentToken = ""; +/* */ } +/* */ +/* */ public void startPrefixMapping(String prefix, String uri) +/* */ throws SAXException +/* */ { +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.fulltextIndexer.utils.ParseIndexMeta + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/fulltextIndexer/utils/ParseOcrDocument.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltextIndexer/utils/ParseOcrDocument.java Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,163 @@ +/* */ package de.mpiwg.dwinter.fulltextIndexer.utils; +/* */ +/* */ import de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument; +/* */ import de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument.OCRLine; +/* */ import java.io.File; +/* */ import java.io.IOException; +/* */ import java.io.PrintStream; +/* */ import java.util.ArrayList; +/* */ import org.xml.sax.Attributes; +/* */ import org.xml.sax.InputSource; +/* */ import org.xml.sax.Locator; +/* */ import org.xml.sax.SAXException; +/* */ import org.xml.sax.helpers.DefaultHandler; +/* */ +/* */ 
public class ParseOcrDocument extends DefaultHandler +/* */ { +/* 24 */ public OCRDocument ocrDocument = new OCRDocument(); +/* */ private int lineCounter; +/* */ private OCRDocument.OCRLine currentLine; +/* */ private boolean inLine; +/* */ +/* */ public void startDocument() +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void endDocument() +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void characters(char[] c, int start, int length) +/* */ throws SAXException +/* */ { +/* 38 */ if (this.inLine) +/* 39 */ this.currentLine.content += new String(c, start, length); +/* */ } +/* */ +/* */ public void ignorableWhitespace(char[] c, int start, int length) +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void processingInstruction(String target, String data) +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void setDocumentLocator(Locator arg1) +/* */ { +/* */ } +/* */ +/* */ public void endElement(String uri, String localName, String name) +/* */ throws SAXException +/* */ { +/* 61 */ if ((!name.equals("span")) || +/* 63 */ (!this.inLine)) return; +/* 64 */ this.ocrDocument.OCRLines.add(this.currentLine); +/* 65 */ this.inLine = false; +/* */ } +/* */ +/* */ public void endPrefixMapping(String prefix) +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void skippedEntity(String name) +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public void startElement(String uri, String localName, String name, Attributes attrs) +/* */ throws SAXException +/* */ { +/* 83 */ if ((name.equals("div")) && +/* 84 */ (attrs != null)) { +/* 85 */ int length = attrs.getLength(); +/* */ +/* 87 */ for (int i = 0; i < length; ++i) { +/* 88 */ if ((!attrs.getLocalName(i).equals("class")) || +/* 89 */ (!attrs.getValue(i).equals("ocr_page"))) continue; +/* 90 */ doPage(attrs); +/* */ } +/* */ +/* */ } +/* */ +/* 96 */ if ((!name.equals("span")) || +/* 97 */ (attrs == null)) return; +/* 98 */ int length = 
attrs.getLength(); +/* */ +/* 100 */ for (int i = 0; i < length; ++i) { +/* 101 */ if ((!attrs.getLocalName(i).equals("class")) || +/* 102 */ (!attrs.getValue(i).equals("ocr_line"))) continue; +/* 103 */ doLine(attrs); +/* */ } +/* */ } +/* */ +/* */ private void doPage(Attributes attrs) +/* */ { +/* 111 */ int length = attrs.getLength(); +/* */ +/* 113 */ for (int i = 0; i < length; ++i) { +/* 114 */ if (attrs.getLocalName(i).equals("title")) { +/* 115 */ String title = attrs.getValue(i); +/* 116 */ String[] splitted = title.split(" "); +/* 117 */ String dimension = splitted[(splitted.length - 2)] + " " + +/* 118 */ splitted[(splitted.length - 1)]; +/* 119 */ this.ocrDocument.pageDimension = dimension; +/* */ } +/* */ } +/* 122 */ this.lineCounter = 0; +/* */ } +/* */ +/* */ private void doLine(Attributes attrs) +/* */ { +/* */ + OCRDocument doc = new OCRDocument(); + + this.currentLine = doc.new OCRLine(); +/* 128 */ this.inLine = true; +/* 129 */ this.currentLine.content = new String(); +/* */ +/* 131 */ int length = attrs.getLength(); +/* 132 */ this.currentLine.lineNumber = String.valueOf(this.lineCounter); +/* 133 */ this.lineCounter += 1; +/* 134 */ this.currentLine.bbox = "0 0"; +/* */ +/* 137 */ for (int i = 0; i < length; ++i) +/* 138 */ if (attrs.getLocalName(i).equals("title")) { +/* 139 */ String title = attrs.getValue(i); +/* */ +/* 141 */ String dimension = title.replace("bbox ", ""); +/* 142 */ this.currentLine.bbox = dimension; +/* */ } +/* */ } +/* */ +/* */ public void startPrefixMapping(String prefix, String uri) +/* */ throws SAXException +/* */ { +/* */ } +/* */ +/* */ public InputSource resolveEntity(String publicId, String systemId) +/* */ throws SAXException, IOException +/* */ { +/* 160 */ File f = new File( +/* 161 */ "/Users/dwinter/text-tools/fulltextsearch/catalog/xhtml1-transitional.dtd"); +/* 162 */ if (!f.exists()) +/* 163 */ f = new File( +/* 164 */ 
"/usr/local/apache-tomcat-6.0.16/webapps/fulltextsearch/catalog/xhtml1-transitional.dtd"); +/* 165 */ if (!f.exists()) { +/* 166 */ System.err.println("Cant't find xhtml-dtd: MyResolver"); +/* 167 */ return null; +/* */ } +/* 169 */ if (publicId.equals("-//W3C//DTD XHTML 1.0 Transitional//EN")) +/* 170 */ return new InputSource(f.getAbsolutePath()); +/* 171 */ return new InputSource(systemId); +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.fulltextIndexer.utils.ParseOcrDocument + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/lucencetools/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/lucencetools/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,11 @@ +K 25 +svn:wc:ra_dav:version-url +V 74 +/svn/!svn/ver/2621/fulltextIndexer/trunk/src/de/mpiwg/dwinter/lucencetools +END +LineCollector.java +K 25 +svn:wc:ra_dav:version-url +V 93 +/svn/!svn/ver/2621/fulltextIndexer/trunk/src/de/mpiwg/dwinter/lucencetools/LineCollector.java +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/lucencetools/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/lucencetools/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,75 @@ +10 + +dir +2621 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/src/de/mpiwg/dwinter/lucencetools +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-16T19:13:23.990503Z +2621 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +LineCollector.java +file + + + + +2010-06-16T09:39:31.000000Z +f9ec1193d7d68cc8377cf38ef0af2451 +2010-06-16T19:13:23.990503Z +2621 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +723 + +analyzer +dir + +documents +dir + diff -r 000000000000 -r dc7622afcfea 
src/de/mpiwg/dwinter/lucencetools/.svn/prop-base/LineCollector.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/lucencetools/.svn/prop-base/LineCollector.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,9 @@ +K 14 +svn:executable +V 1 +* +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/lucencetools/.svn/text-base/LineCollector.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/lucencetools/.svn/text-base/LineCollector.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,27 @@ +/* */ package de.mpiwg.dwinter.lucencetools; +/* */ +/* */ import java.io.IOException; +/* */ import org.apache.lucene.search.TopScoreDocCollector; +/* */ +/* */ public class LineCollector extends TopScoreDocCollector +/* */ { +/* */ private LineCollector(int numhits) +/* */ { + super(numhits); +/* */ } +/* */ +/* */ public boolean acceptsDocsOutOfOrder() +/* */ { +/* 28 */ return true; +/* */ } +/* */ +/* */ public void collect(int arg0) +/* */ throws IOException +/* */ { +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.lucencetools.LineCollector + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/lucencetools/LineCollector.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/lucencetools/LineCollector.java Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,27 @@ +/* */ package de.mpiwg.dwinter.lucencetools; +/* */ +/* */ import java.io.IOException; +/* */ import org.apache.lucene.search.TopScoreDocCollector; +/* */ +/* */ public class LineCollector extends TopScoreDocCollector +/* */ { +/* */ private LineCollector(int numhits) +/* */ { + super(numhits); +/* */ } +/* */ +/* */ public boolean acceptsDocsOutOfOrder() +/* */ { +/* 28 */ return true; +/* */ } +/* */ +/* */ public void collect(int arg0) +/* */ throws IOException 
+/* */ { +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.lucencetools.LineCollector + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/lucencetools/analyzer/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/lucencetools/analyzer/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,23 @@ +K 25 +svn:wc:ra_dav:version-url +V 83 +/svn/!svn/ver/2621/fulltextIndexer/trunk/src/de/mpiwg/dwinter/lucencetools/analyzer +END +LanguageAnalyzer.java +K 25 +svn:wc:ra_dav:version-url +V 105 +/svn/!svn/ver/2873/fulltextIndexer/trunk/src/de/mpiwg/dwinter/lucencetools/analyzer/LanguageAnalyzer.java +END +LanguageAnalyzers.java +K 25 +svn:wc:ra_dav:version-url +V 106 +/svn/!svn/ver/2873/fulltextIndexer/trunk/src/de/mpiwg/dwinter/lucencetools/analyzer/LanguageAnalyzers.java +END +XMLFilteredReader.java +K 25 +svn:wc:ra_dav:version-url +V 106 +/svn/!svn/ver/2621/fulltextIndexer/trunk/src/de/mpiwg/dwinter/lucencetools/analyzer/XMLFilteredReader.java +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/lucencetools/analyzer/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/lucencetools/analyzer/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,140 @@ +10 + +dir +2621 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/src/de/mpiwg/dwinter/lucencetools/analyzer +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-16T19:13:23.990503Z +2621 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +LanguageAnalyzer.java +file +2873 + + + +2010-06-29T07:53:25.000000Z +faff08c5b7ad77406dd545ef0521431c +2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +2884 + +LanguageAnalyzers.java +file +2873 + + + +2010-06-29T08:24:06.000000Z 
+54a8b0702003576719230b96be6f363d +2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +4368 + +XMLFilteredReader.java +file + + + + +2010-06-16T09:36:10.000000Z +1f672b0443d4a243715f01492c912488 +2010-06-16T19:13:23.990503Z +2621 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +2077 + +donatusAnalyzer +dir + diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/lucencetools/analyzer/.svn/prop-base/LanguageAnalyzer.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/lucencetools/analyzer/.svn/prop-base/LanguageAnalyzer.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,9 @@ +K 14 +svn:executable +V 1 +* +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/lucencetools/analyzer/.svn/prop-base/LanguageAnalyzers.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/lucencetools/analyzer/.svn/prop-base/LanguageAnalyzers.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,9 @@ +K 14 +svn:executable +V 1 +* +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/lucencetools/analyzer/.svn/prop-base/XMLFilteredReader.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/lucencetools/analyzer/.svn/prop-base/XMLFilteredReader.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,9 @@ +K 14 +svn:executable +V 1 +* +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/lucencetools/analyzer/.svn/text-base/LanguageAnalyzer.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/lucencetools/analyzer/.svn/text-base/LanguageAnalyzer.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,76 @@ +/* */ package de.mpiwg.dwinter.lucencetools.analyzer; +/* */ +/* */ import java.io.File; +/* */ import java.io.IOException; +/* */ import 
org.apache.lucene.analysis.Analyzer; +/* */ import org.apache.lucene.index.CorruptIndexException; +/* */ import org.apache.lucene.index.IndexReader; +/* */ import org.apache.lucene.index.IndexWriter; +/* */ import org.apache.lucene.index.IndexWriter.MaxFieldLength; +/* */ import org.apache.lucene.search.IndexSearcher; +/* */ import org.apache.lucene.store.Directory; +/* */ import org.apache.lucene.store.LockObtainFailedException; +import org.apache.lucene.store.NIOFSDirectory; +import org.apache.lucene.store.SimpleFSLockFactory; +/* */ +/* */ public class LanguageAnalyzer +/* */ { +/* */ public String lang; +/* */ public Analyzer analyzer; +/* 19 */ public IndexWriter writer = null; +/* 20 */ public IndexReader reader = null; +/* 21 */ public IndexSearcher searcher = null; +/* */ +/* */ public LanguageAnalyzer(String lang, Analyzer analyzer, File index_dir, boolean onlyReader) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException +/* */ { +/* 35 */ this.lang = lang; +/* 36 */ this.analyzer = analyzer; +/* */ +/* 38 */ boolean create = true; +/* */ +/* 40 */ if (!index_dir.exists()) { +/* 41 */ index_dir.mkdir(); +/* */ } +/* */ +/* 44 */ File lang_dir = new File(index_dir.getAbsolutePath() + "/" + lang); +/* 45 */ if (lang_dir.exists()) { +/* 46 */ create = false; +/* */ } + File dirFile= new File(index_dir + "/" + lang); +/* 48 */ Directory indexDirectory = new NIOFSDirectory(dirFile); +/* */ indexDirectory.setLockFactory(new SimpleFSLockFactory(dirFile)); // TODO:ansonsten probleme wenn NFS mounts +/* 50 */ if (!onlyReader) +/* */ { +/* 52 */ this.writer = +/* 53 */ new IndexWriter(indexDirectory, +/* 53 */ analyzer, IndexWriter.MaxFieldLength.UNLIMITED); +/* */ } +/* 55 */ if (!index_dir.exists()) +/* */ { +/* 58 */ return; +/* */ } +/* */ +/* 62 */ create = true; +/* */ +/* 65 */ if (lang_dir.exists()) { +/* 66 */ create = false; +/* */ } +/* 68 */ if (create) +/* */ return; +/* 70 */ IndexReader reader = IndexReader.open(indexDirectory); 
+/* 71 */ this.reader = reader; +/* 72 */ this.searcher = new IndexSearcher(reader); +/* */ } +/* */ +/* */ public LanguageAnalyzer(String lang, Analyzer analyzer, File index_dir) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException +/* */ { +/* 78 */ this(lang, analyzer, index_dir, false); +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/lucencetools/analyzer/.svn/text-base/LanguageAnalyzers.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/lucencetools/analyzer/.svn/text-base/LanguageAnalyzers.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,111 @@ +/* */ package de.mpiwg.dwinter.lucencetools.analyzer; +/* */ +/* */ import de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer.DonatusAnalyzer; +/* */ import de.mpiwg.dwinter.lucencetools.documents.MorphDocument; +/* */ import java.io.IOException; +/* */ import java.io.PrintStream; +/* */ import java.util.ArrayList; +/* */ import java.util.HashMap; +/* */ import java.util.Iterator; +/* */ import org.apache.lucene.document.Document; +/* */ import org.apache.lucene.index.CorruptIndexException; +/* */ import org.apache.lucene.index.IndexReader; +/* */ import org.apache.lucene.index.IndexWriter; +/* */ import org.apache.lucene.index.Term; +/* */ import org.apache.lucene.search.Collector; +/* */ import org.apache.lucene.search.IndexSearcher; +/* */ import org.apache.lucene.search.TermQuery; +/* */ import org.apache.lucene.search.TopScoreDocCollector; +/* */ +/* */ public class LanguageAnalyzers extends ArrayList +/* */ { +/* */ private static final long serialVersionUID = 2L; +/* */ private static final int MAX_HITS_PER_PAGE = 10; +/* */ +/* */ public void optimize() +/* */ throws CorruptIndexException, IOException +/* */ { +/* 43 */ Iterator 
indexWriter = iterator(); +/* 44 */ while (indexWriter.hasNext()) +/* */ { +/* 46 */ ((LanguageAnalyzer)indexWriter.next()).writer.optimize(); +/* */ } +/* */ } +/* */ +/* */ public void close() +/* */ throws CorruptIndexException, IOException +/* */ { +/* 57 */ Iterator indexWriter = iterator(); +/* 58 */ while (indexWriter.hasNext()) +/* */ { +/* 60 */ ((LanguageAnalyzer)indexWriter.next()).writer.close(); +/* */ } +/* */ } +/* */ +/* */ public void addDocument(Document document, String lang) +/* */ throws CorruptIndexException, IOException +/* */ { +/* 74 */ LanguageAnalyzer analyzer = getAnalyzer(lang); +/* */ +/* 76 */ if (analyzer != null) +/* */ { +/* 78 */ analyzer.writer.addDocument(document); +/* */ +/* 81 */ if (!DonatusAnalyzer.class.isInstance(analyzer.analyzer)) +/* */ return; +/* 83 */ StringBuffer mp = DonatusAnalyzer.morphFile; +/* */ +/* 86 */ LanguageAnalyzer mpAnalyzer = getAnalyzer("morph"); +/* */ +/* 88 */ mpAnalyzer.writer.addDocument(MorphDocument.Document(mp.toString(), document.get("cleanedPath"))); +/* */ } +/* */ else +/* */ { + + System.err.println("addDocument: cannot add the document language " + lang + " not known!"); +/* */ } +/* */ } +/* */ +/* */ public LanguageAnalyzer getAnalyzer(String lang) +/* */ { +/* 106 */ for (LanguageAnalyzer analyzer : this) +/* 107 */ if (analyzer.lang.equals(lang)) +/* 108 */ return analyzer; +/* 109 */ return null; +/* */ } +/* */ public void deleteDocuments(Term term) throws CorruptIndexException, IOException { +/* 112 */ for (LanguageAnalyzer analyzer : this) +/* 113 */ analyzer.writer.deleteDocuments(term); +/* */ } +/* */ +/* */ public void deleteDocuments(TermQuery query) throws CorruptIndexException, IOException +/* */ { +/* 118 */ for (LanguageAnalyzer analyzer : this) +/* 119 */ analyzer.writer.deleteDocuments(query); +/* */ } +/* */ +/* */ public void deleteDocument(int id) throws CorruptIndexException, IOException +/* */ { +/* 124 */ for (LanguageAnalyzer analyzer : this) +/* 125 */ 
analyzer.reader.deleteDocument(id); +/* */ } +/* */ +/* */ public HashMap search(TermQuery query) throws IOException { +/* 129 */ HashMap collectors = new HashMap(); +/* 130 */ for (LanguageAnalyzer analyzer : this) +/* */ { +/* 132 */ if (analyzer.searcher == null) +/* */ continue; +/* 134 */ Collector col = TopScoreDocCollector.create(10, false); +/* 135 */ analyzer.searcher.search(query, col); +/* 136 */ collectors.put(analyzer.lang, col); +/* */ } +/* */ +/* 140 */ return collectors; +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/lucencetools/analyzer/.svn/text-base/XMLFilteredReader.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/lucencetools/analyzer/.svn/text-base/XMLFilteredReader.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,72 @@ +/* */ package de.mpiwg.dwinter.lucencetools.analyzer; +/* */ +/* */ +/* */ import java.io.IOException; +/* */ import java.io.InputStream; +/* */ import java.io.InputStreamReader; +/* */ import java.io.UnsupportedEncodingException; +/* */ +/* */ public class XMLFilteredReader extends InputStreamReader +/* */ { +/* */ public XMLFilteredReader(InputStream in, String charsetName) +/* */ throws UnsupportedEncodingException +/* */ { +/* 12 */ super(in, charsetName); +/* */ } +/* */ +/* */ public int read() throws IOException +/* */ { +/* 17 */ int c = super.read(); +/* 18 */ if (c == -1) { +/* 19 */ return c; +/* */ } +/* 21 */ boolean isXML = false; +/* */ +/* 23 */ if (c == 60) +/* 24 */ isXML = true; +/* 25 */ while (isXML) { +/* 26 */ c = super.read(); +/* 27 */ if (c == -1) +/* 28 */ return c; +/* 29 */ if (c == 62) { +/* 30 */ isXML = false; +/* */ } +/* */ } +/* 33 */ return super.read(); +/* */ } +/* */ +/* */ public int read(char[] cbuf, int offset, int length) throws 
IOException +/* */ { +/* 38 */ char[] cbuf2 = new char[cbuf.length]; +/* 39 */ int i = 0; +/* 40 */ int retLen = super.read(cbuf2, offset, length); +/* 41 */ if (retLen <= 0) { +/* 42 */ cbuf = (char[])cbuf2.clone(); +/* 43 */ return retLen; +/* */ } +/* 45 */ boolean isXML = false; +/* 46 */ for (char c : cbuf2) { +/* 47 */ if (c == '<') { +/* 48 */ isXML = true; +/* */ } +/* 50 */ //if (c == '\0/7') { +/* */ // break; +/* */ //} +/* 53 */ if (!isXML) { +/* 54 */ cbuf[i] = c; +/* 55 */ ++i; +/* */ } +/* 57 */ if (c == '>') { +/* 58 */ isXML = false; +/* */ } +/* */ +/* */ } +/* */ +/* 63 */ return i; +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.lucencetools.analyzer.XMLFilteredReader + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/lucencetools/analyzer/LanguageAnalyzer.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/lucencetools/analyzer/LanguageAnalyzer.java Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,76 @@ +/* */ package de.mpiwg.dwinter.lucencetools.analyzer; +/* */ +/* */ import java.io.File; +/* */ import java.io.IOException; +/* */ import org.apache.lucene.analysis.Analyzer; +/* */ import org.apache.lucene.index.CorruptIndexException; +/* */ import org.apache.lucene.index.IndexReader; +/* */ import org.apache.lucene.index.IndexWriter; +/* */ import org.apache.lucene.index.IndexWriter.MaxFieldLength; +/* */ import org.apache.lucene.search.IndexSearcher; +/* */ import org.apache.lucene.store.Directory; +/* */ import org.apache.lucene.store.LockObtainFailedException; +import org.apache.lucene.store.NIOFSDirectory; +import org.apache.lucene.store.SimpleFSLockFactory; +/* */ +/* */ public class LanguageAnalyzer +/* */ { +/* */ public String lang; +/* */ public Analyzer analyzer; +/* 19 */ public IndexWriter writer = null; +/* 20 */ public IndexReader reader = null; +/* 21 */ public IndexSearcher searcher = 
null; +/* */ +/* */ public LanguageAnalyzer(String lang, Analyzer analyzer, File index_dir, boolean onlyReader) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException +/* */ { +/* 35 */ this.lang = lang; +/* 36 */ this.analyzer = analyzer; +/* */ +/* 38 */ boolean create = true; +/* */ +/* 40 */ if (!index_dir.exists()) { +/* 41 */ index_dir.mkdir(); +/* */ } +/* */ +/* 44 */ File lang_dir = new File(index_dir.getAbsolutePath() + "/" + lang); +/* 45 */ if (lang_dir.exists()) { +/* 46 */ create = false; +/* */ } + File dirFile= new File(index_dir + "/" + lang); +/* 48 */ Directory indexDirectory = new NIOFSDirectory(dirFile); +/* */ indexDirectory.setLockFactory(new SimpleFSLockFactory(dirFile)); // TODO:ansonsten probleme wenn NFS mounts +/* 50 */ if (!onlyReader) +/* */ { +/* 52 */ this.writer = +/* 53 */ new IndexWriter(indexDirectory, +/* 53 */ analyzer, IndexWriter.MaxFieldLength.UNLIMITED); +/* */ } +/* 55 */ if (!index_dir.exists()) +/* */ { +/* 58 */ return; +/* */ } +/* */ +/* 62 */ create = true; +/* */ +/* 65 */ if (lang_dir.exists()) { +/* 66 */ create = false; +/* */ } +/* 68 */ if (create) +/* */ return; +/* 70 */ IndexReader reader = IndexReader.open(indexDirectory); +/* 71 */ this.reader = reader; +/* 72 */ this.searcher = new IndexSearcher(reader); +/* */ } +/* */ +/* */ public LanguageAnalyzer(String lang, Analyzer analyzer, File index_dir) +/* */ throws CorruptIndexException, LockObtainFailedException, IOException +/* */ { +/* 78 */ this(lang, analyzer, index_dir, false); +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/lucencetools/analyzer/LanguageAnalyzers.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/lucencetools/analyzer/LanguageAnalyzers.java Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 
+1,111 @@ +/* */ package de.mpiwg.dwinter.lucencetools.analyzer; +/* */ +/* */ import de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer.DonatusAnalyzer; +/* */ import de.mpiwg.dwinter.lucencetools.documents.MorphDocument; +/* */ import java.io.IOException; +/* */ import java.io.PrintStream; +/* */ import java.util.ArrayList; +/* */ import java.util.HashMap; +/* */ import java.util.Iterator; +/* */ import org.apache.lucene.document.Document; +/* */ import org.apache.lucene.index.CorruptIndexException; +/* */ import org.apache.lucene.index.IndexReader; +/* */ import org.apache.lucene.index.IndexWriter; +/* */ import org.apache.lucene.index.Term; +/* */ import org.apache.lucene.search.Collector; +/* */ import org.apache.lucene.search.IndexSearcher; +/* */ import org.apache.lucene.search.TermQuery; +/* */ import org.apache.lucene.search.TopScoreDocCollector; +/* */ +/* */ public class LanguageAnalyzers extends ArrayList +/* */ { +/* */ private static final long serialVersionUID = 2L; +/* */ private static final int MAX_HITS_PER_PAGE = 10; +/* */ +/* */ public void optimize() +/* */ throws CorruptIndexException, IOException +/* */ { +/* 43 */ Iterator indexWriter = iterator(); +/* 44 */ while (indexWriter.hasNext()) +/* */ { +/* 46 */ ((LanguageAnalyzer)indexWriter.next()).writer.optimize(); +/* */ } +/* */ } +/* */ +/* */ public void close() +/* */ throws CorruptIndexException, IOException +/* */ { +/* 57 */ Iterator indexWriter = iterator(); +/* 58 */ while (indexWriter.hasNext()) +/* */ { +/* 60 */ ((LanguageAnalyzer)indexWriter.next()).writer.close(); +/* */ } +/* */ } +/* */ +/* */ public void addDocument(Document document, String lang) +/* */ throws CorruptIndexException, IOException +/* */ { +/* 74 */ LanguageAnalyzer analyzer = getAnalyzer(lang); +/* */ +/* 76 */ if (analyzer != null) +/* */ { +/* 78 */ analyzer.writer.addDocument(document); +/* */ +/* 81 */ if (!DonatusAnalyzer.class.isInstance(analyzer.analyzer)) +/* */ return; +/* 83 */ StringBuffer mp = 
DonatusAnalyzer.morphFile; +/* */ +/* 86 */ LanguageAnalyzer mpAnalyzer = getAnalyzer("morph"); +/* */ +/* 88 */ mpAnalyzer.writer.addDocument(MorphDocument.Document(mp.toString(), document.get("cleanedPath"))); +/* */ } +/* */ else +/* */ { + + System.err.println("addDocument: cannot add the document language " + lang + " not known!"); +/* */ } +/* */ } +/* */ +/* */ public LanguageAnalyzer getAnalyzer(String lang) +/* */ { +/* 106 */ for (LanguageAnalyzer analyzer : this) +/* 107 */ if (analyzer.lang.equals(lang)) +/* 108 */ return analyzer; +/* 109 */ return null; +/* */ } +/* */ public void deleteDocuments(Term term) throws CorruptIndexException, IOException { +/* 112 */ for (LanguageAnalyzer analyzer : this) +/* 113 */ analyzer.writer.deleteDocuments(term); +/* */ } +/* */ +/* */ public void deleteDocuments(TermQuery query) throws CorruptIndexException, IOException +/* */ { +/* 118 */ for (LanguageAnalyzer analyzer : this) +/* 119 */ analyzer.writer.deleteDocuments(query); +/* */ } +/* */ +/* */ public void deleteDocument(int id) throws CorruptIndexException, IOException +/* */ { +/* 124 */ for (LanguageAnalyzer analyzer : this) +/* 125 */ analyzer.reader.deleteDocument(id); +/* */ } +/* */ +/* */ public HashMap search(TermQuery query) throws IOException { +/* 129 */ HashMap collectors = new HashMap(); +/* 130 */ for (LanguageAnalyzer analyzer : this) +/* */ { +/* 132 */ if (analyzer.searcher == null) +/* */ continue; +/* 134 */ Collector col = TopScoreDocCollector.create(10, false); +/* 135 */ analyzer.searcher.search(query, col); +/* 136 */ collectors.put(analyzer.lang, col); +/* */ } +/* */ +/* 140 */ return collectors; +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzers + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/lucencetools/analyzer/XMLFilteredReader.java --- /dev/null Thu Jan 01 00:00:00 
1970 +0000 +++ b/src/de/mpiwg/dwinter/lucencetools/analyzer/XMLFilteredReader.java Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,72 @@ +/* */ package de.mpiwg.dwinter.lucencetools.analyzer; +/* */ +/* */ +/* */ import java.io.IOException; +/* */ import java.io.InputStream; +/* */ import java.io.InputStreamReader; +/* */ import java.io.UnsupportedEncodingException; +/* */ +/* */ public class XMLFilteredReader extends InputStreamReader +/* */ { +/* */ public XMLFilteredReader(InputStream in, String charsetName) +/* */ throws UnsupportedEncodingException +/* */ { +/* 12 */ super(in, charsetName); +/* */ } +/* */ +/* */ public int read() throws IOException +/* */ { +/* 17 */ int c = super.read(); +/* 18 */ if (c == -1) { +/* 19 */ return c; +/* */ } +/* 21 */ boolean isXML = false; +/* */ +/* 23 */ if (c == 60) +/* 24 */ isXML = true; +/* 25 */ while (isXML) { +/* 26 */ c = super.read(); +/* 27 */ if (c == -1) +/* 28 */ return c; +/* 29 */ if (c == 62) { +/* 30 */ isXML = false; +/* */ } +/* */ } +/* 33 */ return super.read(); +/* */ } +/* */ +/* */ public int read(char[] cbuf, int offset, int length) throws IOException +/* */ { +/* 38 */ char[] cbuf2 = new char[cbuf.length]; +/* 39 */ int i = 0; +/* 40 */ int retLen = super.read(cbuf2, offset, length); +/* 41 */ if (retLen <= 0) { +/* 42 */ cbuf = (char[])cbuf2.clone(); +/* 43 */ return retLen; +/* */ } +/* 45 */ boolean isXML = false; +/* 46 */ for (char c : cbuf2) { +/* 47 */ if (c == '<') { +/* 48 */ isXML = true; +/* */ } +/* 50 */ //if (c == '\0/7') { +/* */ // break; +/* */ //} +/* 53 */ if (!isXML) { +/* 54 */ cbuf[i] = c; +/* 55 */ ++i; +/* */ } +/* 57 */ if (c == '>') { +/* 58 */ isXML = false; +/* */ } +/* */ +/* */ } +/* */ +/* 63 */ return i; +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.lucencetools.analyzer.XMLFilteredReader + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea 
src/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,11 @@ +K 25 +svn:wc:ra_dav:version-url +V 99 +/svn/!svn/ver/2621/fulltextIndexer/trunk/src/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer +END +DonatusAnalyzer.java +K 25 +svn:wc:ra_dav:version-url +V 120 +/svn/!svn/ver/2621/fulltextIndexer/trunk/src/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/DonatusAnalyzer.java +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,69 @@ +10 + +dir +2621 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/src/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-16T19:13:23.990503Z +2621 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +DonatusAnalyzer.java +file + + + + +2010-06-16T07:59:06.000000Z +877d39015008cd03df1451a2537daf4c +2010-06-16T19:13:23.990503Z +2621 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +409 + diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/.svn/prop-base/DonatusAnalyzer.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/.svn/prop-base/DonatusAnalyzer.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,9 @@ +K 14 +svn:executable +V 1 +* +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/.svn/text-base/DonatusAnalyzer.java.svn-base --- /dev/null Thu Jan 01 
00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/.svn/text-base/DonatusAnalyzer.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,11 @@ +/* */ package de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer; +/* */ +/* */ public abstract interface DonatusAnalyzer +/* */ { +/* 5 */ public static final StringBuffer morphFile = new StringBuffer(); +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer.DonatusAnalyzer + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/DonatusAnalyzer.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/lucencetools/analyzer/donatusAnalyzer/DonatusAnalyzer.java Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,11 @@ +/* */ package de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer; +/* */ +/* */ public abstract interface DonatusAnalyzer +/* */ { +/* 5 */ public static final StringBuffer morphFile = new StringBuffer(); +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.lucencetools.analyzer.donatusAnalyzer.DonatusAnalyzer + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/lucencetools/documents/.svn/all-wcprops --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/lucencetools/documents/.svn/all-wcprops Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,23 @@ +K 25 +svn:wc:ra_dav:version-url +V 84 +/svn/!svn/ver/2621/fulltextIndexer/trunk/src/de/mpiwg/dwinter/lucencetools/documents +END +OcropusLineDocument.java +K 25 +svn:wc:ra_dav:version-url +V 109 +/svn/!svn/ver/2873/fulltextIndexer/trunk/src/de/mpiwg/dwinter/lucencetools/documents/OcropusLineDocument.java +END +FileDocument.java +K 25 +svn:wc:ra_dav:version-url +V 102 
+/svn/!svn/ver/2873/fulltextIndexer/trunk/src/de/mpiwg/dwinter/lucencetools/documents/FileDocument.java +END +MorphDocument.java +K 25 +svn:wc:ra_dav:version-url +V 103 +/svn/!svn/ver/2873/fulltextIndexer/trunk/src/de/mpiwg/dwinter/lucencetools/documents/MorphDocument.java +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/lucencetools/documents/.svn/entries --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/lucencetools/documents/.svn/entries Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,137 @@ +10 + +dir +2621 +https://it-dev.mpiwg-berlin.mpg.de/svn/fulltextIndexer/trunk/src/de/mpiwg/dwinter/lucencetools/documents +https://it-dev.mpiwg-berlin.mpg.de/svn + + + +2010-06-16T19:13:23.990503Z +2621 +dwinter + + +svn:special svn:externals svn:needs-lock + + + + + + + + + + + +7f2f12ed-034a-4cd9-b60a-d6dc76004f41 + + + + + + +0 + +FileDocument.java +file +2873 + + + +2010-07-15T11:21:28.000000Z +5c80a38e0875af08a8d0abc510e9685b +2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +2933 + +MorphDocument.java +file +2873 + + + +2010-06-29T08:24:25.000000Z +96ec38f2cef388c687c3fee67a9093af +2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +1091 + +OcropusLineDocument.java +file +2873 + + + +2010-06-29T08:20:29.000000Z +cf1622f50143f6ce2435bed0acf29686 +2010-11-03T11:29:50.531613Z +2873 +dwinter +has-props + + + + + + + + + + + + + + + + + + + + +3111 + diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/lucencetools/documents/.svn/prop-base/FileDocument.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/lucencetools/documents/.svn/prop-base/FileDocument.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,9 @@ +K 14 +svn:executable +V 1 +* +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/lucencetools/documents/.svn/prop-base/MorphDocument.java.svn-base --- 
/dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/lucencetools/documents/.svn/prop-base/MorphDocument.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,9 @@ +K 14 +svn:executable +V 1 +* +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/lucencetools/documents/.svn/prop-base/OcropusLineDocument.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/lucencetools/documents/.svn/prop-base/OcropusLineDocument.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,9 @@ +K 14 +svn:executable +V 1 +* +K 13 +svn:mime-type +V 10 +text/plain +END diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/lucencetools/documents/.svn/text-base/FileDocument.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/lucencetools/documents/.svn/text-base/FileDocument.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,73 @@ +/* */ package de.mpiwg.dwinter.lucencetools.documents; +/* */ +/* */ import de.mpiwg.dwinter.lucencetools.analyzer.XMLFilteredReader; +/* */ import java.io.File; +/* */ import java.io.FileInputStream; +/* */ import java.io.IOException; +/* */ import java.io.Reader; +/* */ import org.apache.lucene.document.DateTools; +/* */ import org.apache.lucene.document.DateTools.Resolution; +/* */ import org.apache.lucene.document.Document; +/* */ import org.apache.lucene.document.Field; +/* */ import org.apache.lucene.document.Field.Index; +/* */ import org.apache.lucene.document.Field.Store; +/* */ +/* */ public class FileDocument +/* */ { + + public static String toXML(Document doc){ + //String path = doc.get("path"); + String cleanedPath = doc.get("cleanedPath"); + String textId = doc.get("textId"); + String md = doc.get("dcMetaData"); + String ret = ""; + ret+= ""+cleanedPath+""; + ret+= ""+textId.replace("/",":")+""; + ret+= ""+textId.replace("/","_")+""; + ret+= ""+md+""; + ret+=""; + return ret; + + } +/* */ public static 
Document Document(File f, String cleanedPath,String language, String textId) +/* */ throws IOException +/* */ { +/* 63 */ return Document(f, cleanedPath,language, null, textId); +/* */ } +/* */ +/* */ public static Document Document(File f, String cleanedPath,String language, String dcMetaData, String textId) +/* */ throws IOException +/* */ { +/* 70 */ Document doc = new Document(); +/* */ +/* 74 */ doc.add(new Field("path", f.getCanonicalPath(), Field.Store.YES, Field.Index.NOT_ANALYZED)); +/* 74 */ doc.add(new Field("cleanedPath", cleanedPath, Field.Store.YES, Field.Index.NOT_ANALYZED)); +/* 75 */ if (dcMetaData == null) +/* 76 */ dcMetaData = ""; +/* 77 */ doc.add(new Field("dcMetaData", dcMetaData, Field.Store.YES, Field.Index.ANALYZED)); +/* */ +/* 79 */ if (textId == null) +/* 80 */ textId = ""; +/* 81 */ doc.add(new Field("textId", textId, Field.Store.YES, Field.Index.NOT_ANALYZED)); +/* */ +/* 87 */ doc.add( +/* 89 */ new Field("modified", +/* 88 */ DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE), +/* 89 */ Field.Store.YES, Field.Index.NOT_ANALYZED)); +/* */ +/* 95 */ Reader in = new XMLFilteredReader(new FileInputStream(f), "UTF-8"); +/* */ +/* 98 */ doc.add(new Field("contents", in)); +/* */ +/* 105 */ doc.add(new Field("language", language, Field.Store.YES, Field.Index.NOT_ANALYZED)); +/* */ +/* 107 */ return doc; +/* */ } + + +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.lucencetools.documents.FileDocument + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/lucencetools/documents/.svn/text-base/MorphDocument.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/lucencetools/documents/.svn/text-base/MorphDocument.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,28 @@ +/* */ package de.mpiwg.dwinter.lucencetools.documents; +/* */ +/* */ import java.io.FileNotFoundException; +/* */ 
import java.io.UnsupportedEncodingException; +/* */ import org.apache.lucene.document.Document; +/* */ import org.apache.lucene.document.Field; +/* */ import org.apache.lucene.document.Field.Index; +/* */ import org.apache.lucene.document.Field.Store; +/* */ +/* */ public class MorphDocument +/* */ { +/* */ public static Document Document(String donatusMorph, String path) +/* */ throws FileNotFoundException, UnsupportedEncodingException +/* */ { +/* 58 */ Document doc = new Document(); +/* */ +/* 62 */ doc.add(new Field("cleanedPath", path, Field.Store.YES, Field.Index.NOT_ANALYZED)); +/* */ +/* 67 */ doc.add(new Field("donatusMorph", donatusMorph, Field.Store.YES, Field.Index.ANALYZED)); +/* */ +/* 69 */ return doc; +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.lucencetools.documents.MorphDocument + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/lucencetools/documents/.svn/text-base/OcropusLineDocument.java.svn-base --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/lucencetools/documents/.svn/text-base/OcropusLineDocument.java.svn-base Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,59 @@ +/* */ package de.mpiwg.dwinter.lucencetools.documents; +/* */ +/* */ import de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument; +import de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument.OCRLine; +/* */ import java.io.File; +/* */ import java.io.FileNotFoundException; +/* */ import java.io.PrintStream; +/* */ import java.io.UnsupportedEncodingException; +/* */ import org.apache.lucene.document.DateTools; +/* */ import org.apache.lucene.document.DateTools.Resolution; +/* */ import org.apache.lucene.document.Document; +/* */ import org.apache.lucene.document.Field; +/* */ import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +/* */ +/* */ public class OcropusLineDocument +/* */ { +/* */ public 
static Document Document(File f, String cleanPath,String language, OCRDocument.OCRLine ocrline, String pageDimension, String textId) +/* */ throws FileNotFoundException, UnsupportedEncodingException +/* */ { +/* 65 */ return Document(f, cleanPath,language, ocrline, pageDimension, null, textId); +/* */ } +/* */ +/* */ public static Document Document(File f, String cleanPath,String language, OCRDocument.OCRLine ocrline, String pageDimension, String dcMetaData, String textId) +/* */ throws FileNotFoundException, UnsupportedEncodingException +/* */ { +/* 72 */ Document doc = new Document(); +/* */ +/* 76 */ doc.add(new Field("path", f.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED)); + doc.add(new Field("cleanedPath", cleanPath, Field.Store.YES, Field.Index.NOT_ANALYZED)); +/* 77 */ doc.add(new Field("pageDimension", pageDimension, Field.Store.YES, Field.Index.NOT_ANALYZED)); +/* 78 */ if (dcMetaData == null) +/* 79 */ dcMetaData = ""; +/* 80 */ System.out.println("++++++++++++++++++DCMD:" + dcMetaData); +/* 81 */ doc.add(new Field("dcMetaData", dcMetaData, Field.Store.YES, Field.Index.ANALYZED)); +/* */ +/* 90 */ doc.add( +/* 92 */ new Field("modified", +/* 91 */ DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE), +/* 92 */ Field.Store.YES, Field.Index.NOT_ANALYZED)); +/* */ +/* 94 */ doc.add(new Field("textId", textId, Field.Store.YES, Field.Index.NOT_ANALYZED)); +/* */ +/* 106 */ doc.add(new Field("contents", ocrline.content, Field.Store.NO, Field.Index.ANALYZED)); +/* 107 */ doc.add(new Field("contentsNormalized", ocrline.content, Field.Store.NO, Field.Index.ANALYZED)); +/* */ +/* 111 */ doc.add(new Field("language", language, Field.Store.YES, Field.Index.NOT_ANALYZED)); +/* */ +/* 114 */ doc.add(new Field("bbox", ocrline.bbox, Field.Store.YES, Field.Index.NOT_ANALYZED)); +/* */ +/* 116 */ doc.add(new Field("lineNumber", ocrline.lineNumber, Field.Store.YES, Field.Index.NOT_ANALYZED)); +/* 117 */ return doc; +/* */ } +/* */ } + +/* Location: 
/private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.lucencetools.documents.OcropusLineDocument + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/lucencetools/documents/FileDocument.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/lucencetools/documents/FileDocument.java Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,73 @@ +/* */ package de.mpiwg.dwinter.lucencetools.documents; +/* */ +/* */ import de.mpiwg.dwinter.lucencetools.analyzer.XMLFilteredReader; +/* */ import java.io.File; +/* */ import java.io.FileInputStream; +/* */ import java.io.IOException; +/* */ import java.io.Reader; +/* */ import org.apache.lucene.document.DateTools; +/* */ import org.apache.lucene.document.DateTools.Resolution; +/* */ import org.apache.lucene.document.Document; +/* */ import org.apache.lucene.document.Field; +/* */ import org.apache.lucene.document.Field.Index; +/* */ import org.apache.lucene.document.Field.Store; +/* */ +/* */ public class FileDocument +/* */ { + + public static String toXML(Document doc){ + //String path = doc.get("path"); + String cleanedPath = doc.get("cleanedPath"); + String textId = doc.get("textId"); + String md = doc.get("dcMetaData"); + String ret = ""; + ret+= ""+cleanedPath+""; + ret+= ""+textId.replace("/",":")+""; + ret+= ""+textId.replace("/","_")+""; + ret+= ""+md+""; + ret+=""; + return ret; + + } +/* */ public static Document Document(File f, String cleanedPath,String language, String textId) +/* */ throws IOException +/* */ { +/* 63 */ return Document(f, cleanedPath,language, null, textId); +/* */ } +/* */ +/* */ public static Document Document(File f, String cleanedPath,String language, String dcMetaData, String textId) +/* */ throws IOException +/* */ { +/* 70 */ Document doc = new Document(); +/* */ +/* 74 */ doc.add(new Field("path", f.getCanonicalPath(), Field.Store.YES, Field.Index.NOT_ANALYZED)); +/* 74 */ doc.add(new 
Field("cleanedPath", cleanedPath, Field.Store.YES, Field.Index.NOT_ANALYZED)); +/* 75 */ if (dcMetaData == null) +/* 76 */ dcMetaData = ""; +/* 77 */ doc.add(new Field("dcMetaData", dcMetaData, Field.Store.YES, Field.Index.ANALYZED)); +/* */ +/* 79 */ if (textId == null) +/* 80 */ textId = ""; +/* 81 */ doc.add(new Field("textId", textId, Field.Store.YES, Field.Index.NOT_ANALYZED)); +/* */ +/* 87 */ doc.add( +/* 89 */ new Field("modified", +/* 88 */ DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE), +/* 89 */ Field.Store.YES, Field.Index.NOT_ANALYZED)); +/* */ +/* 95 */ Reader in = new XMLFilteredReader(new FileInputStream(f), "UTF-8"); +/* */ +/* 98 */ doc.add(new Field("contents", in)); +/* */ +/* 105 */ doc.add(new Field("language", language, Field.Store.YES, Field.Index.NOT_ANALYZED)); +/* */ +/* 107 */ return doc; +/* */ } + + +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.lucencetools.documents.FileDocument + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/lucencetools/documents/MorphDocument.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/lucencetools/documents/MorphDocument.java Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,28 @@ +/* */ package de.mpiwg.dwinter.lucencetools.documents; +/* */ +/* */ import java.io.FileNotFoundException; +/* */ import java.io.UnsupportedEncodingException; +/* */ import org.apache.lucene.document.Document; +/* */ import org.apache.lucene.document.Field; +/* */ import org.apache.lucene.document.Field.Index; +/* */ import org.apache.lucene.document.Field.Store; +/* */ +/* */ public class MorphDocument +/* */ { +/* */ public static Document Document(String donatusMorph, String path) +/* */ throws FileNotFoundException, UnsupportedEncodingException +/* */ { +/* 58 */ Document doc = new Document(); +/* */ +/* 62 */ doc.add(new Field("cleanedPath", path, Field.Store.YES, 
Field.Index.NOT_ANALYZED)); +/* */ +/* 67 */ doc.add(new Field("donatusMorph", donatusMorph, Field.Store.YES, Field.Index.ANALYZED)); +/* */ +/* 69 */ return doc; +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.lucencetools.documents.MorphDocument + * JD-Core Version: 0.5.4 + */ \ No newline at end of file diff -r 000000000000 -r dc7622afcfea src/de/mpiwg/dwinter/lucencetools/documents/OcropusLineDocument.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/lucencetools/documents/OcropusLineDocument.java Wed Nov 03 12:33:16 2010 +0100 @@ -0,0 +1,59 @@ +/* */ package de.mpiwg.dwinter.lucencetools.documents; +/* */ +/* */ import de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument; +import de.mpiwg.dwinter.fulltextIndexer.OCRutils.OCRDocument.OCRLine; +/* */ import java.io.File; +/* */ import java.io.FileNotFoundException; +/* */ import java.io.PrintStream; +/* */ import java.io.UnsupportedEncodingException; +/* */ import org.apache.lucene.document.DateTools; +/* */ import org.apache.lucene.document.DateTools.Resolution; +/* */ import org.apache.lucene.document.Document; +/* */ import org.apache.lucene.document.Field; +/* */ import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +/* */ +/* */ public class OcropusLineDocument +/* */ { +/* */ public static Document Document(File f, String cleanPath,String language, OCRDocument.OCRLine ocrline, String pageDimension, String textId) +/* */ throws FileNotFoundException, UnsupportedEncodingException +/* */ { +/* 65 */ return Document(f, cleanPath,language, ocrline, pageDimension, null, textId); +/* */ } +/* */ +/* */ public static Document Document(File f, String cleanPath,String language, OCRDocument.OCRLine ocrline, String pageDimension, String dcMetaData, String textId) +/* */ throws FileNotFoundException, UnsupportedEncodingException +/* */ { +/* 72 */ Document doc = new Document(); +/* */ +/* 76 */ 
doc.add(new Field("path", f.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED)); + doc.add(new Field("cleanedPath", cleanPath, Field.Store.YES, Field.Index.NOT_ANALYZED)); +/* 77 */ doc.add(new Field("pageDimension", pageDimension, Field.Store.YES, Field.Index.NOT_ANALYZED)); +/* 78 */ if (dcMetaData == null) +/* 79 */ dcMetaData = ""; +/* 80 */ System.out.println("++++++++++++++++++DCMD:" + dcMetaData); +/* 81 */ doc.add(new Field("dcMetaData", dcMetaData, Field.Store.YES, Field.Index.ANALYZED)); +/* */ +/* 90 */ doc.add( +/* 92 */ new Field("modified", +/* 91 */ DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE), +/* 92 */ Field.Store.YES, Field.Index.NOT_ANALYZED)); +/* */ +/* 94 */ doc.add(new Field("textId", textId, Field.Store.YES, Field.Index.NOT_ANALYZED)); +/* */ +/* 106 */ doc.add(new Field("contents", ocrline.content, Field.Store.NO, Field.Index.ANALYZED)); +/* 107 */ doc.add(new Field("contentsNormalized", ocrline.content, Field.Store.NO, Field.Index.ANALYZED)); +/* */ +/* 111 */ doc.add(new Field("language", language, Field.Store.YES, Field.Index.NOT_ANALYZED)); +/* */ +/* 114 */ doc.add(new Field("bbox", ocrline.bbox, Field.Store.YES, Field.Index.NOT_ANALYZED)); +/* */ +/* 116 */ doc.add(new Field("lineNumber", ocrline.lineNumber, Field.Store.YES, Field.Index.NOT_ANALYZED)); +/* 117 */ return doc; +/* */ } +/* */ } + +/* Location: /private/tmp/fulltextIndexer.jar + * Qualified Name: de.mpiwg.dwinter.lucencetools.documents.OcropusLineDocument + * JD-Core Version: 0.5.4 + */ \ No newline at end of file