# HG changeset patch # User Josef Willenborg # Date 1369124372 -7200 # Node ID e9fe3186670cc63c6a12cd4b0789082c4d64562f # Parent e845310098bafcf2a35c4ab0a2d3021ced177471 letzter Stand eingecheckt diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/.classpath --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/.classpath Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + + + + + diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/.externalToolBuilders/mpiwg-mpdl-cms-web-build.launch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/.externalToolBuilders/mpiwg-mpdl-cms-web-build.launch Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,13 @@ + + + + + + + + + + + + + diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/.project --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/.project Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,46 @@ + + + mpiwg-mpdl-cms-web + + + + + + org.eclipse.wst.jsdt.core.javascriptValidator + + + + + org.eclipse.jdt.core.javabuilder + + + + + org.eclipse.wst.common.project.facet.core.builder + + + + + org.eclipse.wst.validation.validationbuilder + + + + + org.eclipse.ui.externaltools.ExternalToolBuilder + full,incremental, + + + LaunchConfigHandle + <project>/.externalToolBuilders/mpiwg-mpdl-cms-web-build.launch + + + + + + org.eclipse.jem.workbench.JavaEMFNature + org.eclipse.wst.common.modulecore.ModuleCoreNature + org.eclipse.wst.common.project.facet.core.nature + org.eclipse.jdt.core.javanature + org.eclipse.wst.jsdt.core.jsNature + + diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/.settings/.jsdtscope --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/.settings/.jsdtscope Tue May 21 10:19:32 2013 +0200 @@ -0,0 
+1,12 @@ + + + + + + + + + + + + diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/.settings/org.eclipse.core.resources.prefs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/.settings/org.eclipse.core.resources.prefs Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,5 @@ +#Mon Sep 24 15:59:11 CEST 2012 +eclipse.preferences.version=1 +encoding//src/de/mpg/mpiwg/berlin/mpdl/servlets/cms/DocumentOperation.java=UTF-8 +encoding//src/de/mpg/mpiwg/berlin/mpdl/servlets/cms/GetDocInfo.java=UTF-8 +encoding//src/de/mpg/mpiwg/berlin/mpdl/servlets/cms/GetDocument.java=UTF-8 diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/.settings/org.eclipse.jdt.core.prefs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/.settings/org.eclipse.jdt.core.prefs Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,12 @@ +#Mon Sep 12 15:41:45 CEST 2011 +eclipse.preferences.version=1 +org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled +org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6 +org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve +org.eclipse.jdt.core.compiler.compliance=1.6 +org.eclipse.jdt.core.compiler.debug.lineNumber=generate +org.eclipse.jdt.core.compiler.debug.localVariable=generate +org.eclipse.jdt.core.compiler.debug.sourceFile=generate +org.eclipse.jdt.core.compiler.problem.assertIdentifier=error +org.eclipse.jdt.core.compiler.problem.enumIdentifier=error +org.eclipse.jdt.core.compiler.source=1.6 diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/.settings/org.eclipse.wst.common.component --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/.settings/org.eclipse.wst.common.component Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,10 @@ + + + + + + + + + + diff -r e845310098ba -r e9fe3186670c 
software/mpdl-services-new/mpiwg-mpdl-cms-web/.settings/org.eclipse.wst.common.project.facet.core.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/.settings/org.eclipse.wst.common.project.facet.core.xml Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,10 @@ + + + + + + + + + + diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/.settings/org.eclipse.wst.jsdt.ui.superType.container --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/.settings/org.eclipse.wst.jsdt.ui.superType.container Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,1 @@ +org.eclipse.wst.jsdt.launching.baseBrowserLibrary \ No newline at end of file diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/.settings/org.eclipse.wst.jsdt.ui.superType.name --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/.settings/org.eclipse.wst.jsdt.ui.superType.name Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,1 @@ +Window \ No newline at end of file diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/.settings/org.eclipse.wst.ws.service.policy.prefs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/.settings/org.eclipse.wst.ws.service.policy.prefs Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,3 @@ +#Mon Sep 12 15:41:45 CEST 2011 +eclipse.preferences.version=1 +org.eclipse.wst.ws.service.policy.projectEnabled=false diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/META-INF/MANIFEST.MF --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/META-INF/MANIFEST.MF Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,3 @@ +Manifest-Version: 1.0 +Class-Path: + diff -r e845310098ba -r e9fe3186670c 
software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/classes/constants.properties --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/classes/constants.properties Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,5 @@ +documentsDir=/Users/jwillenborg/mpdl/data/xml/documents +luceneDocumentsDir=/Users/jwillenborg/mpdl/data/xml/lucene/documents +luceneNodesDir=/Users/jwillenborg/mpdl/data/xml/lucene/nodes +dataDir=/Users/jwillenborg/mpdl/data/lt +confDir=/Users/jwillenborg/mpdl/data/collectionConfs diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/berkeley-db-3.3.82.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/berkeley-db-3.3.82.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/commons-codec-1.3.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/commons-codec-1.3.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/commons-collections-3.2.1.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/commons-collections-3.2.1.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/commons-collections-LICENSE.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/commons-collections-LICENSE.txt Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/commons-collections-NOTICE.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/commons-collections-NOTICE.txt Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,5 @@ +Apache Commons Collections +Copyright 2001-2008 The Apache Software Foundation + +This product includes software developed by +The Apache Software Foundation (http://www.apache.org/). 
diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/commons-httpclient-3.1.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/commons-httpclient-3.1.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/commons-httpclient-LICENSE.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/commons-httpclient-LICENSE.txt Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,176 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/commons-httpclient-NOTICE.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/commons-httpclient-NOTICE.txt Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,5 @@ +Apache Jakarta HttpClient +Copyright 1999-2007 The Apache Software Foundation + +This product includes software developed by +The Apache Software Foundation (http://www.apache.org/). 
diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/commons-io-2.0.1.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/commons-io-2.0.1.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/commons-lang3-3.0.1.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/commons-lang3-3.0.1.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/commons-logging-1.1.1.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/commons-logging-1.1.1.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/commons-logging-LICENSE.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/commons-logging-LICENSE.txt Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/commons-logging-NOTICE.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/commons-logging-NOTICE.txt Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,6 @@ +Apache Commons Logging +Copyright 2003-2007 The Apache Software Foundation + +This product includes software developed by +The Apache Software Foundation (http://www.apache.org/). 
+ diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/core-renderer.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/core-renderer.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/httpclient-4.1.2.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/httpclient-4.1.2.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/httpcore-4.1.2.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/httpcore-4.1.2.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/iText-2.0.8.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/iText-2.0.8.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/json_simple-1.1.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/json_simple-1.1.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/lucene-core-3.5.0.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/lucene-core-3.5.0.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/lucene-highlighter-3.5.0-javadoc.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/lucene-highlighter-3.5.0-javadoc.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/lucene-highlighter-3.5.0.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/lucene-highlighter-3.5.0.jar has changed diff -r e845310098ba -r e9fe3186670c 
software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/lucene-memory-3.5.0.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/lucene-memory-3.5.0.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/lucene-queries-3.5.0.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/lucene-queries-3.5.0.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/microsoft-translator-java-api-0.4.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/microsoft-translator-java-api-0.4.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/mpiwg-mpdl-cms-web.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/mpiwg-mpdl-cms-web.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/mpiwg-mpdl-cms.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/mpiwg-mpdl-cms.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/mpiwg-mpdl-lt.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/mpiwg-mpdl-lt.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/mpiwg-mpdl-xml.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/mpiwg-mpdl-xml.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/quartz-1.6.5.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/quartz-1.6.5.jar has changed diff -r e845310098ba -r e9fe3186670c 
software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/quartz-LICENSE.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/quartz-LICENSE.txt Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/saxon.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/saxon.txt Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,3 @@ +Saxon: + +Release 9.1.0.5 (free version): releases < 9.1.0.7 support saxon extension functions diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/saxon9-dom.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/saxon9-dom.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/saxon9-s9api.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/saxon9-s9api.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/saxon9.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/lib/saxon9.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/web.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/WEB-INF/web.xml Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,173 @@ + + + mpiwg-mpdl-cms-web + + index.html + + + GetDocument + GetDocument + GetDocument + de.mpg.mpiwg.berlin.mpdl.servlets.cms.GetDocument + + + GetDocument + /doc/GetDocument + + + DocumentOperation + DocumentOperation + DocumentOperation + de.mpg.mpiwg.berlin.mpdl.servlets.cms.DocumentOperation + + + DocumentOperation + /doc/DocumentOperation + + + GetPage + GetPage + GetPage + de.mpg.mpiwg.berlin.mpdl.servlets.cms.GetPage + + + GetPage + /query/GetPage + + + QueryDocuments + QueryDocuments + QueryDocuments + de.mpg.mpiwg.berlin.mpdl.servlets.cms.QueryDocuments + + + QueryDocuments + /query/QueryDocuments + + + QueryDocument + 
QueryDocument + QueryDocument + de.mpg.mpiwg.berlin.mpdl.servlets.cms.QueryDocument + + + QueryDocument + /query/QueryDocument + + + XQueryDocument + XQueryDocument + XQueryDocument + de.mpg.mpiwg.berlin.mpdl.servlets.cms.XQueryDocument + + + XQueryDocument + /query/XQueryDocument + + + MoreLikeThis + MoreLikeThis + MoreLikeThis + de.mpg.mpiwg.berlin.mpdl.servlets.cms.MoreLikeThis + + + MoreLikeThis + /query/MoreLikeThis + + + GetToken + GetToken + GetToken + de.mpg.mpiwg.berlin.mpdl.servlets.cms.GetToken + + + GetToken + /query/GetToken + + + GetDocInfo + GetDocInfo + GetDocInfo + de.mpg.mpiwg.berlin.mpdl.servlets.cms.GetDocInfo + + + GetDocInfo + /query/GetDocInfo + + + GetDocumentJobs + GetDocumentJobs + GetDocumentJobs + de.mpg.mpiwg.berlin.mpdl.servlets.cms.GetDocumentJobs + + + GetDocumentJobs + /doc/GetDocumentJobs + + + + GetDictionaryEntries + GetDictionaryEntries + GetDictionaryEntries + de.mpg.mpiwg.berlin.mpdl.servlets.lt.GetDictionaryEntries + + + GetDictionaryEntries + /lt/GetDictionaryEntries + + + GetLemmas + GetLemmas + GetLemmas + de.mpg.mpiwg.berlin.mpdl.servlets.lt.GetLemmas + + + GetLemmas + /lt/GetLemmas + + + GetForms + GetForms + GetForms + de.mpg.mpiwg.berlin.mpdl.servlets.lt.GetForms + + + GetForms + /lt/GetForms + + + Tokenize + Tokenize + Tokenize + de.mpg.mpiwg.berlin.mpdl.servlets.lt.Tokenize + + + Tokenize + /text/Tokenize + + + Normalize + Normalize + Normalize + de.mpg.mpiwg.berlin.mpdl.servlets.lt.Normalize + + + Normalize + /text/Normalize + + + Transcode + Transcode + Transcode + de.mpg.mpiwg.berlin.mpdl.servlets.lt.Transcode + + + Transcode + /text/Transcode + + + + de.mpg.mpiwg.berlin.mpdl.servlets.cms.MpiwgMpdlCmsWebServletContextListener + + + \ No newline at end of file diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/cms-services.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/cms-services.html Tue May 21 10:19:32 
2013 +0200 @@ -0,0 +1,563 @@ + + + + + Max Planck Institute for the History of Science - CMS Services + + +

Max Planck Institute for the History of Science - CMS Services

+ + + + \ No newline at end of file diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/css/getDictionaryEntries.css --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/css/getDictionaryEntries.css Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,95 @@ +.body { + margin:3px; + color: #000000; + background-color: #ebebeb; + min-width:1100px; +} +div.title { + font-size: 30px; + font-weight: bold; + margin-bottom: 10px; +} +input.query { + border-radius: 5px; +} +span.inputType { + font-size: 22px; + font-weight:bold; + display:block; + margin-top: 10px; +} + +.tree ul, +.tree li { + padding: 0; + margin: 0; + margin-left: 10px; + list-style: none; +} +.tree input { + position: absolute; + opacity: 0; +} +.tree input + label + ul { + margin: 0 0 0 22px; +} +.tree input + label + ul { + display: none; +} +.tree label, +.tree label::before { + cursor: pointer; +} +.tree input:disabled + label { + cursor: default; + opacity: .6; +} +.tree input:checked:not(:disabled) + label + ul { + display: block; +} +.tree label, +.tree label::before { + background: url("/mpiwg-mpdl-lt-web/images/treecheckbox.png") no-repeat; +} +.tree label, +.tree a, +.tree label::before { + display: inline-block; + height: 16px; + line-height: 16px; /* equal to height, so a single line of text is centred in the 16px box */ + vertical-align: middle; +} +.tree label { + background-position: 18px 0; +} +.tree label::before { + content: ""; + width: 16px; + margin: 0 22px 0 0; + vertical-align: middle; + background-position: 0 -32px; +} +.tree input:checked + label::before { + background-position: 0 -16px; +} + +/* webkit adjacent element selector bugfix */ +@media screen and (-webkit-min-device-pixel-ratio:0) +{ + .tree + { + -webkit-animation: webkit-adjacent-element-selector-bugfix infinite 1s; + } + + @-webkit-keyframes webkit-adjacent-element-selector-bugfix + { + from + { + padding: 0; + } + to + { + padding: 0; + } + } +} diff -r e845310098ba -r e9fe3186670c 
software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/css/page.css --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/css/page.css Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,261 @@ +/* general */ +.it { font-style:italic } /* DESpecs: _ _ and attribute "it" */ +.bf { font-weight:bold } /* DESpecs: */ +.sc { font-variant:small-caps } /* DESpecs: */ +.sub { vertical-align:sub; font-size:.8em } /* DESpecs: <_> */ /* line-height:1em; ? */ +.super { vertical-align:super; font-size:.8em } /* DESpecs: <^> */ +.sm { font-size:smaller } /* DESpecs: , if within

*/ +.ul { text-decoration:underline } /* DESpecs:

    */ +.ol { text-decoration:overline } /* DESpecs:
      */ +.st { text-decoration:line-through } /* DESpecs: */ +.red { color:red } /* DESpecs: */ +.sp { letter-spacing:0.3em } /* DESpecs: */ /* "font-stretch:wider;" is deprecated */ +.center { text-align:center } +.fr { font-family:'Lucida blackletter'} /* preliminary representation of Fraktur */ +.rom { font-family:'default'} + +/* unused */ +.setoff { margin-left:2cm; margin-right:2cm } + +/* highlighting */ +.highlight { background-color:#D3D3D3; } /* LightGrey */ +div.highlight.elem { background-color:#D3D3D3; } /* LightGrey */ +span.highlight.elem { background-color:#D3D3D3; } /* LightGrey */ +span.highlight.hit { background-color:#77DD77; } /* pastell green */ +span.hit.highlight { background-color:#77DD77; } /* pastell green */ + +/* about */ +span.about {float:right;} + +/* query results */ +span.query { font-weight:bold; font-size:150%; } +span.result { display:none } +tr.hit { vertical-align: top; } +td.hitNum { display:inline-block; } +td.hitLink { margin-left:1%; display:inline-block; width:30%; } +td.hitContent { display:inline-block; width:90%; } + +/* xml presentation */ +span.xml.elementName { font-weight:bold;color:purple; } +span.xml.attributeName { font-weight:bold; } +span.xml.attributeValue { color:blue; } +span.xml.comment { color:green; font-style:italic; } +ul.xml.element { margin-left:0px;padding-left:8px } +ul.xml.element.highlight { background-color:#D3D3D3; } /* LightGrey */ + +/* divs */ +div.float.none { + margin-left:10%; +} +div.float.right { + float:right; + clear:both; + margin-right:10% +} + +/* page breaks */ +span.pb { + display:none; +} + +/* Figures */ +span.figure { + display: block; + width: 200px; + margin-top: 0.5em; + margin-bottom: 0.5em; + padding: 5px; + border: 1px dashed silver; +} +span.figureNumber { +} +span.figureNum { + display:none; +} +span.figureNumText { + display: block; +} +span.caption { +} +span.description { + display: block; +} +span.variables { + display: block; +} + +span.handwritten { +} + +/* toc 
elements */ +div.tocItem { + float:left; + width:90%; +} +span.tocPageRef { + float:right; + vertical-align:top; +} + +/* paragraphs */ +div.p { + margin: 1.12em 0px; +} + +/* initials */ +span.initial { + float: left; + font-family: Georgia; + font-size: 250%; + line-height: 0.8em; + margin-right: 10px; + padding-top: 1px; +} + +/* small caps */ +span.sc { font-weight:bold; } +span.sc.it {font-weight:bold; font-style:italic; } + +/* variables and numbers */ +span.var { font-style:italic; } +span.var.segment { font-style:italic; text-decoration:overline; } +span.var.line { font-style:italic; text-decoration:underline; } +span.var.gnomon { font-style:italic; text-decoration:line-through; } +span.num { color:maroon } + +/* foreign languages */ +span.foreign { color:#006400; } +span.foreign a:link { color:#006400; } +span.foreign a:visited { color:#1B370D; } +span.foreign.el span.foreign.grc span.foreign.greek { color:#006400; } +span.foreign.en span.foreign.english { color:#006400; } +span.foreign.fr span.foreign.french { color:#006400; } +span.foreign.la span.foreign.latin { color:#006400; } + +/* quotes */ +span.q { font-style:italic; } +div.quote { margin-left:10px; font-style:italic; } +div.blockquote { margin-left:10px; font-style:normal; } +div.set-off { margin-left:10px; font-style:italic; } + +/* dictionary / terminology */ +span.term { color:purple; } /* Blue */ +span.entry { display:block; margin-bottom:20px;} +span.entryDiv { margin-left:2em; display:block; margin-bottom:0.5em; margin-top:0.5em;} +span.orth { font-weight:bold; } +span.mentioned { font-style:italic; } + +/* GIS elements */ +span.place { color:#0000FF; } /* Blue */ +span.person { color:#0000FF; } /* Blue */ + +/* sentence */ +span.s.highlight { background-color:#D3D3D3; } /* LightGrey */ +span.s:hover { background-color:#D3D3D3; } /* LightGrey */ +span.s.it { font-style:italic; } + +/* ref */ +span.ref a:link {text-decoration: underline; color: blue;} +span.ref a:visited {text-decoration: 
none; color: #800080;} +span.ref a:hover {text-decoration: underline; color: blue;} + +/* Links */ +a.dictionary:link {text-decoration: none; color: #892B06;} +a.dictionary:visited {text-decoration: none; color: #892B06;} +a.dictionary:hover {text-decoration: underline; color: #892B06;} + +/* Notes */ +span.note { font-style:italic; } +span.note span.noteSign {vertical-align:super; font-size:.8em; } +span.note span.noteBody span.noteSign {display:none;} +span.note.left { position:absolute; left:2em; width: 5em; font-style:normal; } +span.note.right { position:absolute; left:30em; width: 5em; font-style:normal; } +a.note { + color: blue; + margin-right: 0.2em; +} +a.note:before { + content: url('../images/linkback.png'); + margin-left: 0.1em; + margin-right: 0.2em; +} +a.note:link { text-decoration: none; } +a.note:visited { text-decoration: none; } +a.note:hover { text-decoration: underline; } +a.noteRef { + color: blue; + margin-right: 0.2em; +} +a.noteRef:before { + content: url('../images/linkto.png'); + margin-left: 0.1em; + margin-right: 0.2em; +} +a.noteRef:link { text-decoration: none; } +a.noteRef:visited { text-decoration: none; } +a.noteRef:hover { text-decoration: underline; } +/* +a:link {text-decoration: none;} +a:visited {text-decoration: none} +a:active {text-decoration: none} +a:hover {text-decoration: underline; color: red;} + */ + +/* table */ +table { margin-left:30px; } + +table.toc { width:100%; margin-left:1px; } + +/* page styling for generating PDF documents with Flying Saucer */ +div.pageHeaderTitle { + font-weight:bold; + text-align:center; +} +body { + counter-reset: pn; +} +div.page:before { + float:right; + font:11pt sans-serif; + font-weight:bold; + content:"[Page " counter(pn) "]"; + counter-increment:pn; + page:pdfPage; + clear:both; +} +div.page { + page-break-after:always; + page:pdfPage; + clear:both; +} +@page pdfPage { + size:A4; + margin-top:0.7cm; + margin-bottom:0.7cm; + margin-left:0.7cm; + margin-right:0.7cm; + 
padding:0.2cm; + border:thin solid #808080; + @top-left { font: 11pt sans-serif; padding-left: 0.2cm; padding-right: 1cm; font-weight:bold;}; + @top-right { font: 11pt sans-serif; white-space: nowrap; font-weight:bold;}; + @bottom-left { font: 11pt sans-serif; white-space: nowrap; font-weight:bold;}; + @bottom-right { font: 11pt sans-serif; white-space: nowrap; content: counter(page);}; +} + +div.tocPage { + page-break-after:always; + page:tocPage; +} +@page tocPage { + size:A4; + margin-top:0.7cm; + margin-bottom:0.7cm; + margin-left:0.7cm; + margin-right:0.7cm; + padding:0.2cm; + border:thin solid #808080; + @top-left { font: 11pt sans-serif; padding-left: 0.2cm; padding-right: 1cm; font-weight:bold;}; + @top-right { font: 11pt sans-serif; white-space: nowrap; font-weight:bold;}; + @bottom-left { font: 11pt sans-serif; white-space: nowrap; font-weight:bold;}; + @bottom-right { font: 11pt sans-serif; white-space: nowrap; content: counter(page);}; +} diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/css/pageNorm.css --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/css/pageNorm.css Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,12 @@ +span.w a.dictionary { + display:none; +} +span.w span.dictionary { + display:none; +} +span.w span.orig { + display:none; +} +span.w span.reg { + display:none; +} diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/css/pageNormDict.css --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/css/pageNormDict.css Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,15 @@ +span.w a.dictionary span.orig { + display:none; +} +span.w a.dictionary span.reg { + display:none; +} +span.w span.dictionary span.orig { + display:none; +} +span.w span.dictionary span.reg { + display:none; +} +span.w span.nodictionary { + display:none; +} \ No newline at end of file diff -r 
e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/css/pageOrig.css --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/css/pageOrig.css Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,12 @@ +span.w a.dictionary { + display:none; +} +span.w span.dictionary { + display:none; +} +span.w span.reg { + display:none; +} +span.w span.norm { + display:none; +} diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/css/pageOrigDict.css --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/css/pageOrigDict.css Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,15 @@ +span.w a.dictionary span.reg { + display:none; +} +span.w a.dictionary span.norm { + display:none; +} +span.w span.dictionary span.reg { + display:none; +} +span.w span.dictionary span.norm { + display:none; +} +span.w span.nodictionary { + display:none; +} diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/css/pageReg.css --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/css/pageReg.css Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,12 @@ +span.w a.dictionary { + display:none; +} +span.w span.dictionary { + display:none; +} +span.w span.orig { + display:none; +} +span.w span.norm { + display:none; +} diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/css/pageRegDict.css --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/css/pageRegDict.css Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,15 @@ +span.w a.dictionary span.orig { + display:none; +} +span.w a.dictionary span.norm { + display:none; +} +span.w span.dictionary span.orig { + display:none; +} +span.w span.dictionary span.norm { + display:none; +} +span.w span.nodictionary { + display:none; +} diff -r 
e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/2downarrow.png Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/2downarrow.png has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/2leftarrow.png Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/2leftarrow.png has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/2rightarrow.png Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/2rightarrow.png has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/2uparrow.png Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/2uparrow.png has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/book-pointer.gif Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/book-pointer.gif has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/book.png Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/book.png has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/camera.png Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/camera.png has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/copyleft.png Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/copyleft.png has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/dictionary.gif Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/dictionary.gif has changed diff -r e845310098ba -r e9fe3186670c 
software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/dictionaryMorph.gif Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/dictionaryMorph.gif has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/dot.gif Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/dot.gif has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/download.png Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/download.png has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/echo.gif Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/echo.gif has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/figures.png Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/figures.png has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/help.png Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/help.png has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/image.jpg Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/image.jpg has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/imageU.jpg Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/imageU.jpg has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/info.png Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/info.png has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/left.gif Binary file 
software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/left.gif has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/link.png Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/link.png has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/linkback.png Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/linkback.png has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/linkext.png Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/linkext.png has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/linkto.png Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/linkto.png has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/malcolm.jpg Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/malcolm.jpg has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/malcolm.tif Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/malcolm.tif has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/pirate-joey.gif Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/pirate-joey.gif has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/right.gif Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/right.gif has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/search.gif Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/search.gif has changed 
diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/searchMorph.gif Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/searchMorph.gif has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/searchStructural.gif Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/searchStructural.gif has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/searchXPath.gif Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/searchXPath.gif has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/slime_logo.png Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/slime_logo.png has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/text.jpg Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/text.jpg has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/textPollux.jpg Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/textPollux.jpg has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/textPolluxU.jpg Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/textPolluxU.jpg has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/textU.jpg Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/textU.jpg has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/toc.gif Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/toc.gif has changed diff -r e845310098ba -r e9fe3186670c 
software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/treecheckbox.png Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/treecheckbox.png has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/xml.jpg Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/xml.jpg has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/xmlU.jpg Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/images/xmlU.jpg has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/index.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/index.html Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,15 @@ + + + + + Max Planck Institute for the History of Science - Language technology & CMS services + + +

      Max Planck Institute for the History of Science - Language technology & CMS services

      + + + + \ No newline at end of file diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/lt-services.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/lt-services.html Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,408 @@ + + + + + Max Planck Institute for the History of Science - Language technology services + + + + + + +
      + [This software is dedicated to Dr. Malcolm Hyman] + Info
      + [It is based on Donatus and Pollux] + Info +
      +

      Max Planck Institute for the History of Science - Language technology services

      + +
        +
      • Url: /mpiwg-mpdl-cms-web/lt/GetDictionaryEntries +
          +
        • Request parameters +
            +
          • query (required) +
              +
            • by one form or lemma (e.g. "revolution")
            • +
            • by a list of forms or lemmas (e.g. "revolution equality brotherliness")
            • +
            • by a prefix range: entries starting with a prefix (e.g. "a*")
            • +
            +
          • +
          • queryDisplay (optional) +
              +
            • display of the query
            • +
            • default: content of parameter "query"
            • +
            +
          • +
          • inputType (optional) +
              +
            • "form"
            • +
            • "lemma"
            • +
            • default: "form"
            • +
            +
          • +
          • language (optional) +
              +
            • ISO 639-3 specifier
            • +
            • default: "eng"
            • +
            +
          • +
          • dictionary (optional) +
              +
            • dictionary name, e.g. "webster"
            • +
            • default: "all" (all dictionaries for the specified language)
            • +
            +
          • +
          • outputType (optional) +
              +
            • this parameter can occur many times (e.g. "outputType=morphCompact&outputType=dictCompact") +
                +
              • "morphCompact"
              • +
              • "dictCompact"
              • +
              • "wikiCompact"
              • +
              • "allCompact" (all output types compact)
              • +
              • "morphFull"
              • +
              • "dictFull"
              • +
              • "wikiFull"
              • +
              • "allFull" (all output types full)
              • +
              +
            • +
            • default: "allCompact"
            • +
            +
          • +
          • outputFormat (optional) +
              +
            • "html"
            • +
            • "xml"
            • +
            • default: "xml"
            • +
            +
          • +
          • normalization (optional) +
              +
            • "none"
            • +
            • "norm"
            • +
            • default: "norm"
            • +
            +
          • +
          • resultPageNumber (optional) +
              +
            • works only for range queries
            • +
            • page number of the result (e.g. "2": result entries from position 51 to 100)
            • +
            • default: "1"
            • +
            +
          • +
          • resultPageSize (optional) +
              +
            • works only for range queries
            • +
            • page size of the result (e.g. "100": each result page has a size of 100)
            • +
            • default: "50"
            • +
            +
          • +
          +
        • +
        • Response output + +
        • +
        +
      • + +
      • Url: /mpiwg-mpdl-cms-web/lt/GetLemmas +
          +
        • Request parameters +
            +
          • query (required) +
              +
            • one form or lemma (e.g. "revolution") or
            • +
            • blank separated list of forms or lemmas (e.g. "revolution equality brotherliness")
            • +
            +
          • +
          • inputType (optional) +
              +
            • "form"
            • +
            • "lemma"
            • +
            • default: "form"
            • +
            +
          • +
          • language (optional) +
              +
            • ISO 639-3 specifier
            • +
            • default: "eng"
            • +
            +
          • +
          • outputType (optional) +
              +
            • "compact"
            • +
            • "full"
            • +
            • default: "compact"
            • +
            +
          • +
          • outputFormat (optional) +
              +
            • "html"
            • +
            • "xml"
            • +
            • "string" (lemma names separated by a blank)
            • +
            • default: "xml"
            • +
            +
          • +
          • normalization (optional) +
              +
            • "none"
            • +
            • "norm"
            • +
            • default: "norm"
            • +
            +
          • +
          +
        • +
        • Response output + +
        • +
        +
      • + +
      • Url: /mpiwg-mpdl-cms-web/lt/GetForms +
          +
        • Request parameters +
            +
          • query (required) +
              +
            • one lemma (e.g. "revolution") or
            • +
            • blank separated list of forms (e.g. "revolution equality brotherliness")
            • +
            +
          • +
          • language (optional) +
              +
            • ISO 639-3 specifier
            • +
            • default: "eng"
            • +
            +
          • +
          • outputType (optional) +
              +
            • "compact"
            • +
            • "full"
            • +
            • default: "compact"
            • +
            +
          • +
          • outputFormat (optional) +
              +
            • "html"
            • +
            • "xml"
            • +
            • "string" (lemma names separated by a blank)
            • +
            • default: "xml"
            • +
            +
          • +
          • normalization (optional) +
              +
            • "none"
            • +
            • "norm"
            • +
            • default: "norm"
            • +
            +
          • +
          +
        • +
        • Response output + +
        • +
        +
      • + +
      • Url: /mpiwg-mpdl-cms-web/text/Tokenize + +
      • + +
      • Url: /mpiwg-mpdl-cms-web/text/Normalize +
          +
        • Request parameters +
            +
          • inputString (required) +
              +
            • string which should be normalized
            • +
            +
          • +
          • language (optional) +
              +
            • ISO 639-3 specifier
            • +
            • default: "eng"
            • +
            +
          • +
          • type (optional) +
              +
            • "dictionary"
            • +
            • "display"
            • +
            • default: "display"
            • +
            +
          • +
          +
        • +
        • Response output + +
        • +
        +
      • + +
      • Url: /mpiwg-mpdl-cms-web/text/Transcode +
          +
        • Request parameters +
            +
          • inputString (required) +
              +
            • string which should be transcoded
            • +
            +
          • +
          • srcEncoding (required) +
              +
            • "betacode"
            • +
            • "buckwalter"
            • +
            • "unicode"
            • +
            +
          • +
          • destEncoding (optional) +
              +
            • "betacode"
            • +
            • "buckwalter"
            • +
            • "unicode"
            • +
            • default: "unicode"
            • +
            +
          • +
          +
        • +
        • Response output + +
        • +
        +
      • +
      + + + \ No newline at end of file diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/query/index.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/query/index.html Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,313 @@ + + +MPIWG Query project + + + + + + + + + + + + +
      + Query documents Info Query Documents + + Release 0.5, July, 2012 +
      +
      + + + + + + + + + + + + + +
      + + + + + + + + + + + + + + + +
      Documents: + Archimedes DTD
      + (until 2008) +
      + + + Echo Schema
      + (since 2009) +
      + + + TEI Schema
      + (since 2011) +
      + + + Diverse
      + (since 2011) +
      + + +
      + + + +
      +
      +
      + + + + + + + + + + + + + +
      + + + + + + +
      + + + +
      +
      + + + + + +
      +
      + + + + + +
      + Document contains + +
      + + + + +
      +
      + + + + + + + + + +
      + morphological + + + + language: + + + + + translate
      +
      + +
      +
      +
      +

      + See the CMS Services Page. If you find a bug, let us know +
      Last software update: July, 2012 + + \ No newline at end of file diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/query/info.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/query/info.html Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,21 @@ + + + + + Max Planck Institute for the History of Science - CMS services - Documentation + + +

      Max Planck Institute for the History of Science - CMS services - Documentation

      + +

      Document bases

      +
        +
      • Archimedes DTD (until 2008): more than 120 documents (4KB - 18MB), coming from Echo collections "Archimedes" and "Historical Travel Guides", languages: Latin, Italian, English, German, French, Dutch, Greek, Arabic, Chinese
      • +
      • Echo Schema (since 2009): more than 100 documents planned, in different languages
      • +
      • TEI Schema (since 2011): different languages, mainly by external document providers
      • +
      • Diverse (since 2011): different languages, no schema prerequisites, mainly HTML documents
      • +
      + +

      Metadata and fulltext search

      +Query power of Lucene, see: Lucene query syntax + + \ No newline at end of file diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/xsl/generateId.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/WebContent/xsl/generateId.xsl Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,34 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/build/build.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/build/build.xml Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,57 @@ + + + mpiwg-mpdl-cms-web + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/exception/ApplicationException.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/exception/ApplicationException.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/cms/DocumentOperation.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/cms/DocumentOperation.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/cms/GetDocInfo.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/cms/GetDocInfo.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/cms/GetDocument.class Binary file 
software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/cms/GetDocument.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/cms/GetDocumentJobs.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/cms/GetDocumentJobs.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/cms/GetPage.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/cms/GetPage.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/cms/GetToken.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/cms/GetToken.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/cms/MoreLikeThis.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/cms/MoreLikeThis.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/cms/MpiwgMpdlCmsWebServletContextListener.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/cms/MpiwgMpdlCmsWebServletContextListener.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/cms/QueryDocument.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/cms/QueryDocument.class has changed diff -r e845310098ba -r e9fe3186670c 
software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/cms/QueryDocuments.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/cms/QueryDocuments.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/cms/XQueryDocument.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/cms/XQueryDocument.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/lt/GetDictionaryEntries.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/lt/GetDictionaryEntries.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/lt/GetForms.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/lt/GetForms.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/lt/GetLemmas.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/lt/GetLemmas.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/lt/MpiwgMpdlLtWebServletContextListener.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/lt/MpiwgMpdlLtWebServletContextListener.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/lt/Normalize.class Binary file 
software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/lt/Normalize.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/lt/Tokenize.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/lt/Tokenize.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/lt/Transcode.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/lt/Transcode.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/util/ServletUtil.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/build/classes/de/mpg/mpiwg/berlin/mpdl/servlets/util/ServletUtil.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/conf/constants-thrax.properties --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/conf/constants-thrax.properties Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,5 @@ +documentsDir=/usr/local/java/mpdl-data/xml/documents +luceneDocumentsDir=/usr/local/java/mpdl-data/xml/lucene/documents +luceneNodesDir=/usr/local/java/mpdl-data/xml/lucene/nodes +dataDir=/usr/local/java/mpdl-data/lt +confDir=/usr/local/java/mpdl-data/collectionConfs diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/conf/constants.properties --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/conf/constants.properties Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,5 @@ +documentsDir=/Users/jwillenborg/mpdl/data/xml/documents +luceneDocumentsDir=/Users/jwillenborg/mpdl/data/xml/lucene/documents 
+luceneNodesDir=/Users/jwillenborg/mpdl/data/xml/lucene/nodes +dataDir=/Users/jwillenborg/mpdl/data/lt +confDir=/Users/jwillenborg/mpdl/data/collectionConfs diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/dist-remote/mpiwg-mpdl-cms-web.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/dist-remote/mpiwg-mpdl-cms-web.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/dist-remote/mpiwg-mpdl-cms-web.war Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/dist-remote/mpiwg-mpdl-cms-web.war has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/dist/mpiwg-mpdl-cms-web.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/dist/mpiwg-mpdl-cms-web.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/dist/mpiwg-mpdl-cms-web.war Binary file software/mpdl-services-new/mpiwg-mpdl-cms-web/dist/mpiwg-mpdl-cms-web.war has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/src/de/mpg/mpiwg/berlin/mpdl/exception/ApplicationException.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/src/de/mpg/mpiwg/berlin/mpdl/exception/ApplicationException.java Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,15 @@ +package de.mpg.mpiwg.berlin.mpdl.exception; + +public class ApplicationException extends Exception { + private static final long serialVersionUID = 1L; + + public ApplicationException(Exception e) { + super(e); + } + + public ApplicationException(String str) { + super(str); + } + +} + diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/src/de/mpg/mpiwg/berlin/mpdl/servlets/cms/DocumentOperation.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/src/de/mpg/mpiwg/berlin/mpdl/servlets/cms/DocumentOperation.java Tue May 21 10:19:32 
2013 +0200 @@ -0,0 +1,102 @@ +package de.mpg.mpiwg.berlin.mpdl.servlets.cms; + +import java.io.IOException; +import java.io.PrintWriter; + +import javax.servlet.ServletConfig; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import de.mpg.mpiwg.berlin.mpdl.cms.scheduler.CmsChainScheduler; +import de.mpg.mpiwg.berlin.mpdl.cms.scheduler.CmsDocOperation; +import de.mpg.mpiwg.berlin.mpdl.servlets.util.ServletUtil; + +public class DocumentOperation extends HttpServlet { + private static final long serialVersionUID = 1L; + + public DocumentOperation() { + super(); + } + + public void init(ServletConfig config) throws ServletException { + super.init(config); + } + + protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + request.setCharacterEncoding("utf-8"); + response.setCharacterEncoding("utf-8"); + String srcUrlStr = request.getParameter("srcUrl"); + String docId = request.getParameter("docId"); // id in file system: e.g. 
/tei/en/Test_1789.xml + String mainLanguage = request.getParameter("mainLanguage"); // main language of that document + String elementNames = request.getParameter("elementNames"); + String operation = request.getParameter("operation"); + String outputFormat = request.getParameter("outputFormat"); + if (outputFormat == null) + outputFormat = "xml"; + if (outputFormat.equals("xml")) { + response.setContentType("text/xml"); + } else { + response.setContentType("text/html"); + } + CmsDocOperation docOperation = new CmsDocOperation(operation, srcUrlStr, null, docId); + if (mainLanguage != null) + docOperation.setMainLanguage(mainLanguage); + String[] elementNamesArray = null; + if (elementNames != null) + elementNamesArray = elementNames.split(" "); + docOperation.setElementNames(elementNamesArray); + try { + if (docId == null || docId.isEmpty()) { + write(response, "Parameter: \"docId\" is not set. Please set parameter \"docId\"."); + return; + } + if (operation.equals("create") || operation.equals("delete")) { + CmsChainScheduler scheduler = CmsChainScheduler.getInstance(); + docOperation = scheduler.doOperation(docOperation); + String jobId = "" + docOperation.getOrderId(); + String baseUrl = ServletUtil.getInstance().getBaseUrl(request); + String docJobUrlStr = baseUrl + "/doc/GetDocumentJobs?id=" + jobId; + if (outputFormat.equals("xml")) { + write(response, ""); + write(response, ""); + write(response, "" + jobId + ""); + write(response, "" + docJobUrlStr + ""); + write(response, ""); + write(response, ""); + } else if (outputFormat.equals("html")) { + write(response, ""); + write(response, "

      " + "Document operation result" + "

      "); + write(response, "See your document operation " + jobId + " " + "here" + ""); + write(response, ""); + } else if (outputFormat.equals("string")) { + write(response, docJobUrlStr); + } + } else { + String errorStr = "Error: Operation: " + operation + " is not supported"; + if (outputFormat.equals("xml")) { + write(response, "" + errorStr + ""); + } else { + write(response, ""); + write(response, "

      " + "Error" + "

      "); + write(response, errorStr); + } + } + PrintWriter out = response.getWriter(); + out.close(); + } catch (Exception e) { + throw new ServletException(e); + } + } + + protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + // TODO + } + + private void write(HttpServletResponse response, String str) throws IOException { + PrintWriter out = response.getWriter(); + out.write(str); + } + +} diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/src/de/mpg/mpiwg/berlin/mpdl/servlets/cms/GetDocInfo.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/src/de/mpg/mpiwg/berlin/mpdl/servlets/cms/GetDocInfo.java Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,195 @@ +package de.mpg.mpiwg.berlin.mpdl.servlets.cms; + +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.Date; + +import javax.servlet.ServletConfig; +import javax.servlet.ServletContext; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.apache.commons.io.FileUtils; + +import de.mpg.mpiwg.berlin.mpdl.cms.document.DocumentHandler; +import de.mpg.mpiwg.berlin.mpdl.cms.document.MetadataRecord; +import de.mpg.mpiwg.berlin.mpdl.cms.lucene.IndexHandler; +import de.mpg.mpiwg.berlin.mpdl.cms.transform.TocTransformer; +import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; +import de.mpg.mpiwg.berlin.mpdl.util.Util; + +public class GetDocInfo extends HttpServlet { + private static final long serialVersionUID = 1L; + private TocTransformer tocTransformer; + + public GetDocInfo() { + super(); + } + + public void init(ServletConfig config) throws ServletException { + super.init(config); + ServletContext context = getServletContext(); + tocTransformer = (TocTransformer) 
context.getAttribute("tocTransformer"); + } + + protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + request.setCharacterEncoding("utf-8"); + response.setCharacterEncoding("utf-8"); + String docId = request.getParameter("docId"); + String field = request.getParameter("field"); + String outputFormat = request.getParameter("outputFormat"); + if (outputFormat == null) + outputFormat = "xml"; + String cssUrl = request.getParameter("cssUrl"); + if (cssUrl == null) { + String baseUrl = getBaseUrl(request); + cssUrl = baseUrl + "/css/page.css"; + } + try { + IndexHandler indexHandler = IndexHandler.getInstance(); + MetadataRecord mdRecord = indexHandler.getDocMetadata(docId); + if (outputFormat.equals("xml")) + response.setContentType("text/xml"); + else if (outputFormat.equals("html")) + response.setContentType("text/html"); + PrintWriter out = response.getWriter(); + if (mdRecord != null && outputFormat.equals("xml")) { + out.print(""); + out.print("" + docId + ""); + String identifier = mdRecord.getIdentifier(); + if ((field == null || (field != null && field.equals("identifier"))) && identifier != null) + out.print("" + identifier + ""); + String uri = mdRecord.getUri(); + if ((field == null || (field != null && field.equals("uri"))) && uri != null) + out.print("" + uri + ""); + String collectionNames = mdRecord.getCollectionNames(); + if ((field == null || (field != null && field.equals("collectionNames"))) && collectionNames != null) + out.print("" + collectionNames + ""); + String author = mdRecord.getCreator(); + if ((field == null || (field != null && field.equals("author"))) && author != null) + out.print("" + author + ""); + String title = mdRecord.getTitle(); + if ((field == null || (field != null && field.equals("title"))) && title != null) + out.print("" + title + ""); + String language = mdRecord.getLanguage(); + if ((field == null || (field != null && field.equals("language"))) && language 
!= null) + out.print("" + language + ""); + String publisher = mdRecord.getPublisher(); + if ((field == null || (field != null && field.equals("publisher"))) && publisher != null) + out.print("" + publisher + ""); + String date = mdRecord.getYear(); + if ((field == null || (field != null && field.equals("date"))) && date != null) + out.print("" + date + ""); + String subject = mdRecord.getSubject(); + if ((field == null || (field != null && field.equals("subject"))) && subject != null) + out.print("" + subject + ""); + String rights = mdRecord.getRights(); + if ((field == null || (field != null && field.equals("rights"))) && rights != null) + out.print("" + rights + ""); + String license = mdRecord.getLicense(); + if ((field == null || (field != null && field.equals("license"))) && license != null) + out.print("" + license + ""); + String accessRights = mdRecord.getAccessRights(); + if ((field == null || (field != null && field.equals("accessRights"))) && accessRights != null) + out.print("" + accessRights + ""); + String echoId = mdRecord.getEchoId(); + if ((field == null || (field != null && field.equals("echoId"))) && echoId != null) + out.print("" + echoId + ""); + String echoPageImageDir = mdRecord.getEchoPageImageDir(); + if ((field == null || (field != null && field.equals("echoPageImageDir"))) && echoPageImageDir != null) + out.print("" + echoPageImageDir + ""); + String echoFiguresDir = mdRecord.getEchoFiguresDir(); + if ((field == null || (field != null && field.equals("echoFiguresDir"))) && echoFiguresDir != null) + out.print("" + echoFiguresDir + ""); + String mpiwgDocId = mdRecord.getMpiwgDocId(); + if ((field == null || (field != null && field.equals("mpiwgDocId"))) && mpiwgDocId != null) + out.print("" + mpiwgDocId + ""); + if (field == null || (field != null && ! field.equals("toc") && ! field.equals("figures") && ! field.equals("notes") && ! field.equals("notesHandwritten") && ! field.equals("pages") && ! 
field.equals("places"))) + out.print(""); + int pageCount = mdRecord.getPageCount(); + if (field == null || (field != null && field.equals("countPages"))) + out.print("" + pageCount + ""); + DocumentHandler docHandler = new DocumentHandler(); + String docDir = docHandler.getDocDir(docId); + String tocFileName = docDir + "/toc.xml"; + File tocFile = new File(tocFileName); + String tocStr = FileUtils.readFileToString(tocFile, "utf-8"); + String tocCount = getTocCount(tocStr, "toc"); + out.print("" + tocCount + ""); + String figuresCount = getTocCount(tocStr, "figures"); + out.print("" + figuresCount + ""); + String notesCount = getTocCount(tocStr, "notes"); + out.print("" + notesCount + ""); + String notesHandwrittenCount = getTocCount(tocStr, "notesHandwritten"); + out.print("" + notesHandwrittenCount + ""); + String placesCount = getTocCount(tocStr, "places"); + out.print("" + placesCount + ""); + Date lastModified = mdRecord.getLastModified(); + if ((field == null || (field != null && field.equals("lastModified"))) && lastModified != null) { + String lastModifiedStr = new Util().toXsDate(lastModified); + out.print("" + lastModifiedStr + ""); + } + String schemaName = mdRecord.getSchemaName(); + if ((field == null || (field != null && field.equals("schema"))) && schemaName != null) + out.print("" + schemaName + ""); + if (field == null || (field != null && ! field.equals("toc") && ! field.equals("figures") && ! field.equals("notes") && ! field.equals("notesHandwritten") && ! field.equals("pages") && ! 
field.equals("places"))) + out.print(""); + if (field != null && (field.equals("toc") || field.equals("figures") || field.equals("notes") || field.equals("notesHandwritten") || field.equals("pages") || field.equals("places"))) { + String tocXmlStr = tocTransformer.transform(tocStr, field, outputFormat); + out.print(tocXmlStr); + } + out.print(""); + } else if (mdRecord != null && outputFormat.equals("html")) { + if (field != null && (field.equals("toc") || field.equals("figures") || field.equals("notes") || field.equals("notesHandwritten") || field.equals("pages") || field.equals("places"))) { + DocumentHandler docHandler = new DocumentHandler(); + String docDir = docHandler.getDocDir(docId); + String tocFileName = docDir + "/toc.xml"; + File tocFile = new File(tocFileName); + String tocStr = FileUtils.readFileToString(tocFile, "utf-8"); + String tocHtmlStr = tocTransformer.transform(tocStr, field, outputFormat); + String title = docId + ", " + field; + String head = "" + title + ""; + String result = "" + head + "" + tocHtmlStr + "" + ""; + out.print(result); + } + } else { + out.print("" + "no document found with id: " + docId + ""); + } + out.close(); + } catch (ApplicationException e) { + throw new ServletException(e); + } + } + + protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + doGet(request, response); + } + + private String getBaseUrl(HttpServletRequest request) { + return getServerUrl(request) + request.getContextPath(); + } + + private String getServerUrl(HttpServletRequest request) { + if ( ( request.getServerPort() == 80 ) || ( request.getServerPort() == 443 ) ) + return request.getScheme() + "://" + request.getServerName(); + else + return request.getScheme() + "://" + request.getServerName() + ":" + request.getServerPort(); + } + + private String getTocCount(String tocStr, String type) { + String tocCount = "0"; + if (tocStr == null) + return "0"; + String listTypeStr = " 
docOperations = new ArrayList(); + if (getAllJobs) { + docOperations = scheduler.getDocOperations(); + } else { + int jobId = Integer.parseInt(jobIdStr); + CmsDocOperation docOperation = scheduler.getDocOperation(jobId); + if (docOperation != null) + docOperations.add(docOperation); + } + PrintWriter out = response.getWriter(); + String resultStr = ""; + if (outputFormat.equals("xml")) { + response.setContentType("text/xml"); + resultStr = createXmlString(docOperations); + } else if (outputFormat.equals("html")) { + response.setContentType("text/html"); + resultStr = createHtmlString(docOperations); + } else if (outputFormat.equals("error")) { + response.setContentType("text/html"); + resultStr = createErrorString(docOperations); + } + out.print(resultStr); + out.close(); + } catch (Exception e) { + throw new ServletException(e); + } + } + + String createXmlString(ArrayList docOperations) { + StringBuilder result = new StringBuilder(); + if (docOperations != null && ! docOperations.isEmpty()) { + result.append(""); + for (int i=0; i"); + int jobId = docOperation.getOrderId(); + result.append("" + jobId + ""); + result.append("" + docOperation.getName() + ""); + result.append(""); + Date start = docOperation.getStart(); + String startStr = "No start time available because job is scheduled into server queue where other jobs have been started earlier"; + if (start != null) + startStr = start.toString(); + result.append("" + startStr + ""); + Date end = docOperation.getEnd(); + String endStr = "No end time available because job is not finished yet"; + if (end != null) + endStr = end.toString(); + result.append("" + endStr + ""); + String status = docOperation.getStatus(); + result.append("" + status + ""); + String errorMessage = docOperation.getErrorMessage(); + if (errorMessage == null) + errorMessage = "no error"; + result.append("" + errorMessage + ""); + result.append(""); + if (docOperation.getName().equals("delete")) { + result.append(""); + result.append("" + 
docOperation.getDocIdentifier() + ""); + result.append(""); + } else if (docOperation.getName().equals("create")) { + result.append(""); + result.append("" + docOperation.getSrcUrl() + ""); + result.append("" + docOperation.getUploadFileName() + ""); + result.append(""); + result.append(""); + result.append("" + docOperation.getDocIdentifier() + ""); + result.append(""); + } + String desc = "Document operations are maintained on server asychronously. Each operation is scheduled into a server job queue " + + "and is executed when all previous started jobs in the queue are worked off. Each operation needs some execution time dependent " + + "on the size and the number of pages of the document, the speed of the network connection and the performance of the " + + "server."; + result.append("" + desc + ""); + result.append(""); + } + result.append(""); + } else { + String message = "there are no scheduled jobs (neither finished, queued or executed)"; + result.append("" + message + ""); + } + return result.toString(); + } + + String createHtmlString(ArrayList docOperations) { + // TODO all + StringBuilder result = new StringBuilder(); + result.append(""); + result.append(""); + result.append("" + "Document operation status" + ""); + result.append(""); + result.append(""); + result.append(""); + result.append("

      " + "Document operation status" + "

      "); + if (docOperations != null && ! docOperations.isEmpty()) { + for (int i=0; i"); + result.append(""); + result.append(""); + } else { + String message = "there are no scheduled jobs (neither finished, queued or executed)"; + result.append("" + message + ""); + } + result.append("
      "); + CmsDocOperation docOperation = docOperations.get(i); + result.append("Operation: " + docOperation.getName()); + int jobId = docOperation.getOrderId(); + result.append("

      "); + result.append("Job id: " + jobId); + result.append("

      "); + result.append("Job status: "); + result.append("

        "); + Date start = docOperation.getStart(); + String startStr = "No start time available because job is scheduled into server queue where other jobs have been started earlier"; + if (start != null) + startStr = start.toString(); + result.append("
      • "); + result.append("Started: "); + result.append("" + startStr + ""); + result.append("
      • "); + Date end = docOperation.getEnd(); + String endStr = "No end time available because job is not finished yet"; + if (end != null) + endStr = end.toString(); + result.append("
      • "); + result.append("Finished: "); + result.append("" + endStr + ""); + result.append("
      • "); + String status = docOperation.getStatus(); + result.append("
      • "); + result.append("Description: "); + result.append("" + status + ""); + result.append("
      • "); + String errorMessage = docOperation.getErrorMessage(); + result.append("
      • "); + if (errorMessage != null) { + result.append("Error: "); + result.append("" + errorMessage + ""); + } else { + result.append("Error: "); + result.append("" + "no error" + ""); + } + result.append("
      • "); + result.append("
      "); + if (docOperation.getName().equals("delete")) { + result.append("Destination"); + result.append("
        "); + result.append("
      • "); + result.append("Document identifier: "); + result.append("" + docOperation.getDocIdentifier() + ""); + result.append("
      • "); + result.append("
      "); + } else if (docOperation.getName().equals("create")) { + result.append("Source"); + result.append("
        "); + result.append("
      • "); + result.append("" + docOperation.getSrcUrl() + ""); + String uploadFileName = docOperation.getUploadFileName(); + if (uploadFileName != null) { + result.append("
      • "); + result.append("" + uploadFileName + ""); + result.append("
      • "); + } + result.append(""); + result.append("
      "); + result.append("Destination"); + result.append("
        "); + result.append("
      • "); + result.append("Document identifier: "); + result.append("" + docOperation.getDocIdentifier() + ""); + result.append("
      • "); + result.append("
      "); + } + String desc = "Document operations are maintained on server asychronously. Each operation is scheduled into a server job queue " + + "and is executed when all previous started jobs in the queue are worked off. Each operation needs some execution time dependent " + + "on the size and the number of pages of the document, the speed of the network connection and the performance of the " + + "server."; + result.append("Description: " + desc); + result.append("

      "); + result.append("


      "); + } + result.append("
      "); + result.append(""); + result.append(""); + return result.toString(); + } + + String createErrorString(ArrayList docOperations) { + String error = ""; + if (docOperations != null && ! docOperations.isEmpty()) { + for (int i=0; i"; + String cssShowWordFileName = "pageNormDict.css"; + if (outputFormat.equals("xmlDisplay")) + cssShowWordFileName = "pageOrig.css"; // xml display shows always the original text + else if (normalization.equals("orig") && mode.equals("untokenized")) + cssShowWordFileName = "pageOrig.css"; + else if (normalization.equals("orig") && mode.equals("tokenized")) + cssShowWordFileName = "pageOrigDict.css"; + else if (normalization.equals("reg") && mode.equals("untokenized")) + cssShowWordFileName = "pageReg.css"; + else if (normalization.equals("reg") && mode.equals("tokenized")) + cssShowWordFileName = "pageRegDict.css"; + else if (normalization.equals("norm") && mode.equals("untokenized")) + cssShowWordFileName = "pageNorm.css"; + String showWordCssUrl = baseUrl + "/css/" + cssShowWordFileName; + String mainCssLink = ""; + String showWordCssLink = ""; + String head = "" + "" + title + "" + showWordCssLink + mainCssLink + ""; + String namespace = ""; + String pageHtmlStr = null; + if (pageHtmlFile.exists() && outputFormat.equals("html") && (highlightElem == null && highlightQuery == null)) { + pageHtmlStr = FileUtils.readFileToString(pageHtmlFile, "utf-8"); + } else { + if (highlightElem != null || highlightQuery != null) { + String hiQueryType = "orig"; + if (highlightQueryType.equals("morph")) + hiQueryType = "morph"; + else + hiQueryType = normalization; + String language = mdRecord.getLanguage(); + fragmentMorphStr = highlight(fragmentMorphStr, highlightElem, highlightElemPos, hiQueryType, highlightQuery, language); + } + pageHtmlStr = pageTransformer.transform(fragmentMorphStr, mdRecord, page, outputFormat); + } + if (schemaName != null && schemaName.equals("echo")) { + namespace = 
"xmlns:echo=\"http://www.mpiwg-berlin.mpg.de/ns/echo/1.0/\" xmlns:de=\"http://www.mpiwg-berlin.mpg.de/ns/de/1.0/\" " + + "xmlns:dcterms=\"http://purl.org/dc/terms\" " + "xmlns:xhtml=\"http://www.w3.org/1999/xhtml\" xmlns:mml=\"http://www.w3.org/1998/Math/MathML\" " + + "xmlns:xlink=\"http://www.w3.org/1999/xlink\""; + } + result = xmlHeader + "" + head + "" + pageHtmlStr + "" + ""; + } else { + String pageFileNameOrig = docPageDir + "/page-" + page + ".xml"; + File pageFileOrig = new File(pageFileNameOrig); + if (pageFileOrig.exists()) + result = FileUtils.readFileToString(pageFileOrig, "utf-8"); + else + result = ""; + } + out.print(result); + out.close(); + } catch (ApplicationException e) { + throw new ServletException(e); + } + } + + protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + doGet(request, response); + } + + private String getBaseUrl(HttpServletRequest request) { + return getServerUrl(request) + request.getContextPath(); + } + + private String getServerUrl(HttpServletRequest request) { + if ( ( request.getServerPort() == 80 ) || ( request.getServerPort() == 443 ) ) + return request.getScheme() + "://" + request.getServerName(); + else + return request.getScheme() + "://" + request.getServerName() + ":" + request.getServerPort(); + } + + private String enrichWordsOrigRegNorm(String xmlStr) throws ApplicationException { + try { + WordContentHandler wordContentHandler = new WordContentHandler(); + XMLReader xmlParser = new SAXParser(); + xmlParser.setContentHandler(wordContentHandler); + StringReader strReader = new StringReader(xmlStr); + InputSource inputSource = new InputSource(strReader); + xmlParser.parse(inputSource); + String result = wordContentHandler.getResult(); + return result; + } catch (SAXException e) { + throw new ApplicationException(e); + } catch (IOException e) { + throw new ApplicationException(e); + } + } + + private String highlight(String xmlStr, String 
highlightElem, int highlightElemPos, String highlightQueryType, String highlightQuery, String language) throws ApplicationException { + String result = null; + try { + HighlightContentHandler highlightContentHandler = new HighlightContentHandler(highlightElem, highlightElemPos, highlightQueryType, highlightQuery, language); + highlightContentHandler.setFirstPageBreakReachedMode(true); + XMLReader xmlParser = new SAXParser(); + xmlParser.setContentHandler(highlightContentHandler); + StringReader stringReader = new StringReader(xmlStr); + InputSource inputSource = new InputSource(stringReader); + xmlParser.parse(inputSource); + result = highlightContentHandler.getResult().toString(); + } catch (SAXException e) { + throw new ApplicationException(e); + } catch (IOException e) { + throw new ApplicationException(e); + } + return result; + } + +} diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/src/de/mpg/mpiwg/berlin/mpdl/servlets/cms/GetToken.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/src/de/mpg/mpiwg/berlin/mpdl/servlets/cms/GetToken.java Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,87 @@ +package de.mpg.mpiwg.berlin.mpdl.servlets.cms; + +import java.io.IOException; +import java.io.PrintWriter; +import java.util.ArrayList; + +import javax.servlet.ServletConfig; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.apache.lucene.index.Term; + +import de.mpg.mpiwg.berlin.mpdl.cms.document.Token; +import de.mpg.mpiwg.berlin.mpdl.cms.lucene.IndexHandler; +import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; + +public class GetToken extends HttpServlet { + private static final long serialVersionUID = 1L; + public GetToken() { + super(); + } + + public void init(ServletConfig config) throws ServletException { + super.init(config); + } + + 
/**
 * Handles GET requests of the GetToken servlet: looks up index tokens and writes them to the response.
 *
 * Request parameters read here:
 *   docId        - optional; when null, getToken(attribute, query, count) is called,
 *                  otherwise getToken(docId, attribute, query, count)
 *   attribute    - defaults to "tokenOrig" when absent
 *   query        - passed to the index handler as received (no default, no null check here)
 *   count        - defaults to "100"; parsed with Integer.parseInt before the try block, so a
 *                  non-numeric value raises NumberFormatException, which is not caught in this method
 *   outputFormat - defaults to "xml"; only "html" selects content type text/html, every other
 *                  value yields text/xml
 *
 * Request and response character encoding are both set to utf-8. The method prints attribute,
 * query and count, then for each token its term text and, when freq is not -1, its frequency,
 * and finally closes the writer. An ApplicationException from the index handler is rethrown
 * wrapped in a ServletException. doPost (further along on this same line) simply forwards to
 * this method.
 *
 * NOTE(review): in this copy the markup string literals are empty and the for-loop header is
 * truncated ("for (int i=0; i\");"), so the declarations of term and freq are not visible -
 * this looks like angle-bracket content was stripped during extraction; restore the original
 * text before changing any logic.
 * NOTE(review): attribute and query appear to be written to the response without escaping -
 * confirm against the unstripped source.
 */
protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + request.setCharacterEncoding("utf-8"); + response.setCharacterEncoding("utf-8"); + String docId = request.getParameter("docId"); + String attribute = request.getParameter("attribute"); + if (attribute == null) + attribute = "tokenOrig"; + String query = request.getParameter("query"); + String countStr = request.getParameter("count"); + if (countStr == null) + countStr = "100"; + int count = Integer.parseInt(countStr); + String outputFormat = request.getParameter("outputFormat"); + if (outputFormat == null) + outputFormat = "xml"; + try { + IndexHandler indexHandler = IndexHandler.getInstance(); + ArrayList token = null; + if (docId == null) + token = indexHandler.getToken(attribute, query, count); + else + token = indexHandler.getToken(docId, attribute, query, count); + if (outputFormat.equals("xml")) + response.setContentType("text/xml"); + else if (outputFormat.equals("html")) + response.setContentType("text/html"); + else + response.setContentType("text/xml"); + PrintWriter out = response.getWriter(); + out.print(""); + out.print("" + attribute + ""); + out.print("" + query + ""); + out.print("" + count + ""); + out.print(""); + if (token != null) { + for (int i=0; i"); + out.print("" + term.text() + ""); + if (freq != -1) + out.print("" + freq + ""); + out.print(""); + } + } + out.print(""); + out.print(""); + out.close(); + } catch (ApplicationException e) { + throw new ServletException(e); + } + } + + protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + doGet(request, response); + } + +} diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/src/de/mpg/mpiwg/berlin/mpdl/servlets/cms/MoreLikeThis.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/software/mpdl-services-new/mpiwg-mpdl-cms-web/src/de/mpg/mpiwg/berlin/mpdl/servlets/cms/MoreLikeThis.java Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,140 @@ +package de.mpg.mpiwg.berlin.mpdl.servlets.cms; + +import java.io.IOException; +import java.io.PrintWriter; +import java.util.ArrayList; +import java.util.Date; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.apache.lucene.document.Fieldable; + +import de.mpg.mpiwg.berlin.mpdl.cms.document.Document; +import de.mpg.mpiwg.berlin.mpdl.cms.document.Hits; +import de.mpg.mpiwg.berlin.mpdl.cms.lucene.IndexHandler; + +public class MoreLikeThis extends HttpServlet { + private static final long serialVersionUID = 1L; + + public MoreLikeThis() { + super(); + } + + protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + request.setCharacterEncoding("utf-8"); + response.setCharacterEncoding("utf-8"); + String outputFormat = request.getParameter("outputFormat"); + if (outputFormat == null) + outputFormat = "html"; + String docId = request.getParameter("docId"); + String pageStr = request.getParameter("page"); + if (pageStr == null) + pageStr = "1"; + int page = Integer.parseInt(pageStr); + String pageSizeStr = request.getParameter("pageSize"); + if (pageSizeStr == null) + pageSizeStr = "10"; + int pageSize = Integer.parseInt(pageSizeStr); + int from = (page * pageSize) - pageSize; // e.g. 0 + int to = page * pageSize - 1; // e.g. 
9 + PrintWriter out = response.getWriter(); + IndexHandler indexHandler; + try { + Date begin = new Date(); + indexHandler = IndexHandler.getInstance(); + Hits hits = indexHandler.moreLikeThis(docId, from, to); + ArrayList docs = null; + if (hits != null) + docs = hits.getHits(); + int hitsSize = -1; + int docsSize = -1; + if (hits != null) + hitsSize = hits.getSize(); + if (docs != null) + docsSize = docs.size(); + Date end = new Date(); + long elapsedTime = end.getTime() - begin.getTime(); + if (outputFormat.equals("xml")) + response.setContentType("text/xml"); + else if (outputFormat.equals("html")) + response.setContentType("text/html"); + else + response.setContentType("text/xml"); + if (outputFormat.equals("xml")) { + out.print(""); + out.print(""); + out.print("" + docId + ""); + out.print("" + page + ""); + out.print("" + pageSize + ""); + out.print(""); + out.print("" + hitsSize + ""); + out.print(""); + for (int i=0; i"); + String similarDocId = doc.getFieldable("docId").stringValue(); + out.print("" + similarDocId + ""); + Fieldable docCollectionNamesField = doc.getFieldable("collectionNames"); + if (docCollectionNamesField != null) { + String docCollectionNames = docCollectionNamesField.stringValue(); + out.print("" + docCollectionNames + ""); + } + ArrayList hitFragments = doc.getHitFragments(); + if (hitFragments != null) { + out.print(""); + for (int j=0; j" + hitFragment + ""); + } + out.print(""); + } + out.print(""); + } + out.print(""); + out.print("" + elapsedTime + ""); + out.print(""); + } else if (outputFormat.equals("html")) { + StringBuilder htmlStrBuilder = new StringBuilder(); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append("Similar documents of: " + docId + ""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append("

      Similar documents of: " + docId + "

      "); + int fromDisplay = from + 1; + int toDisplay = to + 1; + if (hitsSize < to) + toDisplay = hitsSize; + htmlStrBuilder.append("Result: " + fromDisplay + " - " + toDisplay + " of " + hitsSize + " documents" + ""); + htmlStrBuilder.append("
        "); + for (int i=0; i" + num + ". " + similarDocId + ""); + } + htmlStrBuilder.append("
      "); + htmlStrBuilder.append("

      "); + htmlStrBuilder.append("Elapsed time: " + elapsedTime + " ms"); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + out.print(htmlStrBuilder.toString()); + } + out.close(); + } catch (Exception e) { + throw new ServletException(e); + } + } + + /** + * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response) + */ + protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + // TODO Auto-generated method stub + } + +} diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/src/de/mpg/mpiwg/berlin/mpdl/servlets/cms/MpiwgMpdlCmsWebServletContextListener.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/src/de/mpg/mpiwg/berlin/mpdl/servlets/cms/MpiwgMpdlCmsWebServletContextListener.java Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,67 @@ +package de.mpg.mpiwg.berlin.mpdl.servlets.cms; + +import javax.servlet.ServletContext; +import javax.servlet.ServletContextEvent; +import javax.servlet.ServletContextListener; + +import de.mpg.mpiwg.berlin.mpdl.lt.dict.db.LexHandler; +import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.MorphologyCache; +import de.mpg.mpiwg.berlin.mpdl.servlets.cms.MpiwgMpdlCmsWebServletContextListener; +import de.mpg.mpiwg.berlin.mpdl.xml.transform.FragmentTransformer; +import de.mpg.mpiwg.berlin.mpdl.xml.xquery.XQueryEvaluator; +import de.mpg.mpiwg.berlin.mpdl.cms.general.Constants; +import de.mpg.mpiwg.berlin.mpdl.cms.lucene.IndexHandler; +import de.mpg.mpiwg.berlin.mpdl.cms.scheduler.CmsChainScheduler; +import de.mpg.mpiwg.berlin.mpdl.cms.transform.PageTransformer; +import de.mpg.mpiwg.berlin.mpdl.cms.transform.TocTransformer; + +public class MpiwgMpdlCmsWebServletContextListener implements ServletContextListener { + private ServletContext context = null; + private FragmentTransformer fragmentTransformer = null; + private PageTransformer pageTransformer = null; + private 
TocTransformer tocTransformer = null; + private XQueryEvaluator xQueryEvaluator = null; + public void contextInitialized(ServletContextEvent event) { + try { + this.context = event.getServletContext(); + String documentsDirectory = Constants.getInstance().getDocumentsDir(); + String luceneDocumentsDirectory = Constants.getInstance().getLuceneDocumentsDir(); + String luceneNodesDirectory = Constants.getInstance().getLuceneNodesDir(); + context.setAttribute("documentDirectory", documentsDirectory); + context.setAttribute("luceneDocumentsDirectory", luceneDocumentsDirectory); + context.setAttribute("luceneNodesDirectory", luceneNodesDirectory); + System.out.println(MpiwgMpdlCmsWebServletContextListener.class.getName() + ": contextInitialized (documentsDirectory= \"" + documentsDirectory + "\", set in constants.properties)"); + System.out.println(MpiwgMpdlCmsWebServletContextListener.class.getName() + ": contextInitialized (luceneDocumentsDirectory= \"" + luceneDocumentsDirectory + "\", set in constants.properties)"); + System.out.println(MpiwgMpdlCmsWebServletContextListener.class.getName() + ": contextInitialized (luceneNodesDirectory= \"" + luceneNodesDirectory + "\", set in constants.properties)"); + fragmentTransformer = new FragmentTransformer(); + context.setAttribute("fragmentTransformer", fragmentTransformer); + System.out.println(MpiwgMpdlCmsWebServletContextListener.class.getName() + ": contextInitialized (fragmentTransformer)"); + pageTransformer = new PageTransformer(); + context.setAttribute("pageTransformer", pageTransformer); + System.out.println(MpiwgMpdlCmsWebServletContextListener.class.getName() + ": contextInitialized (pageTransformer)"); + tocTransformer = new TocTransformer(); + context.setAttribute("tocTransformer", tocTransformer); + System.out.println(MpiwgMpdlCmsWebServletContextListener.class.getName() + ": contextInitialized (tocTransformer)"); + xQueryEvaluator = new XQueryEvaluator(); + context.setAttribute("xQueryEvaluator", 
xQueryEvaluator); + System.out.println(MpiwgMpdlCmsWebServletContextListener.class.getName() + ": contextInitialized (xQueryEvaluator)"); + } catch (Exception e) { + e.printStackTrace(); + } + } + + public void contextDestroyed(ServletContextEvent event) { + try { + this.context = null; + LexHandler.getInstance().end(); + MorphologyCache.getInstance().end(); + IndexHandler.getInstance().end(); + CmsChainScheduler scheduler = CmsChainScheduler.getInstance(); + scheduler.end(); + Thread.sleep(1000); // with this, also the scheduler worker threads could be closed + System.out.println(MpiwgMpdlCmsWebServletContextListener.class.getName() + ": contextDestroyed"); + } catch (Exception e) { + e.printStackTrace(); + } + } +} \ No newline at end of file diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/src/de/mpg/mpiwg/berlin/mpdl/servlets/cms/QueryDocument.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/src/de/mpg/mpiwg/berlin/mpdl/servlets/cms/QueryDocument.java Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,350 @@ +package de.mpg.mpiwg.berlin.mpdl.servlets.cms; + +import java.io.IOException; +import java.io.PrintWriter; +import java.io.StringReader; +import java.util.ArrayList; + +import javax.servlet.ServletConfig; +import javax.servlet.ServletContext; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.apache.lucene.document.Fieldable; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import org.xml.sax.XMLReader; + +import com.sun.org.apache.xerces.internal.parsers.SAXParser; + +import de.mpg.mpiwg.berlin.mpdl.cms.document.Document; +import de.mpg.mpiwg.berlin.mpdl.cms.document.Hits; +import de.mpg.mpiwg.berlin.mpdl.cms.document.MetadataRecord; +import de.mpg.mpiwg.berlin.mpdl.cms.lucene.IndexHandler; +import 
de.mpg.mpiwg.berlin.mpdl.cms.transform.HighlightContentHandler; +import de.mpg.mpiwg.berlin.mpdl.cms.transform.PageTransformer; +import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; + +public class QueryDocument extends HttpServlet { + private static final long serialVersionUID = 1L; + private PageTransformer pageTransformer = null; + + public QueryDocument() { + super(); + } + + public void init(ServletConfig config) throws ServletException { + super.init(config); + ServletContext context = getServletContext(); + pageTransformer = (PageTransformer) context.getAttribute("pageTransformer"); + } + + protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + doGet(request, response); + } + + protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + request.setCharacterEncoding("utf-8"); + response.setCharacterEncoding("utf-8"); + String docId = request.getParameter("docId"); + String query = request.getParameter("query"); + String[] normFunctions = {"none"}; + if (query.contains("tokenReg")) // TODO ordentlich behandeln + normFunctions[0] = "reg"; + else if (query.contains("tokenNorm")) // TODO ordentlich behandeln + normFunctions[0] = "norm"; + String[] outputOptions = {}; + if (query.contains("tokenMorph")) { // TODO ordentlich behandeln + outputOptions = new String[1]; + outputOptions[0] = "withLemmas"; + } + String pageStr = request.getParameter("page"); + if (pageStr == null) + pageStr = "1"; + int page = Integer.parseInt(pageStr); + String pageSizeStr = request.getParameter("pageSize"); + if (pageSizeStr == null) + pageSizeStr = "10"; + int pageSize = Integer.parseInt(pageSizeStr); + int from = (page * pageSize) - pageSize; // e.g. 0 + int to = page * pageSize - 1; // e.g. 
9 + String outputFormat = request.getParameter("outputFormat"); + if (outputFormat == null) + outputFormat = "xml"; + try { + IndexHandler indexHandler = IndexHandler.getInstance(); + Hits hits = indexHandler.queryDocument(docId, query, from, to); + MetadataRecord docMetadataRecord = indexHandler.getDocMetadata(docId); + if (outputFormat.equals("xml")) + response.setContentType("text/xml"); + else if (outputFormat.equals("html")) + response.setContentType("text/html"); + else + response.setContentType("text/xml"); + PrintWriter out = response.getWriter(); + String resultStr = ""; + if (outputFormat.equals("xml")) + resultStr = createXmlString(docMetadataRecord, query, page, pageSize, normFunctions, outputOptions, hits); + else if (outputFormat.equals("html")) + resultStr = createHtmlString(docMetadataRecord, query, page, pageSize, normFunctions, outputOptions, hits, request); + out.print(resultStr); + out.close(); + } catch (ApplicationException e) { + throw new ServletException(e); + } + } + + private String createXmlString(MetadataRecord docMetadataRecord, String query, int page, int pageSize, String[] normFunctions, String[] outputOptions, Hits hits) throws ApplicationException { + String docId = docMetadataRecord.getDocId(); + ArrayList docs = null; + if (hits != null) + docs = hits.getHits(); + int hitsSize = -1; + int docsSize = -1; + if (hits != null) + hitsSize = hits.getSize(); + if (docs != null) + docsSize = docs.size(); + StringBuilder xmlStrBuilder = new StringBuilder(); + xmlStrBuilder.append(""); + xmlStrBuilder.append("" + docId + ""); + xmlStrBuilder.append(""); + xmlStrBuilder.append("" + query + ""); + xmlStrBuilder.append("" + page + ""); + xmlStrBuilder.append("" + pageSize + ""); + xmlStrBuilder.append(""); + xmlStrBuilder.append("" + hitsSize + ""); + xmlStrBuilder.append(""); + for (int i=0; i"); + xmlStrBuilder.append("" + num + ""); + String pageNumber = null; + Fieldable fPageNumber = doc.getFieldable("pageNumber"); + if (fPageNumber != 
null) { + pageNumber = fPageNumber.stringValue(); + xmlStrBuilder.append("" + pageNumber + ""); + } + String elementPagePosition = null; + Fieldable fElementPagePosition = doc.getFieldable("elementPagePosition"); + if (fElementPagePosition != null) { + elementPagePosition = fElementPagePosition.stringValue(); + xmlStrBuilder.append("" + elementPagePosition + ""); + } + String lineNumber = null; + Fieldable fLineNumber = doc.getFieldable("lineNumber"); + if (fLineNumber != null) { + lineNumber = fLineNumber.stringValue(); + xmlStrBuilder.append("" + lineNumber + ""); + } + String elementPosition = null; + Fieldable fElementPosition = doc.getFieldable("elementAbsolutePosition"); + if (fElementPosition != null) { + elementPosition = fElementPosition.stringValue(); + xmlStrBuilder.append("" + elementPosition + ""); + } + String xpath = null; + Fieldable fXPath = doc.getFieldable("xpath"); + if (fXPath != null) { + xpath = fXPath.stringValue(); + xmlStrBuilder.append("" + xpath + ""); + } + String xmlId = null; + Fieldable fXmlId = doc.getFieldable("xmlId"); + if (fXmlId != null) { + xmlId = fXmlId.stringValue(); + xmlStrBuilder.append("" + xmlId + ""); + } + String language = null; + Fieldable fLanguage = doc.getFieldable("language"); + if (fLanguage != null) { + language = fLanguage.stringValue(); + xmlStrBuilder.append("" + language + ""); + } + String xmlContentTokenized = null; + Fieldable fXmlContentTokenized = doc.getFieldable("xmlContentTokenized"); + if (fXmlContentTokenized != null) { + String highlightQueryType = "orig"; + if (withLemmas(outputOptions)) { + highlightQueryType = "morph"; + } else if (normFunctions != null) { + String normFunction = normFunctions[0]; + highlightQueryType = normFunction; + if (normFunction.equals("none")) { + highlightQueryType = "orig"; + } + } + xmlContentTokenized = fXmlContentTokenized.stringValue(); + String xmlPre = ""; + String xmlPost = ""; + String xmlInputStr = xmlPre + xmlContentTokenized + xmlPost; + String 
docLanguage = docMetadataRecord.getLanguage(); + String highlightedXmlStr = highlight(xmlInputStr, highlightQueryType, query, docLanguage); + if (highlightedXmlStr == null) + highlightedXmlStr = "" + xmlContentTokenized + ""; + xmlStrBuilder.append(highlightedXmlStr); + } + xmlStrBuilder.append(""); + } + xmlStrBuilder.append(""); + xmlStrBuilder.append(""); + return xmlStrBuilder.toString(); + } + + private String createHtmlString(MetadataRecord docMetadataRecord, String query, int page, int pageSize, String[] normFunctions, String[] outputOptions, Hits hits, HttpServletRequest request) throws ApplicationException { + String docId = docMetadataRecord.getDocId(); + ArrayList docs = null; + if (hits != null) + docs = hits.getHits(); + int hitsSize = -1; + int docsSize = -1; + if (hits != null) + hitsSize = hits.getSize(); + if (docs != null) + docsSize = docs.size(); + String highlightQueryType = "orig"; + String normalizationStr = ""; + String highlightQueryTypeStr = ""; + if (withLemmas(outputOptions)) { + highlightQueryTypeStr = "&highlightQueryType=norm"; + highlightQueryType = "norm"; + } else if (normFunctions != null) { + String normFunction = normFunctions[0]; + normalizationStr = "&normalization=" + normFunction; + highlightQueryType = normFunction; + if (normFunction.equals("none")) { + normalizationStr = "&normalization=" + "orig"; + highlightQueryType = "orig"; + } + } + StringBuilder xmlStrBuilder = new StringBuilder(); + xmlStrBuilder.append(""); + xmlStrBuilder.append(""); + xmlStrBuilder.append("Document: \"" + docId + " " + query + "\""); + String baseUrl = getBaseUrl(request); + String cssUrl = baseUrl + "/css/page.css"; + String cssShowWordFileName = "pageOrig.css"; + if (highlightQueryType.equals("reg")) + cssShowWordFileName = "pageReg.css"; + else if (highlightQueryType.equals("norm")) + cssShowWordFileName = "pageNorm.css"; + String showWordCssUrl = baseUrl + "/css/" + cssShowWordFileName; + xmlStrBuilder.append(""); + 
xmlStrBuilder.append(""); + xmlStrBuilder.append(""); + xmlStrBuilder.append(""); + xmlStrBuilder.append("[This is a MPIWG CMS technology service] \"MPIWG"); + xmlStrBuilder.append("Query: " + query + ""); + xmlStrBuilder.append(""); + xmlStrBuilder.append("" + page + ""); + xmlStrBuilder.append("" + pageSize + ""); + xmlStrBuilder.append("" + hitsSize + ""); + xmlStrBuilder.append(""); + xmlStrBuilder.append(""); + for (int i=0; i"); + Document doc = docs.get(i); + int num = (page - 1) * pageSize + i + 1; + xmlStrBuilder.append(""); + xmlStrBuilder.append(""); + String xmlContentTokenized = null; + Fieldable fXmlContentTokenized = doc.getFieldable("xmlContentTokenized"); + if (fXmlContentTokenized != null) { + xmlContentTokenized = fXmlContentTokenized.stringValue(); + String highlightedXmlStr = highlight(xmlContentTokenized, highlightQueryType, query, language); + String highlightHtmlStr = pageTransformer.transform(highlightedXmlStr, docMetadataRecord, -1, "html"); // TODO performance: do not highlight each single node but highlight them all in one step + xmlStrBuilder.append(""); + } + xmlStrBuilder.append(""); + } + xmlStrBuilder.append("
      " + num + ". " + ""); + String posStr = ""; + String pageNumber = ""; + Fieldable fPageNumber = doc.getFieldable("pageNumber"); + if (fPageNumber != null) { + pageNumber = fPageNumber.stringValue(); + posStr = posStr + "Page " + pageNumber + ", "; + } + String elementName = null; + String presElementName = ""; + Fieldable fElementName = doc.getFieldable("elementName"); + if (fElementName != null) { + elementName = fElementName.stringValue(); + presElementName = getPresentationName(elementName); + } + String elementPagePosition = ""; + Fieldable fElementPagePosition = doc.getFieldable("elementPagePosition"); + if (fElementPagePosition != null) { + elementPagePosition = fElementPagePosition.stringValue(); + posStr = posStr + presElementName + " " + elementPagePosition + ":"; + } + String language = docMetadataRecord.getLanguage(); + String getPageLink = baseUrl + "/query/GetPage?docId=" + docId + "&page=" + pageNumber + normalizationStr + "&highlightElem=" + elementName + "&highlightElemPos=" + elementPagePosition + highlightQueryTypeStr + "&highlightQuery=" + query + "&language=" + language; + xmlStrBuilder.append("" + posStr + ""); + xmlStrBuilder.append(""); + xmlStrBuilder.append(highlightHtmlStr); + xmlStrBuilder.append("
      "); + xmlStrBuilder.append(""); + xmlStrBuilder.append(""); + return xmlStrBuilder.toString(); + } + + private String highlight(String xmlStr, String highlightQueryType, String highlightQuery, String language) throws ApplicationException { + String result = null; + try { + HighlightContentHandler highlightContentHandler = new HighlightContentHandler(null, -1, highlightQueryType, highlightQuery, language); + highlightContentHandler.setFirstPageBreakReachedMode(true); + XMLReader xmlParser = new SAXParser(); + xmlParser.setContentHandler(highlightContentHandler); + StringReader stringReader = new StringReader(xmlStr); + InputSource inputSource = new InputSource(stringReader); + xmlParser.parse(inputSource); + result = highlightContentHandler.getResult().toString(); + } catch (SAXException e) { + throw new ApplicationException(e); + } catch (IOException e) { + throw new ApplicationException(e); + } + return result; + } + + private String getPresentationName(String elemName) { + String retStr = null; + if (elemName != null) { + if (elemName.equals("s")) { + retStr = "Sentence"; + } else { + // first char to uppercase + char[] stringArray = elemName.toCharArray(); + stringArray[0] = Character.toUpperCase(stringArray[0]); + retStr = new String(stringArray); + } + } + return retStr; + } + + private String getBaseUrl(HttpServletRequest request) { + return getServerUrl(request) + request.getContextPath(); + } + + private String getServerUrl(HttpServletRequest request) { + if ( ( request.getServerPort() == 80 ) || ( request.getServerPort() == 443 ) ) + return request.getScheme() + "://" + request.getServerName(); + else + return request.getScheme() + "://" + request.getServerName() + ":" + request.getServerPort(); + } + + private boolean withLemmas(String[] outputOptions) { + boolean result = false; + for (int i=0; i< outputOptions.length; i++) { + String function = outputOptions[i]; + if (function.equals("withLemmas")) + return true; + } + return result; + } + +} diff 
-r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/src/de/mpg/mpiwg/berlin/mpdl/servlets/cms/QueryDocuments.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/src/de/mpg/mpiwg/berlin/mpdl/servlets/cms/QueryDocuments.java Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,333 @@ +package de.mpg.mpiwg.berlin.mpdl.servlets.cms; + +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.ArrayList; +import java.util.Date; + +import javax.servlet.ServletConfig; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.apache.lucene.document.Fieldable; +import org.apache.lucene.search.Query; + +import de.mpg.mpiwg.berlin.mpdl.cms.document.Document; +import de.mpg.mpiwg.berlin.mpdl.cms.document.DocumentHandler; +import de.mpg.mpiwg.berlin.mpdl.cms.document.Hits; +import de.mpg.mpiwg.berlin.mpdl.cms.lucene.IndexHandler; + +public class QueryDocuments extends HttpServlet { + private static final long serialVersionUID = 1L; + public QueryDocuments() { + super(); + } + + public void init(ServletConfig config) throws ServletException { + super.init(config); + } + + protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + request.setCharacterEncoding("utf-8"); + response.setCharacterEncoding("utf-8"); + String query = request.getParameter("query"); + String sortBy = request.getParameter("sortBy"); + String[] sortFields = null; + if (sortBy != null && ! 
sortBy.trim().isEmpty()) + sortFields = sortBy.split(" "); + String language = request.getParameter("language"); // optional: is also detected by translator service + if (language != null && language.equals("none")) + language = null; + String translate = request.getParameter("translate"); + String pageStr = request.getParameter("page"); + if (pageStr == null) + pageStr = "1"; + int page = Integer.parseInt(pageStr); + String pageSizeStr = request.getParameter("pageSize"); + if (pageSizeStr == null) + pageSizeStr = "10"; + int pageSize = Integer.parseInt(pageSizeStr); + int from = (page * pageSize) - pageSize; // e.g. 0 + int to = page * pageSize - 1; // e.g. 9 + String outputFormat = request.getParameter("outputFormat"); + if (outputFormat == null) + outputFormat = "html"; + if (outputFormat.equals("xml")) + response.setContentType("text/xml"); + else if (outputFormat.equals("html")) + response.setContentType("text/html"); + else + response.setContentType("text/xml"); + PrintWriter out = response.getWriter(); + if (query == null) { + out.print("no query specified: please set parameter \"query\""); + out.close(); + return; + } + try { + Date begin = new Date(); + IndexHandler indexHandler = IndexHandler.getInstance(); + boolean translateBool = false; + if (translate != null && translate.equals("true")) + translateBool = true; + boolean withHitHighlights = false; + if (query.contains("tokenOrig:") || query.contains("tokenMorph:") || query.contains("tokenReg:") || query.contains("tokenNorm:")) + withHitHighlights = true; + Hits hits = indexHandler.queryDocuments(query, sortFields, language, from, to, withHitHighlights, translateBool); + ArrayList docs = null; + if (hits != null) + docs = hits.getHits(); + int hitsSize = -1; + int docsSize = -1; + if (hits != null) + hitsSize = hits.getSize(); + if (docs != null) + docsSize = docs.size(); + Date end = new Date(); + long elapsedTime = end.getTime() - begin.getTime(); + if (outputFormat.equals("xml")) { + out.print(""); 
+ out.print(""); + out.print("" + query + ""); + out.print("" + page + ""); + out.print("" + pageSize + ""); + out.print(""); + out.print("" + hitsSize + ""); + out.print(""); + for (int i=0; i"); + String docId = doc.getFieldable("docId").stringValue(); + out.print("" + docId + ""); + Fieldable docCollectionNamesField = doc.getFieldable("collectionNames"); + if (docCollectionNamesField != null) { + String docCollectionNames = docCollectionNamesField.stringValue(); + out.print("" + docCollectionNames + ""); + } + ArrayList hitFragments = doc.getHitFragments(); + if (hitFragments != null) { + out.print(""); + for (int j=0; j" + hitFragment + ""); + } + out.print(""); + } + out.print(""); + } + out.print(""); + out.print("" + elapsedTime + ""); + out.print(""); + } else if (outputFormat.equals("html")) { + StringBuilder htmlStrBuilder = new StringBuilder(); + String baseUrl = getBaseUrl(request); + String cssUrl = baseUrl + "/css/page.css"; + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append("Query: " + query + ""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append("
      [This is a MPIWG CMS technology service] \"MPIWG
      "); + htmlStrBuilder.append("

      "); + String luceneQueryStr = query; + Query luceneQuery = hits.getQuery(); + if (query != null) + luceneQueryStr = luceneQuery.toString(); + String sortByStr = sortBy; + if (sortBy == null) + sortByStr = ""; + htmlStrBuilder.append("

      Query: " + luceneQueryStr + ", sorted by: " + sortByStr + "

      "); + htmlStrBuilder.append("
      "); + htmlStrBuilder.append(""); + if (translate != null) + htmlStrBuilder.append(""); + if (language != null) + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + int countPages = hitsSize / 10 + 1; + if (hitsSize % 10 == 0) // modulo operator: e.g. 280 % 10 is 0 + countPages = hitsSize / 10; + int pageLeft = page - 1; + if (page == 1) + pageLeft = 1; + int pageRight = page + 1; + if (page == countPages) + pageRight = countPages; + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + int fromDisplay = from + 1; + int toDisplay = to + 1; + if (hitsSize < to) + toDisplay = hitsSize; + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append("
      " + page + " / " + countPages + "Page: " + fromDisplay + " - " + toDisplay + " of " + hitsSize + " documents" + "
      "); + htmlStrBuilder.append("

      "); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + for (int i=0; i"); + int num = (page - 1) * pageSize + i + 1; + htmlStrBuilder.append(""); + Fieldable authorField = doc.getFieldable("author"); + String author = ""; + if (authorField != null) + author = authorField.stringValue(); + htmlStrBuilder.append(""); + Fieldable titleField = doc.getFieldable("title"); + String title = ""; + if (titleField != null) + title = titleField.stringValue(); + htmlStrBuilder.append(""); + Fieldable publisherField = doc.getFieldable("publisher"); + String publisher = ""; + if (publisherField != null) + publisher = publisherField.stringValue(); + htmlStrBuilder.append(""); + Fieldable yearField = doc.getFieldable("date"); + String year = ""; + if (yearField != null) + year = yearField.stringValue(); + htmlStrBuilder.append(""); + String docId = doc.getFieldable("docId").stringValue(); + htmlStrBuilder.append(""); + Fieldable lastModifiedField = doc.getFieldable("lastModified"); + String lastModified = ""; + if (lastModifiedField != null) + lastModified = lastModifiedField.stringValue(); + htmlStrBuilder.append(""); + Fieldable languageField = doc.getFieldable("language"); + String lang = ""; + if (languageField != null) + lang = languageField.stringValue(); + 
htmlStrBuilder.append(""); + Fieldable schemaNameField = doc.getFieldable("schemaName"); + String schemaName = ""; + if (schemaNameField != null) + schemaName = schemaNameField.stringValue(); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + // Link row + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + // hit fragments row + ArrayList hitFragments = doc.getHitFragments(); + if (hitFragments != null) { + StringBuilder hitFragmentsStrBuilder = new StringBuilder(); + hitFragmentsStrBuilder.append("Hit summary: "); + hitFragmentsStrBuilder.append("(...) "); + for (int j=0; j"); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + } + } + htmlStrBuilder.append(""); + htmlStrBuilder.append("
      " + "No" + "" + "" + "" + "" + "" + "" + "" + "" + "
      " + num + ". " + "" + author + "" + title + "" + publisher + "" + year + "" + docId + "" + lastModified + "" + lang + "" + schemaName + "
      "); + Fieldable echoIdField = doc.getFieldable("echoId"); + String echoId = null; + if (echoIdField != null) + echoId = echoIdField.stringValue(); + String urlDocuView = "http://echo.mpiwg-berlin.mpg.de/ECHOdocuView?url=" + echoId; + if (echoId == null) + urlDocuView = "http://echo.mpiwg-berlin.mpg.de/"; + htmlStrBuilder.append("" + " Full view"); + htmlStrBuilder.append(", " + "Lite view"); + htmlStrBuilder.append("
      "); + htmlStrBuilder.append("" + " Download: XML"); + DocumentHandler docHandler = new DocumentHandler(); + String destFileNamePdf = docHandler.getFullFileName(docId, "pdf"); + String destFileNameHtml = docHandler.getFullFileName(docId, "html"); + int lastDot = docId.lastIndexOf("."); + String docIdWithoutExtension = docId.substring(0, lastDot); + File destFilePdf = new File(destFileNamePdf); + File destFileHtml = new File(destFileNameHtml); + if (destFilePdf.exists()) + htmlStrBuilder.append(", " + "PDF"); + if (destFileHtml.exists()) + htmlStrBuilder.append(", " + "HTML"); + htmlStrBuilder.append("
      " + hitFragmentsStrBuilder.toString() + "
      "); + htmlStrBuilder.append("

      "); + htmlStrBuilder.append("

      "); + htmlStrBuilder.append("Elapsed time: " + elapsedTime + " ms"); + htmlStrBuilder.append(""); + htmlStrBuilder.append(""); + out.print(htmlStrBuilder.toString()); + } + out.close(); + } catch (Exception e) { + throw new ServletException(e); + } + } + + protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + doGet(request, response); + } + + private String getBaseUrl(HttpServletRequest request) { + return getServerUrl(request) + request.getContextPath(); + } + + private String getServerUrl(HttpServletRequest request) { + if ( ( request.getServerPort() == 80 ) || ( request.getServerPort() == 443 ) ) + return request.getScheme() + "://" + request.getServerName(); + else + return request.getScheme() + "://" + request.getServerName() + ":" + request.getServerPort(); + } + +} diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/src/de/mpg/mpiwg/berlin/mpdl/servlets/cms/XQueryDocument.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/src/de/mpg/mpiwg/berlin/mpdl/servlets/cms/XQueryDocument.java Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,228 @@ +package de.mpg.mpiwg.berlin.mpdl.servlets.cms; + +import java.io.IOException; +import java.io.PrintWriter; +import java.net.URL; +import java.util.ArrayList; + +import javax.servlet.ServletConfig; +import javax.servlet.ServletContext; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import de.mpg.mpiwg.berlin.mpdl.cms.document.DocumentHandler; +import de.mpg.mpiwg.berlin.mpdl.util.StringUtils; +import de.mpg.mpiwg.berlin.mpdl.xml.xquery.Hit; +import de.mpg.mpiwg.berlin.mpdl.xml.xquery.Hits; +import de.mpg.mpiwg.berlin.mpdl.cms.document.MetadataRecord; +import de.mpg.mpiwg.berlin.mpdl.cms.lucene.IndexHandler; +import 
de.mpg.mpiwg.berlin.mpdl.cms.transform.PageTransformer; +import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; +import de.mpg.mpiwg.berlin.mpdl.xml.xquery.XQueryEvaluator; + +public class XQueryDocument extends HttpServlet { + private static final long serialVersionUID = 1L; + private XQueryEvaluator xQueryEvaluator = null; + private PageTransformer pageTransformer = null; + + public XQueryDocument() { + super(); + } + + public void init(ServletConfig config) throws ServletException { + super.init(config); + ServletContext context = getServletContext(); + xQueryEvaluator = (XQueryEvaluator) context.getAttribute("xQueryEvaluator"); + pageTransformer = (PageTransformer) context.getAttribute("pageTransformer"); + } + + protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + doGet(request, response); + } + + protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + request.setCharacterEncoding("utf-8"); + response.setCharacterEncoding("utf-8"); + String docId = request.getParameter("docId"); + String query = request.getParameter("query"); + String pageStr = request.getParameter("page"); + if (pageStr == null) + pageStr = "1"; + int page = Integer.parseInt(pageStr); + String pageSizeStr = request.getParameter("pageSize"); + if (pageSizeStr == null) + pageSizeStr = "10"; + int pageSize = Integer.parseInt(pageSizeStr); + int from = (page * pageSize) - pageSize; // e.g. 0 + int to = page * pageSize - 1; // e.g. 
9 + String outputFormat = request.getParameter("outputFormat"); + if (outputFormat == null) + outputFormat = "xml"; + try { + DocumentHandler docHandler = new DocumentHandler(); + String docFileName = docHandler.getDocFullFileName(docId); + URL docFileUrl = new URL("file:" + docFileName); + Hits hits = null; + String errorStr = null; + try { + hits = xQueryEvaluator.evaluate(docFileUrl, query, from, to); + } catch (ApplicationException e) { + errorStr = e.getLocalizedMessage(); + } + IndexHandler indexHandler = IndexHandler.getInstance(); + MetadataRecord docMetadataRecord = indexHandler.getDocMetadata(docId); + if (outputFormat.equals("xml")) + response.setContentType("text/xml"); + else if (outputFormat.equals("html")) + response.setContentType("text/html"); + else + response.setContentType("text/xml"); + PrintWriter out = response.getWriter(); + String resultStr = ""; + if (errorStr == null) { + if (outputFormat.equals("xml")) + resultStr = createXmlString(docMetadataRecord, query, page, pageSize, hits); + else if (outputFormat.equals("html")) + resultStr = createHtmlString(docMetadataRecord, query, page, pageSize, hits, request); + } else { + resultStr = "Saxon XQuery error: " + errorStr; + } + out.print(resultStr); + out.close(); + } catch (ApplicationException e) { + throw new ServletException(e); + } + } + + private String createXmlString(MetadataRecord docMetadataRecord, String query, int page, int pageSize, Hits hits) throws ApplicationException { + String docId = docMetadataRecord.getDocId(); + ArrayList hitsArray = null; + if (hits != null) + hitsArray = hits.getHits(); + int hitsSize = -1; + int hitsArraySize = -1; + if (hits != null) + hitsSize = hits.getSize(); + if (hitsArray != null) + hitsArraySize = hitsArray.size(); + StringBuilder xmlStrBuilder = new StringBuilder(); + xmlStrBuilder.append(""); + xmlStrBuilder.append("" + docId + ""); + xmlStrBuilder.append(""); + String queryXml = StringUtils.deresolveXmlEntities(query); + 
xmlStrBuilder.append("" + queryXml + ""); + xmlStrBuilder.append("" + page + ""); + xmlStrBuilder.append("" + pageSize + ""); + xmlStrBuilder.append(""); + xmlStrBuilder.append("" + hitsSize + ""); + xmlStrBuilder.append(""); + for (int i=0; i"); + Hit hit = hitsArray.get(i); + String name = hit.getName(); + String typeStr = "ELEMENT"; + int type = hit.getType(); + if (type == Hit.TYPE_ATTRIBUTE) + typeStr = "ATTRIBUTE"; + else if (type == Hit.TYPE_ATOMIC_VALUE) + typeStr = "ATOMIV_VALUE"; + int docPage = hit.getPage(); + int hitPagePosition = hit.getHitPagePosition(); + String xmlContent = hit.getContent(); + if (name != null) + xmlStrBuilder.append("" + name + ""); + xmlStrBuilder.append("" + typeStr + ""); + if (docPage != -1) { + xmlStrBuilder.append("" + docPage + ""); + xmlStrBuilder.append("" + hitPagePosition + ""); + } + xmlStrBuilder.append("" + xmlContent + ""); + xmlStrBuilder.append(""); + } + xmlStrBuilder.append(""); + xmlStrBuilder.append(""); + return xmlStrBuilder.toString(); + } + + private String createHtmlString(MetadataRecord docMetadataRecord, String query, int page, int pageSize, Hits hits, HttpServletRequest request) throws ApplicationException { + String docId = docMetadataRecord.getDocId(); + ArrayList hitsArray = null; + if (hits != null) + hitsArray = hits.getHits(); + int hitsArraySize = -1; + if (hitsArray != null) + hitsArraySize = hitsArray.size(); + int hitsSize = hits.getSize(); + StringBuilder xmlStrBuilder = new StringBuilder(); + xmlStrBuilder.append(""); + xmlStrBuilder.append(""); + xmlStrBuilder.append("Document: \"" + query + "\""); + String baseUrl = getBaseUrl(request); + String cssUrl = baseUrl + "/css/page.css"; + xmlStrBuilder.append(""); + xmlStrBuilder.append(""); + xmlStrBuilder.append(""); + xmlStrBuilder.append("[This is a MPIWG CMS technology service] \"MPIWG"); + xmlStrBuilder.append("XQuery: " + query + ""); + xmlStrBuilder.append(""); + xmlStrBuilder.append("" + page + ""); + xmlStrBuilder.append("" + pageSize 
+ ""); + xmlStrBuilder.append("" + hitsSize + ""); + xmlStrBuilder.append(""); + xmlStrBuilder.append(""); + if (hitsSize == 1 && hitsArray.get(0).getType() == Hit.TYPE_ATOMIC_VALUE) { + Hit hit = hitsArray.get(0); + String xmlContent = hit.getContent(); + xmlContent = StringUtils.deresolveXmlEntities(xmlContent); + xmlStrBuilder.append(xmlContent); + } else { + for (int i=0; i"); + Hit hit = hitsArray.get(i); + int docPage = hit.getPage(); + String hitName = hit.getName(); + int hitType = hit.getType(); + int hitPagePosition = hit.getHitPagePosition(); + String getPageLink = baseUrl + "/query/GetPage?docId=" + docId + "&page=" + docPage + "&outputFormat=" + "xmlDisplay" + "&highlightElem=" + hitName + "&highlightElemPos=" + hitPagePosition; + String hitPres = hitName + "[" + hitPagePosition + "]"; + if (hitType == Hit.TYPE_ATTRIBUTE) { + hitPres = "@" + hitName; + getPageLink = baseUrl + "/query/GetPage?docId=" + docId + "&page=" + docPage; + } + String posStr = "Page " + docPage + ", " + hitPres + ":"; + int num = (page - 1) * pageSize + i + 1; + xmlStrBuilder.append(""); + xmlStrBuilder.append(""); + String xmlContent = hit.getContent(); + String htmlContent = pageTransformer.transform(xmlContent, docMetadataRecord, -1, "xmlDisplay"); // TODO performance: do not transform each single hit but transform then all in one step + xmlStrBuilder.append(""); + xmlStrBuilder.append(""); + } + } + xmlStrBuilder.append("
      " + num + ". " + ""); + if (docPage != -1) { + xmlStrBuilder.append("" + posStr + ""); + } + xmlStrBuilder.append(""); + xmlStrBuilder.append(htmlContent); + xmlStrBuilder.append("
      "); + xmlStrBuilder.append(""); + xmlStrBuilder.append(""); + return xmlStrBuilder.toString(); + } + + private String getBaseUrl(HttpServletRequest request) { + return getServerUrl(request) + request.getContextPath(); + } + + private String getServerUrl(HttpServletRequest request) { + if ( ( request.getServerPort() == 80 ) || ( request.getServerPort() == 443 ) ) + return request.getScheme() + "://" + request.getServerName(); + else + return request.getScheme() + "://" + request.getServerName() + ":" + request.getServerPort(); + } + +} diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/src/de/mpg/mpiwg/berlin/mpdl/servlets/lt/GetDictionaryEntries.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/src/de/mpg/mpiwg/berlin/mpdl/servlets/lt/GetDictionaryEntries.java Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,485 @@ +package de.mpg.mpiwg.berlin.mpdl.servlets.lt; + +import java.io.IOException; +import java.io.PrintWriter; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Date; + +import javax.servlet.ServletConfig; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.apache.commons.lang3.StringEscapeUtils; + +import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; +import de.mpg.mpiwg.berlin.mpdl.lt.dict.app.Lexica; +import de.mpg.mpiwg.berlin.mpdl.lt.dict.app.Lexicon; +import de.mpg.mpiwg.berlin.mpdl.lt.dict.app.LexiconEntry; +import de.mpg.mpiwg.berlin.mpdl.lt.dict.db.LexHandler; +import de.mpg.mpiwg.berlin.mpdl.lt.general.Language; +import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Form; +import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Lemma; +import de.mpg.mpiwg.berlin.mpdl.lt.text.norm.Normalizer; +import de.mpg.mpiwg.berlin.mpdl.servlets.util.ServletUtil; + +public class GetDictionaryEntries 
extends HttpServlet { + private static final long serialVersionUID = 1L; + private LexHandler lexHandler; + + public GetDictionaryEntries() { + super(); + } + + public void init(ServletConfig config) throws ServletException { + super.init(config); + try { + lexHandler = LexHandler.getInstance(); + } catch (ApplicationException e) { + throw new ServletException(e); + } + } + + protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + Date begin = new Date(); + request.setCharacterEncoding("utf-8"); + response.setCharacterEncoding("utf-8"); + String query = request.getParameter("query"); + String queryDisplay = request.getParameter("queryDisplay"); + String language = request.getParameter("language"); + String inputType = request.getParameter("inputType"); + String outputFormat = request.getParameter("outputFormat"); + String[] outputTypesArray = request.getParameterValues("outputType"); + String dictionary = request.getParameter("dictionary"); + String normalization = request.getParameter("normalization"); + String normalizationType = request.getParameter("normalizationType"); + String resultPageNumber = request.getParameter("resultPageNumber"); + String resultPageSize = request.getParameter("resultPageSize"); + if (query == null) + query = "a*"; + boolean isRangeQuery = false; + if (query.endsWith("*")) + isRangeQuery = true; + if (queryDisplay == null) + queryDisplay = query; + if (language == null) + language = "eng"; + if (inputType == null || ! (inputType.equals("form") || inputType.equals("lemma"))) + inputType = "form"; + if (outputFormat == null || ! 
(outputFormat.equals("xml") || outputFormat.equals("html") || outputFormat.equals("htmlFragment"))) + outputFormat = "xml"; + ArrayList outputTypes = new ArrayList(); + if (outputTypesArray == null) { + outputTypes.add("allCompact"); + } else if (isRangeQuery) { + outputTypes.add("dictCompact"); + } else { + outputTypes = new ArrayList(Arrays.asList(outputTypesArray)); + } + if (normalization == null || ! (normalization.equals("none") || normalization.equals("reg") || normalization.equals("reg norm"))) + normalization = "norm"; + if (normalizationType == null || ! (normalizationType.equals("display") || normalizationType.equals("dictionary"))) + normalizationType = "dictionary"; + String xmlDict = "all"; + if (dictionary != null) + xmlDict = dictionary; + int pageNumber = 1; + int pageSize = 50; + if (resultPageNumber != null) + pageNumber = new Integer(resultPageNumber); + if (resultPageSize != null) + pageSize = new Integer(resultPageSize); + String xmlQueryString = ""; + xmlQueryString = xmlQueryString + "" + query + ""; + xmlQueryString = xmlQueryString + "" + queryDisplay + ""; + xmlQueryString = xmlQueryString + "" + language + ""; + xmlQueryString = xmlQueryString + "" + inputType + ""; + xmlQueryString = xmlQueryString + "" + outputFormat + ""; + xmlQueryString = xmlQueryString + "" + outputTypes + ""; + xmlQueryString = xmlQueryString + "" + xmlDict + ""; + xmlQueryString = xmlQueryString + "" + normalization + ""; + xmlQueryString = xmlQueryString + "" + normalizationType + ""; + if (isRangeQuery) { + xmlQueryString = xmlQueryString + "" + pageNumber + ""; + xmlQueryString = xmlQueryString + "" + pageSize + ""; + } + xmlQueryString = xmlQueryString + ""; + try { + if (outputFormat.equals("xml")) + response.setContentType("text/xml"); + else if (outputFormat.equals("html") || outputFormat.equals("htmlFragment")) + response.setContentType("text/html"); + else + response.setContentType("text/xml"); + PrintWriter out = response.getWriter(); + if (query == 
null || query.isEmpty()) { + out.print("request parameter \"query\" is empty. Please specify a query."); + out.close(); + return; + } + ArrayList lemmas = null; + ArrayList dictionaries = null; + if (isRangeQuery) { + String queryTmp = query.substring(0, query.length() - 1); // without last star + if (dictionary != null) + dictionaries = lexHandler.getLexEntriesByLexiconBeginningWith(dictionary, queryTmp, pageNumber, pageSize); + else + dictionaries = lexHandler.getLexEntriesBeginningWith(language, queryTmp, pageNumber, pageSize); + } else { + String lang = language; + if (dictionary != null) { + Lexicon lexicon = Lexica.getInstance().getLexicon(dictionary); + if (lexicon != null) + lang = lexicon.getSourceLanguage(); + } + int normMode = Normalizer.DICTIONARY; + if (normalization.equals("none")) + normMode = Normalizer.NONE; + else if (normalizationType.equals("display")) + normMode = Normalizer.DISPLAY; + lemmas = lexHandler.getLemmas(query, inputType, lang, normMode, true); + dictionaries = lexHandler.getLexEntries(lemmas, lang, dictionary, query); + } + String baseUrl = ServletUtil.getInstance().getBaseUrl(request); + Date end = new Date(); + String elapsedTime = String.valueOf(end.getTime() - begin.getTime()); + String result = ""; + if (outputFormat == null || outputFormat.equals("xml")) + result = createXmlOutputString(query, lemmas, dictionaries, outputTypes, baseUrl, xmlQueryString, elapsedTime); + else if (outputFormat.equals("html") || outputFormat.equals("htmlFragment")) + result = createHtmlOutputString(query, queryDisplay, language, lemmas, dictionaries, pageNumber, pageSize, isRangeQuery, outputFormat, outputTypes, elapsedTime); + else + result = createXmlOutputString(query, lemmas, dictionaries, outputTypes, baseUrl, xmlQueryString, elapsedTime); + out.print(result); + out.close(); + } catch (ApplicationException e) { + throw new ServletException(e); + } + } + + protected void doPost(HttpServletRequest request, HttpServletResponse response) throws 
ServletException, IOException { + doGet(request, response); + } + + private String createXmlOutputString(String query, ArrayList lemmas, ArrayList lexicons, ArrayList outputTypes, String baseUrl, String xmlQueryString, String elapsedTime) { + boolean outputTypeMorphCompact = false; + if (outputTypes.contains("allCompact") || outputTypes.contains("morphCompact")) + outputTypeMorphCompact = true; + boolean outputTypeMorphFull = false; + if (outputTypes.contains("allFull") || outputTypes.contains("morphFull")) + outputTypeMorphFull = true; + boolean outputTypeDictCompact = false; + if (outputTypes.contains("allCompact") || outputTypes.contains("dictCompact")) + outputTypeDictCompact = true; + boolean outputTypeDictFull = false; + if (outputTypes.contains("allFull") || outputTypes.contains("dictFull")) + outputTypeDictFull = true; + boolean outputTypeWikiCompact = false; + if (outputTypes.contains("allCompact") || outputTypes.contains("wikiCompact")) + outputTypeWikiCompact = true; + boolean outputTypeWikiFull = false; + if (outputTypes.contains("allFull") || outputTypes.contains("wikiFull")) + outputTypeWikiFull = true; + String result = ""; + result = result + "" + "MPIWG MPDL language technology service (see: " + "" + baseUrl + "), Max Planck Institute for the History of Science, Berlin." + ""; + result = result + xmlQueryString; + result = result + "" + elapsedTime + ""; + if ((outputTypeMorphCompact || outputTypeMorphFull) && lemmas != null && ! 
lemmas.isEmpty()) { + result = result + ""; + for (int i=0; i"; + result = result + "" + lemmaName + ""; + if (outputTypeMorphFull) { + String lemmaProvider = lemma.getProvider(); + result = result + "" + lemmaProvider + ""; + result = result + "" + language + ""; + } + if (Language.getInstance().isArabic(language) || Language.getInstance().isLatin(language)) { + String remoteUrl = "http://www.perseus.tufts.edu/hopper/morph?l=" + lemmaName + "&la=" + language; + result = result + "" + remoteUrl + ""; + } else if (Language.getInstance().isGreek(language)) { + String remoteUrl = "http://www.perseus.tufts.edu/hopper/morph?l=" + lemmaName + "&la=" + "greek"; + result = result + "" + remoteUrl + ""; + } + if (outputTypeMorphFull) { + ArrayList

      forms = lemma.getFormsList(); + Collections.sort(forms); + if (forms != null && ! forms.isEmpty()) { + result = result + ""; + for (int j=0; j"; + Form f = forms.get(j); + String formName = f.getFormName(); + String formProvider = f.getProvider(); + result = result + "" + formProvider + ""; + result = result + "" + language + ""; + result = result + "" + formName + ""; + result = result + ""; + } + result = result + "
      "; + } + } + result = result + ""; + } + result = result + ""; + } + if ((outputTypeDictCompact || outputTypeDictFull) && lexicons != null) { + result = result + ""; + for (int i=0; i"; + } + if ((outputTypeWikiCompact || outputTypeWikiFull) && lemmas != null && ! lemmas.isEmpty()) { + result = result + ""; + String language = null; + for (int i=0; i"; + result = result + "" + lemmaName + ""; + String wikiHrefExact = "http://" + language + ".wikipedia.org/wiki/" + lemmaName; + String wikiHrefSearch = "http://" + language + ".wikipedia.org/wiki/index.php?search=" + lemmaName; + result = result + "" + wikiHrefExact + ""; + result = result + "" + wikiHrefSearch + ""; + result = result + ""; + } + if (language != null && Language.getInstance().isGerman(language) && query != null) { + String[] queryFormNames = query.split(" "); + for (int j=0; j"; + result = result + "" + queryFormName + ""; + String wikiHrefExact = "http://" + language + ".wikipedia.org/wiki/" + queryFormName; + String wikiHrefSearch = "http://" + language + ".wikipedia.org/wiki/index.php?search=" + queryFormName; + result = result + "" + wikiHrefExact + ""; + result = result + "" + wikiHrefSearch + ""; + result = result + ""; + } + } + result = result + ""; + } + result = result + ""; + return result; + } + + private String createHtmlOutputString(String query, String queryDisplay, String language, ArrayList lemmas, ArrayList lexicons, int pageNumber, int pageSize, boolean isRangeQuery, String outputFormat, ArrayList outputTypes, String elapsedTime) { + boolean outputTypeMorphCompact = false; + if (outputTypes.contains("allCompact") || outputTypes.contains("morphCompact")) + outputTypeMorphCompact = true; + boolean outputTypeMorphFull = false; + if (outputTypes.contains("allFull") || outputTypes.contains("morphFull")) + outputTypeMorphFull = true; + boolean outputTypeDictCompact = false; + if (outputTypes.contains("allCompact") || outputTypes.contains("dictCompact")) + outputTypeDictCompact = 
true; + boolean outputTypeDictFull = false; + if (outputTypes.contains("allFull") || outputTypes.contains("dictFull")) + outputTypeDictFull = true; + boolean outputTypeWikiCompact = false; + if (outputTypes.contains("allCompact") || outputTypes.contains("wikiCompact")) + outputTypeWikiCompact = true; + boolean outputTypeWikiFull = false; + if (outputTypes.contains("allFull") || outputTypes.contains("wikiFull")) + outputTypeWikiFull = true; + String result = ""; + result = result + ""; + result = result + ""; + result = result + "WordInfo for: \"" + queryDisplay + "\""; + result = result + ""; + result = result + getJavascriptFunctions(); + result = result + ""; + result = result + ""; + result = result + ""; + result = result + ""; + result = result + "
      [This is a MPIWG MPDL language technology service] \"MPIWG
      "; + result = result + "

      "; + result = result + "

      WordInfo
      "; + + result = result + "
      "; + result = result + "Query: "; + String htmlSelectBox = Language.getInstance().getHtmlSelectBox(language); + result = result + " Language: " + htmlSelectBox; + result = result + ""; + for (int i=0; i"; + } + result = result + " "; + result = result + ""; + result = result + "
      "; + + if ((outputTypeMorphCompact || outputTypeMorphFull) && lemmas != null && ! lemmas.isEmpty()) { + result = result + "Morphology"; + result = result + ""; + result = result + "
        "; + result = result + "
      • " + "" + "Lemmata" + ""; + result = result + "
          "; + for (int i=0; i" + lemmaName + ")"; + else if (Language.getInstance().isGreek(language)) + externalLinkText = " (external link: " + lemmaName + ")"; + ArrayList
          forms = lemma.getFormsList(); + Collections.sort(forms); + if (forms != null && ! forms.isEmpty()) { + formsText = formsText + "
            "; + formsText = formsText + "" + "Forms: " + ""; + for (int j=0; j"; + } + if (outputTypeMorphFull) { + result = result + "
          • " + formsText + "
          • "; + } else if (outputTypeMorphCompact) { + result = result + "
          • " + formsText + "
          • "; + } + } + result = result + "
          "; + result = result + ""; + result = result + "
        "; + result = result + ""; + } + if ((outputTypeDictCompact || outputTypeDictFull) && lexicons != null && ! lexicons.isEmpty()) { + if (isRangeQuery) { + int pageNumberUp = 1; + if (pageNumber > 1) + pageNumberUp = pageNumber - 1; + int pageNumberDown = pageNumber + 1; + int from = (pageNumber * pageSize) - pageSize + 1; + int to = pageNumber * pageSize; + result = result + ""; + result = result + ""; + result = result + ""; + result = result + ""; + result = result + ""; + result = result + ""; + result = result + ""; + result = result + ""; + result = result + "
        Page: \"page" + pageNumber + "\"page(" + from + " - " + to + ")
        "; + } + result = result + "Dictionary"; + result = result + ""; + result = result + "
          "; + for (int i=0; i"; + result = result + "" + lexicon.getDescription() + ""; + result = result + "
            "; + ArrayList entries = lexicon.getEntries(); + for (int j=0; j", ""); + repairedEntry = repairedEntry.replaceAll("", ""); + entryContent = entryContent + repairedEntry; // valid unicode content of the original entry + } else { + entryContent = entryContent + "[Remark: this dictionary entry has no valid XML/HTML content in database so a text version of this entry is shown.]:
            "; + String originalEntry = entry.getOriginalEntry(); + originalEntry = originalEntry.replaceAll("", ""); + originalEntry = originalEntry.replaceAll("", ""); + originalEntry = StringEscapeUtils.escapeXml(originalEntry); // create text version of the invalid xml content + entryContent = entryContent + originalEntry; + } + if (entry.getRemoteUrl() != null) { + entryContent = entryContent + ""; + } + } else { + if (entry.getRemoteUrl() != null) { + entryContent = entryContent + "external link: " + entry.getFormName() + ""; + } + } + String formName = entry.getFormName(); + String dictName = lexicon.getName(); + if (outputTypeDictFull) { + result = result + "
            • " + entryContent + "
          • "; + } else if (outputTypeDictCompact) { + result = result + "
            • " + entryContent + "
          • "; + } + } + result = result + "
          "; + result = result + ""; // lexicon entry + } + result = result + "
        "; + result = result + "
        "; + } + if ((outputTypeWikiCompact || outputTypeWikiFull) && lemmas != null && ! lemmas.isEmpty()) { + String langId = Language.getInstance().getLanguageId(language); + String wikiUrl = langId + ".wikipedia.org"; + result = result + "Wikipedia"; + result = result + ""; + result = result + "
          "; + result = result + "
        • " + "" + wikiUrl + ""; + result = result + "
            "; + for (int i=0; i" + ""; + } + if (language != null && Language.getInstance().isGerman(language) && query != null) { + String[] queryFormNames = query.split(" "); + for (int j=0; j"; + String wikiHrefExact = "http://" + wikiUrl + "/wiki/" + queryFormName; + String wikiHrefSearch = "http://" + wikiUrl + "/wiki/index.php?search=" + queryFormName; + result = result + "
          • " + "
          • "; + result = result + ""; + } + } + result = result + "
          "; + result = result + "
        • "; + result = result + "
        "; + result = result + "
        "; + } + result = result + "

        "; + result = result + "[* external links may not function]"; + result = result + "


        "; + result = result + "

        "; + result = result + "Elapsed time: " + elapsedTime + " ms, see the service description of this page, if you find a bug let us know"; + result = result + ""; + result = result + ""; + return result; + } + + private String getJavascriptFunctions() { + String result = ""; + return result; + } +} diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms-web/src/de/mpg/mpiwg/berlin/mpdl/servlets/lt/GetForms.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms-web/src/de/mpg/mpiwg/berlin/mpdl/servlets/lt/GetForms.java Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,214 @@ +package de.mpg.mpiwg.berlin.mpdl.servlets.lt; + +import java.io.IOException; +import java.io.PrintWriter; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Date; +import java.util.Hashtable; + +import javax.servlet.ServletConfig; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; +import de.mpg.mpiwg.berlin.mpdl.lt.dict.db.LexHandler; +import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Form; +import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Lemma; +import de.mpg.mpiwg.berlin.mpdl.lt.text.norm.Normalizer; + +public class GetForms extends HttpServlet { + private static final long serialVersionUID = 1L; + private LexHandler lexHandler; + + public GetForms() { + super(); + } + + public void init(ServletConfig config) throws ServletException { + super.init(config); + try { + lexHandler = LexHandler.getInstance(); + } catch (ApplicationException e) { + throw new ServletException(e); + } + } + + protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + Date begin = new Date(); + request.setCharacterEncoding("utf-8"); + response.setCharacterEncoding("utf-8"); + String query 
= request.getParameter("query"); + String language = request.getParameter("language"); + String outputFormat = request.getParameter("outputFormat"); + String outputType = request.getParameter("outputType"); + String normalization = request.getParameter("normalization"); + if (language == null) + language = "eng"; + if (outputFormat == null || ! (outputFormat.equals("xml") || outputFormat.equals("html") || outputFormat.equals("string"))) + outputFormat = "xml"; + if (outputType == null || ! (outputType.equals("compact") || outputType.equals("full"))) + outputType = "compact"; + if (normalization == null || ! (normalization.equals("none") || normalization.equals("reg") || normalization.equals("reg norm"))) + normalization = "norm"; + int normalizationType = Normalizer.DICTIONARY; + if (normalization != null && normalization.equals("none")) + normalizationType = Normalizer.NONE; + + String xmlQueryString = "" + query + "" + "" + language + "" + + "" + outputFormat + "" + "" + outputType + "" + "" + normalization + "" + ""; + try { + if (outputFormat.equals("xml")) + response.setContentType("text/xml"); + else if (outputFormat.equals("html") || outputFormat.equals("string")) + response.setContentType("text/html"); + else + response.setContentType("text/xml"); + PrintWriter out = response.getWriter(); + if (query == null || query.isEmpty()) { + out.print("request parameter query is empty. Please specify a query."); + out.close(); + return; + } + ArrayList lemmas = lexHandler.getLemmas(query, "lemma", language, normalizationType, true); + Hashtable formsHashtable = new Hashtable(); + ArrayList forms = new ArrayList(); + if (lemmas != null && ! 
lemmas.isEmpty()) { + for (int i=0; i lemmaForms = lemma.getFormsList(); + for (int j=0; j < lemmaForms.size(); j++) { + Form form = lemmaForms.get(j); + formsHashtable.put(form.getFormName(), form); + } + } + } + forms.addAll(formsHashtable.values()); + Collections.sort(forms); + String baseUrl = getBaseUrl(request); + Date end = new Date(); + String elapsedTime = String.valueOf(end.getTime() - begin.getTime()); + String result = ""; + if (outputFormat == null || outputFormat.equals("xml")) + result = createXmlOutputString(query, forms, outputType, baseUrl, xmlQueryString, elapsedTime); + else if (outputFormat.equals("html")) + result = createHtmlOutputString(query, forms, outputType, elapsedTime); + else if (outputFormat.equals("string")) + result = createStringOutputString(forms); + else + result = createXmlOutputString(query, forms, outputType, baseUrl, xmlQueryString, elapsedTime); + out.print(result); + out.close(); + } catch (ApplicationException e) { + throw new ServletException(e); + } + } + + protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + doGet(request, response); + } + + private String getBaseUrl( HttpServletRequest request ) { + if (request.getServerPort() == 80 || request.getServerPort() == 443) + return request.getScheme() + "://" + request.getServerName() + request.getContextPath(); + else + return request.getScheme() + "://" + request.getServerName() + ":" + request.getServerPort() + request.getContextPath(); + } + + + private String createXmlOutputString(String query, ArrayList forms, String outputType, String baseUrl, String xmlQueryString, String elapsedTime) { + String result = ""; + result = result + "" + "MPIWG MPDL language technology service (see: " + "" + baseUrl + "), Max Planck Institute for the History of Science, Berlin." + ""; + result = result + xmlQueryString; + result = result + "" + elapsedTime + ""; + if (forms != null && ! 
forms.isEmpty()) { + result = result + ""; + result = result + ""; + for (int i=0; i"; + Form f = forms.get(i); + String formName = f.getFormName(); + String language = f.getLanguage(); + String formProvider = f.getProvider(); + String lemmaName = f.getLemmaName(); + result = result + "" + formProvider + ""; + result = result + "" + language + ""; + result = result + "" + lemmaName + ""; + result = result + "" + formName + ""; + result = result + ""; + } + result = result + ""; + result = result + ""; + } + result = result + ""; + return result; + } + + private String createHtmlOutputString(String query, ArrayList

        forms, String outputType, String elapsedTime) { + String result = ""; + result = result + ""; + result = result + ""; + result = result + "Lemmas for: \"" + query + "\""; + result = result + ""; + result = result + ""; + result = result + ""; + result = result + ""; + result = result + "
        [This is a MPIWG MPDL language technology service] \"MPIWG
        "; + result = result + "

        "; + result = result + "

        Forms for: \"" + query + "\"

        "; + if (forms != null && ! forms.isEmpty()) { + result = result + "

        Morphology

        "; + result = result + "
          "; + result = result + "

          "; + if (outputType != null && outputType.equals("full")) { + for (int i=0; i"; + Form f = forms.get(i); + String formName = f.getFormName(); + String formProvider = f.getProvider(); + String language = f.getLanguage(); + String lemmaName = f.getLemmaName(); + result = result + formName + " (data provider: " + formProvider + ", language: " + language + ", lemmaName: " + lemmaName + ")"; + result = result + ""; + } + } else if (outputType == null || outputType.equals("compact")) { + result = result + "

        • "; + for (int i=0; i"; + } else if (outputType.equals("string")) { + for (int i=0; i"; + result = result + "

          "; + result = result + "Elapsed time: " + elapsedTime + " ms, see the service description of this page, if you find a bug let us know"; + result = result + ""; + result = result + ""; + return result; + } + + private String createStringOutputString(ArrayList forms) { + String result = ""; + for (int i=0; i" + "" + language + "" + "" + inputType + "" + + "" + outputFormat + "" + "" + outputType + "" + "" + normalization + "" + ""; + try { + if (outputFormat.equals("xml")) + response.setContentType("text/xml"); + else if (outputFormat.equals("html") || outputFormat.equals("string")) + response.setContentType("text/html"); + else + response.setContentType("text/xml"); + PrintWriter out = response.getWriter(); + if (query == null || query.isEmpty()) { + out.print("request parameter query is empty. Please specify a query."); + out.close(); + return; + } + ArrayList lemmas = lexHandler.getLemmas(query, inputType, language, normalizationType, true); + String baseUrl = getBaseUrl(request); + Date end = new Date(); + String elapsedTime = String.valueOf(end.getTime() - begin.getTime()); + String result = ""; + if (outputFormat == null || outputFormat.equals("xml")) + result = createXmlOutputString(query, lemmas, outputType, baseUrl, xmlQueryString, elapsedTime); + else if (outputFormat.equals("html")) + result = createHtmlOutputString(query, lemmas, outputType, elapsedTime); + else if (outputFormat.equals("string")) + result = createStringOutputString(lemmas); + else + result = createXmlOutputString(query, lemmas, outputType, baseUrl, xmlQueryString, elapsedTime); + out.print(result); + out.close(); + } catch (ApplicationException e) { + throw new ServletException(e); + } + } + + protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + doGet(request, response); + } + + private String getBaseUrl( HttpServletRequest request ) { + if (request.getServerPort() == 80 || request.getServerPort() == 443) + 
return request.getScheme() + "://" + request.getServerName() + request.getContextPath(); + else + return request.getScheme() + "://" + request.getServerName() + ":" + request.getServerPort() + request.getContextPath(); + } + + + private String createXmlOutputString(String query, ArrayList lemmas, String outputType, String baseUrl, String xmlQueryString, String elapsedTime) { + String result = ""; + result = result + "" + "MPIWG MPDL language technology service (see: " + "" + baseUrl + "), Max Planck Institute for the History of Science, Berlin." + ""; + result = result + xmlQueryString; + result = result + "" + elapsedTime + ""; + if (lemmas != null && ! lemmas.isEmpty()) { + result = result + ""; + for (int i=0; i"; + result = result + "" + lemmaName + ""; + if (outputType != null && outputType.equals("full")) { + String lemmaProvider = lemma.getProvider(); + result = result + "" + lemmaProvider + ""; + result = result + "" + language + ""; + } + if (Language.getInstance().isArabic(language) || Language.getInstance().isLatin(language)) { + String remoteUrl = "http://www.perseus.tufts.edu/hopper/morph?l=" + lemmaName + "&la=" + language; + result = result + "" + remoteUrl + ""; + } else if (Language.getInstance().isGreek(language)) { + String remoteUrl = "http://www.perseus.tufts.edu/hopper/morph?l=" + lemmaName + "&la=" + "greek"; + result = result + "" + remoteUrl + ""; + } + if (outputType != null && outputType.equals("full")) { + ArrayList forms = lemma.getFormsList(); + Collections.sort(forms); + if (forms != null && ! 
forms.isEmpty()) { + result = result + ""; + for (int j=0; j"; + Form f = forms.get(j); + String formName = f.getFormName(); + String formProvider = f.getProvider(); + result = result + "" + formProvider + ""; + result = result + "" + language + ""; + result = result + "" + formName + ""; + result = result + ""; + } + result = result + ""; + } + } + result = result + ""; + } + result = result + ""; + } + result = result + ""; + return result; + } + + private String createHtmlOutputString(String query, ArrayList lemmas, String outputType, String elapsedTime) { + String result = ""; + result = result + ""; + result = result + ""; + result = result + "Lemmas for: \"" + query + "\""; + result = result + ""; + result = result + ""; + result = result + ""; + result = result + ""; + result = result + "
          [This is a MPIWG language technology service] \"MPIWG
          "; + result = result + "

          "; + result = result + "

          Lemmas for: \"" + query + "\"

          "; + if (lemmas != null && ! lemmas.isEmpty()) { + result = result + "

          Morphology

          "; + result = result + "
            "; + result = result + "

            "; + for (int i=0; i"; + result = result + lemmaName; + if (outputType != null && outputType.equals("full")) { + String lemmaProvider = lemma.getProvider(); + result = result + " (data provider: " + lemmaProvider + ")"; + } + if (Language.getInstance().isArabic(language) || Language.getInstance().isLatin(language)) + result = result + " (external link: " + lemmaName + ")"; + else if (Language.getInstance().isGreek(language)) + result = result + " (external link: " + lemmaName + ")"; + if (outputType != null && outputType.equals("full")) { + ArrayList

            forms = lemma.getFormsList(); + Collections.sort(forms); + if (forms != null && ! forms.isEmpty()) { + result = result + "
              "; + for (int j=0; j"; + } + } + result = result + ""; + } + result = result + "
            "; + } + result = result + "[* external links may not function]"; + result = result + "
            "; + result = result + "

            "; + result = result + "Elapsed time: " + elapsedTime + " ms, see the service description of this page, if you find a bug let us know"; + result = result + ""; + result = result + ""; + return result; + } + + private String createStringOutputString(ArrayList lemmas) { + String result = ""; + for (int i=0; i")) // TODO check properly for xml type of the inputText + inputTextIsXml = true; + if (! inputTextIsXml) { + inputText = "" + inputText + ""; + } + StringReader xmlInputStringReader = new StringReader(inputText); + XmlTokenizer xmlTokenizer = new XmlTokenizer(xmlInputStringReader); + xmlTokenizer.setDocIdentifier(srcUrlStr); // TODO + xmlTokenizer.setLanguage(language); + xmlTokenizer.setNormFunctions(normFunctions); + xmlTokenizer.setOutputFormat(outputFormat); + xmlTokenizer.setOutputOptions(outputOptions); + if (stopElementsArray != null && stopElementsArray.length > 0) + xmlTokenizer.setStopElements(stopElementsArray); + if (elementsArray != null && elementsArray.length > 0) + xmlTokenizer.setElements(elementsArray); + if (highlightTermsArray != null && highlightTermsArray.length > 0) + xmlTokenizer.setHighlightTerms(highlightTermsArray); + xmlTokenizer.tokenize(); + if (outputFormat != null && outputFormat.equals("xml")) { + result = xmlTokenizer.getXmlResult(); + } else { // outputFormat == string + result = xmlTokenizer.getStringResult(); + } + if (result != null) + out.print(result); + out.close(); + } catch (ApplicationException e) { + throw new ServletException(e); + } + } + + private ArrayList getToken(String inputString, String language, String[] normFunctions) throws ApplicationException { + ArrayList retTokens = null; + try { + StringReader reader = new StringReader(inputString); + Tokenizer tokenizer = new Tokenizer(reader); + tokenizer.setLanguage(language); + tokenizer.setNormFunctions(normFunctions); + ArrayList tokens = tokenizer.getTokens(); + if (tokens != null) { + retTokens = new ArrayList(); + for (int i=0; i tokens, 
Hashtable> tokensDictionaries, String baseUrl, String elapsedTime) { + StringBuilder result = new StringBuilder(); + result.append(""); + result.append("" + "MPIWG MPDL language technology service (see: " + "" + baseUrl + "), Max Planck Institute for the History of Science, Berlin." + ""); + result.append("" + elapsedTime + ""); + if (tokens != null && ! tokens.isEmpty()) { + result.append(""); + for (int i=0; i"); + result.append("" + token + ""); + if (tokensDictionaries != null && ! tokensDictionaries.isEmpty()) { + ArrayList tokenDictionaries = tokensDictionaries.get(token); + if (tokenDictionaries != null) { + result.append(""); + for (int j=0; j"); + } + } + result.append(""); + } + result.append(""); + } + result.append(""); + return result.toString(); + } + + private String createStringOutputString(ArrayList tokens) { + StringBuilder result = new StringBuilder(); + if (tokens != null && ! tokens.isEmpty()) { + for (int i=0; i + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/.externalToolBuilders/mpiwg-mpdl-cms-build.launch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/.externalToolBuilders/mpiwg-mpdl-cms-build.launch Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,13 @@ + + + + + + + + + + + + + diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/.project --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/.project Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,27 @@ + + + mpiwg-mpdl-cms + + + + + + org.eclipse.jdt.core.javabuilder + + + + + org.eclipse.ui.externaltools.ExternalToolBuilder + full,incremental, + + + LaunchConfigHandle + <project>/.externalToolBuilders/mpiwg-mpdl-cms-build.launch + + + + + + org.eclipse.jdt.core.javanature + + diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/.settings/org.eclipse.core.resources.prefs --- 
/dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/.settings/org.eclipse.core.resources.prefs Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,6 @@ +#Thu May 17 12:31:24 CEST 2012 +eclipse.preferences.version=1 +encoding//src/de/mpg/mpiwg/berlin/mpdl/cms/lucene/IndexHandler.java=UTF-8 +encoding//src/de/mpg/mpiwg/berlin/mpdl/cms/test/TestLocal.java=UTF-8 +encoding//src/de/mpg/mpiwg/berlin/mpdl/cms/transform/GetFragmentsContentHandler.java=UTF-8 +encoding//src/de/mpg/mpiwg/berlin/mpdl/cms/transform/HighlightContentHandler.java=UTF-8 diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/constants.properties --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/bin/constants.properties Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,5 @@ +dataDir=/Users/jwillenborg/mpdl/data/lt +documentsDir=/Users/jwillenborg/mpdl/data/xml/documents +luceneDocumentsDir=/Users/jwillenborg/mpdl/data/xml/lucene/documents +luceneNodesDir=/Users/jwillenborg/mpdl/data/xml/lucene/nodes +confDir=/Users/jwillenborg/mpdl/data/collectionConfs \ No newline at end of file diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/confmanager/CollectionReader.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/confmanager/CollectionReader.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/confmanager/ConfManager.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/confmanager/ConfManager.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/confmanager/ConfManagerResultWrapper.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/confmanager/ConfManagerResultWrapper.class has changed 
diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/document/Document.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/document/Document.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/document/DocumentHandler.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/document/DocumentHandler.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/document/Hits.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/document/Hits.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/document/MetadataRecord.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/document/MetadataRecord.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/document/PdfHandler.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/document/PdfHandler.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/document/PdfHandlerUserAgent.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/document/PdfHandlerUserAgent.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/document/Token.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/document/Token.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/general/Constants.class Binary file 
software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/general/Constants.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/harvester/PathExtractor.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/harvester/PathExtractor.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/lucene/IndexHandler.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/lucene/IndexHandler.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/scheduler/CmsChainScheduler.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/scheduler/CmsChainScheduler.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/scheduler/CmsChainSchedulerListener.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/scheduler/CmsChainSchedulerListener.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/scheduler/CmsDocJob.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/scheduler/CmsDocJob.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/scheduler/CmsDocOperation.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/scheduler/CmsDocOperation.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/scheduler/quartz.properties --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/scheduler/quartz.properties Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,9 @@ +org.quartz.scheduler.instanceName = MpdlScheduler +org.quartz.scheduler.instanceId = auto +org.quartz.scheduler.rmi.export = false +org.quartz.scheduler.rmi.proxy = false + +org.quartz.threadPool.class = org.quartz.simpl.SimpleThreadPool +org.quartz.threadPool.threadCount = 3 + +org.quartz.jobStore.class = org.quartz.simpl.RAMJobStore diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/test/TestLocal.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/test/TestLocal.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/GetFragmentsContentHandler$Element.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/GetFragmentsContentHandler$Element.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/GetFragmentsContentHandler.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/GetFragmentsContentHandler.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/HighlightContentHandler.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/HighlightContentHandler.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/PageTransformer.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/PageTransformer.class has changed diff -r e845310098ba -r e9fe3186670c 
software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/QueryTransformer.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/QueryTransformer.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/TocTransformer.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/TocTransformer.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/XslResourceTransformer.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/XslResourceTransformer.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/chars.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/chars.xsl Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,10 @@ + + + + + + + + + + diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/pageArchimedes.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/pageArchimedes.xsl Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,332 @@ + + + + + + + + + + + + +

            + + + +
            + + + + + + + + + + + + + + + +
            + + + + +
            + + +
            +
            + + +
            + + + + + + + + + + + + + +
            +
            +
            +
            + + + +
            + + + +
            +
            +
            + + + +
            + + + + + + + + + + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + +
            +
            + + +
            + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + +
            +
            + + + + + + +
            +
            + + diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/pageEcho.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/pageEcho.xsl Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,559 @@ + + + + + + + + + + + + +
            + + + +
            +
            + + + + + + +
            + + + + +
            + + +
            +
            + + +
            + + + + + + + + + + + + + +
            +
            +
            +
            + + + +
            + + + + + + + + + + + + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + + +
            +
            +
            + + + + +
            + + + + + + + + + + + + + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + +
            +
            + + +
            + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + +
            +
            + + + + + + +
            +
            + +
            diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/pageTei.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/pageTei.xsl Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,690 @@ + + + + + + + + + + + + +
            + + + +
            +
            + + + + + + + + + + + + + + + + + + + +
            + + + + +
            + + +
            +
            + + +
            + + + + + + + + + + + + + +
            +
            +
            + + + + + + + + + + + + + + + + +
            + Figure: {$figDesc} +
            + +
            +
            +
            +
            +
            +
            + + + + + +
          • +
            +
            +
            +
            +
            +
            +
            +
            + + + +
            + + + +
            +
            +
            + + +
            + + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + + +
            +
            + + + + + + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            +
            + + + + + + +01002 b + ghj565hghj23,b02,0 + 0 + bvcxvb4 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
          • +
            +
            +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + +
            +
            + + +
            + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/pageXhtml.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/pageXhtml.xsl Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,240 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + + +
            +
            +
            + + +
            + + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + +
            +
            + + +
            + + +
            +
            + + +
            + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + +
            +
            + + + + + + +
            +
            + +
            diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/pageXml.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/pageXml.xsl Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,86 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
              + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + +
            +
            + + + + + + +
            +
            + +
            diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/queryDocument.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/queryDocument.xsl Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,93 @@ + + + + + + + + + + +' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + + + + + + +
            +
            + + + + + + + + 1 + + + + + + + + xmlId + 4711 + + false + + + + + + + +
            +
            + + + + +
            diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/replaceAnchor.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/replaceAnchor.xsl Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,82 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/toc.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/toc.xsl Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,147 @@ + + + + + + + + + + + + + + + + + Table of contents + + + + + + + Figures + + + + + + + Notes + + + + + + + Pages + + + + + + + Places + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/tocOut.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/transform/tocOut.xsl Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,45 @@ + + + + + + + + + + + + + + + + + + + + + + + +
            + + + +
            +
            + + + + + + + + + + + + +
            diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/translator/MicrosoftTranslator.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/cms/translator/MicrosoftTranslator.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/exception/ApplicationException.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/bin/de/mpg/mpiwg/berlin/mpdl/exception/ApplicationException.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/build.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/build/build.xml Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,39 @@ + + + mpiwg-mpdl-cms + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/constants.properties --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/constants.properties Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,5 @@ +dataDir=/Users/jwillenborg/mpdl/data/lt +documentsDir=/Users/jwillenborg/mpdl/data/xml/documents +luceneDocumentsDir=/Users/jwillenborg/mpdl/data/xml/lucene/documents +luceneNodesDir=/Users/jwillenborg/mpdl/data/xml/lucene/nodes +confDir=/Users/jwillenborg/mpdl/data/collectionConfs \ No newline at end of file diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/confmanager/CollectionReader.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/confmanager/CollectionReader.class has changed diff -r e845310098ba -r e9fe3186670c 
software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/confmanager/ConfManager.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/confmanager/ConfManager.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/confmanager/ConfManagerResultWrapper.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/confmanager/ConfManagerResultWrapper.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/document/Document.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/document/Document.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/document/DocumentHandler.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/document/DocumentHandler.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/document/Hits.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/document/Hits.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/document/MetadataRecord.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/document/MetadataRecord.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/document/PdfHandler.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/document/PdfHandler.class has changed diff -r 
e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/document/PdfHandlerUserAgent.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/document/PdfHandlerUserAgent.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/document/Token.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/document/Token.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/general/Constants.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/general/Constants.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/harvester/PathExtractor.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/harvester/PathExtractor.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/lucene/IndexHandler.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/lucene/IndexHandler.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/scheduler/CmsChainScheduler.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/scheduler/CmsChainScheduler.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/scheduler/CmsChainSchedulerListener.class Binary file 
software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/scheduler/CmsChainSchedulerListener.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/scheduler/CmsDocJob.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/scheduler/CmsDocJob.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/scheduler/CmsDocOperation.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/scheduler/CmsDocOperation.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/scheduler/quartz.properties --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/scheduler/quartz.properties Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,9 @@ +org.quartz.scheduler.instanceName = MpdlScheduler +org.quartz.scheduler.instanceId = auto +org.quartz.scheduler.rmi.export = false +org.quartz.scheduler.rmi.proxy = false + +org.quartz.threadPool.class = org.quartz.simpl.SimpleThreadPool +org.quartz.threadPool.threadCount = 3 + +org.quartz.jobStore.class = org.quartz.simpl.RAMJobStore diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/test/TestLocal.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/test/TestLocal.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/GetFragmentsContentHandler$1.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/GetFragmentsContentHandler$1.class 
has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/GetFragmentsContentHandler$Element.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/GetFragmentsContentHandler$Element.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/GetFragmentsContentHandler.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/GetFragmentsContentHandler.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/HighlightContentHandler.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/HighlightContentHandler.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/HighlightTransformer.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/HighlightTransformer.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/PageTransformer.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/PageTransformer.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/QueryTransformer.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/QueryTransformer.class has changed diff -r e845310098ba -r e9fe3186670c 
software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/ReplaceAnchorTransformer.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/ReplaceAnchorTransformer.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/TocTransformer.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/TocTransformer.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/XslResourceTransformer.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/XslResourceTransformer.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/chars.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/chars.xsl Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,10 @@ + + + + + + + + + + diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/highlight.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/highlight.xsl Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,29 @@ + + + + + + + + + +
            +
            + + + + + + +
            +
            + + + + + + + + + +
            diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/pageArchimedes.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/pageArchimedes.xsl Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,332 @@ + + + + + + + + + + + + +
            + + + +
            +
            + + + + + + + + + + + + + + +
            + + + + +
            + + +
            +
            + + +
            + + + + + + + + + + + + + +
            +
            +
            +
            + + + +
            + + + +
            +
            +
            + + + +
            + + + + + + + + + + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + +
            +
            + + +
            + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + +
            +
            + + + + + + +
            +
            + +
            diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/pageEcho.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/pageEcho.xsl Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,559 @@ + + + + + + + + + + + + +
            + + + +
            +
            + + + + + + +
            + + + + +
            + + +
            +
            + + +
            + + + + + + + + + + + + + +
            +
            +
            +
            + + + +
            + + + + + + + + + + + + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + + +
            +
            +
            + + + + +
            + + + + + + + + + + + + + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + +
            +
            + + +
            + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + +
            +
            + + + + + + +
            +
            + +
            diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/pageEchoSpan.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/pageEchoSpan.xsl Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,112 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/pageTei.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/pageTei.xsl Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,686 @@ + + + + + + + + + + + + +
            + + + +
            +
            + + + + + + + + + + + + + + + + + + + +
            + + + + +
            + + +
            +
            + + +
            + + + + + + + + + + + + + +
            +
            +
            + + + + + + + + + + + + + + + + +
            + Figure: {$figDesc} +
            + +
            +
            +
            +
            +
            +
            + + + + + +
          • +
            +
            +
            +
            +
            +
            +
            +
            + + + +
            + + + +
            +
            +
            + + +
            + + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + + +
            +
            + + + + + + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
          • +
            +
            +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + +
            +
            + + +
            + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/pageXhtml.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/pageXhtml.xsl Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,240 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + + +
            +
            +
            + + +
            + + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + +
            +
            + + +
            + + +
            +
            + + +
            + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + +
            +
            + + + + + + +
            +
            + +
            diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/pageXml.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/pageXml.xsl Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,86 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
              + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + +
            +
            + + + + + + +
            +
            + +
            diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/queryDocument.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/queryDocument.xsl Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,93 @@ + + + + + + + + + + +' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + + + + + + +
            +
            + + + + + + + + 1 + + + + + + + + xmlId + 4711 + + false + + + + + + + +
            +
            + + + + +
            diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/replaceAnchor.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/replaceAnchor.xsl Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,82 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/toc.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/toc.xsl Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,147 @@ + + + + + + + + + + + + + + + + + Table of contents + + + + + + + Figures + + + + + + + Notes + + + + + + + Pages + + + + + + + Places + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/tocHtml.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/tocHtml.xsl Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + +
            + + + +
            +
            + + + + + + + + + + +
            diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/tocOut.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/transform/tocOut.xsl Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,45 @@ + + + + + + + + + + + + + + + + + + + + + + + +
            + + + +
            +
            + + + + + + + + + + + + +
            diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/translator/MicrosoftTranslator.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/cms/translator/MicrosoftTranslator.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/exception/ApplicationException.class Binary file software/mpdl-services-new/mpiwg-mpdl-cms/build/classes/de/mpg/mpiwg/berlin/mpdl/exception/ApplicationException.class has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/dist/mpiwg-mpdl-cms.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms/dist/mpiwg-mpdl-cms.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/lib/commons-codec-1.3.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms/lib/commons-codec-1.3.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/lib/commons-collections-3.2.1.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms/lib/commons-collections-3.2.1.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/lib/commons-collections-LICENSE.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/lib/commons-collections-LICENSE.txt Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/lib/commons-collections-NOTICE.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/lib/commons-collections-NOTICE.txt Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,5 @@ +Apache Commons Collections +Copyright 2001-2008 The Apache Software Foundation + +This product includes software developed by +The Apache Software Foundation (http://www.apache.org/). 
diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/lib/commons-httpclient-3.1.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms/lib/commons-httpclient-3.1.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/lib/commons-httpclient-LICENSE.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/lib/commons-httpclient-LICENSE.txt Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,176 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/lib/commons-httpclient-NOTICE.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/lib/commons-httpclient-NOTICE.txt Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,5 @@ +Apache Jakarta HttpClient +Copyright 1999-2007 The Apache Software Foundation + +This product includes software developed by +The Apache Software Foundation (http://www.apache.org/). 
diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/lib/commons-io-2.0.1.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms/lib/commons-io-2.0.1.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/lib/commons-lang3-3.0.1.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms/lib/commons-lang3-3.0.1.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/lib/commons-logging-1.1.1.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms/lib/commons-logging-1.1.1.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/lib/commons-logging-LICENSE.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/lib/commons-logging-LICENSE.txt Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/lib/commons-logging-NOTICE.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/lib/commons-logging-NOTICE.txt Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,6 @@ +Apache Commons Logging +Copyright 2003-2007 The Apache Software Foundation + +This product includes software developed by +The Apache Software Foundation (http://www.apache.org/). 
+ diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/lib/core-renderer.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms/lib/core-renderer.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/lib/httpclient-4.1.2.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms/lib/httpclient-4.1.2.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/lib/httpcore-4.1.2.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms/lib/httpcore-4.1.2.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/lib/iText-2.0.8.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms/lib/iText-2.0.8.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/lib/json_simple-1.1.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms/lib/json_simple-1.1.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/lib/lucene-core-3.5.0-javadoc.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms/lib/lucene-core-3.5.0-javadoc.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/lib/lucene-core-3.5.0-sources.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms/lib/lucene-core-3.5.0-sources.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/lib/lucene-core-3.5.0.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms/lib/lucene-core-3.5.0.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/lib/lucene-highlighter-3.5.0-javadoc.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms/lib/lucene-highlighter-3.5.0-javadoc.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/lib/lucene-highlighter-3.5.0.jar Binary file 
software/mpdl-services-new/mpiwg-mpdl-cms/lib/lucene-highlighter-3.5.0.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/lib/lucene-memory-3.5.0.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms/lib/lucene-memory-3.5.0.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/lib/lucene-queries-3.5.0.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms/lib/lucene-queries-3.5.0.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/lib/microsoft-translator-java-api-0.4.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms/lib/microsoft-translator-java-api-0.4.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/lib/mpiwg-mpdl-lt.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms/lib/mpiwg-mpdl-lt.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/lib/mpiwg-mpdl-xml.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms/lib/mpiwg-mpdl-xml.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/lib/quartz-1.6.5.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms/lib/quartz-1.6.5.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/lib/quartz-LICENSE.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/lib/quartz-LICENSE.txt Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/lib/saxon.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/lib/saxon.txt Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,3 @@ +Saxon: + +Release 9.1.0.5 (free version): releases < 9.1.0.7 support saxon extension functions diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/lib/saxon9-dom.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms/lib/saxon9-dom.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/lib/saxon9-s9api.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms/lib/saxon9-s9api.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/lib/saxon9.jar Binary file software/mpdl-services-new/mpiwg-mpdl-cms/lib/saxon9.jar has changed diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/src/constants.properties --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/src/constants.properties Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,5 @@ +dataDir=/Users/jwillenborg/mpdl/data/lt +documentsDir=/Users/jwillenborg/mpdl/data/xml/documents +luceneDocumentsDir=/Users/jwillenborg/mpdl/data/xml/lucene/documents +luceneNodesDir=/Users/jwillenborg/mpdl/data/xml/lucene/nodes +confDir=/Users/jwillenborg/mpdl/data/collectionConfs \ No newline at end of file diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/confmanager/CollectionReader.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/confmanager/CollectionReader.java Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,122 @@ +package de.mpg.mpiwg.berlin.mpdl.cms.confmanager; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import 
java.util.List; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; + +import de.mpg.mpiwg.berlin.mpdl.cms.general.Constants; +import de.mpg.mpiwg.berlin.mpdl.cms.harvester.PathExtractor; +import org.w3c.dom.Document; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.xml.sax.SAXException; + +public class CollectionReader { + + ConfManagerResultWrapper cmrw; + private HashMap wrapperContainer; + private static CollectionReader collectionReader; + + private CollectionReader() { + wrapperContainer = new HashMap(); + readConfFiles(); + } + + public static CollectionReader getInstance() { + if (collectionReader == null) + collectionReader = new CollectionReader(); + return collectionReader; + } + + private void readConfFiles(){ + System.out.println("---------------"); + System.out.println("reading configuration files..."); + + // holt alle konfiguratiuonsdateien aus dem konf-Ordner + PathExtractor ext = new PathExtractor(); + List configsList = ext.extractPathLocally(Constants.getInstance().getConfDir()); + System.out.println("Anzahl der konfugirationsdateien : " + configsList.size()); + DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance(); + // docFactory.setNamespaceAware(true); + DocumentBuilder builder = null; + + File configFile = null; + for (String configXml : configsList) { + System.out.println("reading : " + configXml); + try { + builder = docFactory.newDocumentBuilder(); + } catch (ParserConfigurationException e) { + e.printStackTrace(); + } + configFile = new File(configXml); + Document document = null; + try { + document = builder.parse(configFile); + } catch (SAXException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + cmrw = new ConfManagerResultWrapper(); + + NodeList idlist = document.getElementsByTagName("collectionId"); + // darf jeweils nur ein node enthalten sein + Node idNode = 
idlist.item(0); + if(idNode != null){ + if (!idNode.getTextContent().equals("")) { + cmrw.setCollectionId(idNode.getTextContent()); + } + } + NodeList nodeliste = document.getElementsByTagName("mainLanguage"); + // darf jeweils nur ein node enthalten sein + Node langNode = nodeliste.item(0); + if(langNode != null){ + if (!langNode.getTextContent().equals("")) { + cmrw.setMainLanguage(langNode.getTextContent()); + } + } + NodeList collNamelist = document.getElementsByTagName("name"); + // darf jeweils nur ein node enthalten sein + Node nameNode = collNamelist.item(0); + if(nameNode != null){ + if (!nameNode.getTextContent().equals("")) { + cmrw.setCollectionName(nameNode.getTextContent()); + } + } + + NodeList fieldNodes = document.getElementsByTagName("field"); + ArrayList fields = new ArrayList(); + fields = new ArrayList(); + if(fieldNodes != null){ + for (int i = 0; i < fieldNodes.getLength(); i++) { + if (!fieldNodes.item(i).getTextContent().equals("")) { + fields.add((fieldNodes.item(i).getTextContent().trim())); + } + } + } + cmrw.setFields(fields); + + NodeList nodeli = document.getElementsByTagName("collectionDataUrl"); + Node dataNode = nodeli.item(0); + if(dataNode != null){ + if (!dataNode.getTextContent().trim().equals("")) { + cmrw.setCollectionDataUrl(dataNode.getTextContent()); + } + } + + wrapperContainer.put(cmrw.getCollectionId(), cmrw); + } + } + + public ConfManagerResultWrapper getResultWrapper(String collectionId) { + ConfManagerResultWrapper cmrw = wrapperContainer.get(collectionId); + return cmrw; + } + +} diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/confmanager/ConfManager.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/confmanager/ConfManager.java Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,139 @@ +package de.mpg.mpiwg.berlin.mpdl.cms.confmanager; + +import java.io.File; +import 
java.io.FileOutputStream; +import java.net.URL; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.xpath.XPathExpressionException; + +import de.mpg.mpiwg.berlin.mpdl.cms.harvester.PathExtractor; + +import de.mpg.mpiwg.berlin.mpdl.xml.xquery.XQueryEvaluator; + +import de.mpg.mpiwg.berlin.mpdl.cms.general.Constants; +import org.w3c.dom.Document; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; + +import com.sun.org.apache.xml.internal.serialize.XMLSerializer; + +public class ConfManager { + private ConfManagerResultWrapper cmrw; + private HashMap wrapperContainer; + private static ConfManager confManager; + + private ConfManager() { + wrapperContainer = new HashMap(); + try { + checkCollectionConfFiles(); + } catch (XPathExpressionException e) { + e.printStackTrace(); + } + } + + public static ConfManager getInstance(){ + if(confManager == null) + confManager = new ConfManager(); + return confManager; + } + + /** + * checks if an update of a project is necessary by checking configuration + * file + * + * @throws XPathExpressionException + */ + private void checkCollectionConfFiles() throws XPathExpressionException { + System.out.println("---------------"); + System.out.println("checking configuration files..."); + // holt alle Konfiguratiuonsdateien aus dem konf-Ordner + PathExtractor ext = new PathExtractor(); + List configsList = ext.extractPathLocally(Constants.getInstance().getConfDir()); + System.out.println("Anzahl der Konfigurationsdateien : " + configsList.size()); + try { + File configFile = null; + // Ueberprueft alle Konf-dateien auf update und fuehrt es bei Bedarf aus + for (String configXml : configsList) { + System.out.println("checking : " + configXml); + XQueryEvaluator xQueryEvaluator = new XQueryEvaluator(); + configFile = new File(configXml); + URL srcUrl = configFile.toURI().toURL(); + String update = 
xQueryEvaluator.evaluateAsString(srcUrl, "//collection/update/text()"); + if (update != null && update.equals("true")) { + System.out.println("update tag is set on : " + update); + cmrw = new ConfManagerResultWrapper(); + String collectionId = xQueryEvaluator.evaluateAsString(srcUrl, "//collectionId/text()"); + if (collectionId != null) { + cmrw.setCollectionId(collectionId); + } + String mainLanguage = xQueryEvaluator.evaluateAsString(srcUrl, "//mainLanguage/text()"); + if (mainLanguage != null) { + cmrw.setMainLanguage(mainLanguage); + } + String name = xQueryEvaluator.evaluateAsString(srcUrl, "//name/text()"); + if (name != null) { + cmrw.setCollectionName(name); + } + String fieldsStr = xQueryEvaluator.evaluateAsStringValueJoined(srcUrl, "//field"); + ArrayList fields = new ArrayList(); + if (fields != null) { + fieldsStr = fieldsStr.trim(); + String[] fieldsArray = fieldsStr.split(" "); + for (int i=0; i collectionUrls = new ArrayList(); + collectionUrls.add(collectionDataUrl); + cmrw.setCollectionUrls(collectionUrls); + } + // flag im Konfigurations-File auf false setzen durch serialisierung in das File + DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance(); + DocumentBuilder builder = docFactory.newDocumentBuilder(); + Document configFileDocument = builder.parse(configFile); + NodeList updateNodeList = configFileDocument.getElementsByTagName("update"); + Node n = updateNodeList.item(0); + n.setTextContent("false"); + FileOutputStream os = new FileOutputStream(configFile); + XMLSerializer ser = new XMLSerializer(os, null); + ser.serialize(configFileDocument); // Vorsicht: wenn es auf true ist: es wird alles neu indexiert + } + wrapperContainer.put(collectionId, cmrw); + } + } + } catch(Exception e) { + e.printStackTrace(); + } + } + + /** + * der Extractor holt alle Projekt zugehoerigen Urls + */ + private void extractUrlsFromCollections(String collectionDataUrl, ConfManagerResultWrapper cmrw, String excludesStr) { + 
System.out.println("collecting urls of resources that need update..."); + if(!collectionDataUrl.equals("")){ + PathExtractor extractor = new PathExtractor(); + List collectionUrls = extractor.initExtractor(collectionDataUrl, excludesStr); + cmrw.setCollectionUrls(collectionUrls); + } + } + + public ConfManagerResultWrapper getResultWrapper(String collectionId) { + ConfManagerResultWrapper cmrw = wrapperContainer.get(collectionId); + return cmrw; + } +} diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/confmanager/ConfManagerResultWrapper.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/confmanager/ConfManagerResultWrapper.java Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,93 @@ +package de.mpg.mpiwg.berlin.mpdl.cms.confmanager; + +import java.util.ArrayList; +import java.util.List; + +public class ConfManagerResultWrapper { + + private List collectionUrls; + private String collectionId; + private String collectionName; + private String collectionDataUrl; + private String mainLanguage; + private ArrayList fields; + private List formats; + private List excludeField; + + + public ConfManagerResultWrapper(){ + collectionUrls = new ArrayList(); + collectionId = ""; + collectionName = ""; + collectionDataUrl = ""; + mainLanguage = ""; + fields = new ArrayList(); + formats = new ArrayList(); + excludeField = new ArrayList(); + } + + public List getCollectionUrls() { + return collectionUrls; + } + + public String getCollectionId() { + return collectionId; + } + + public String getCollectionName() { + return collectionName; + } + + public void setCollectionId(String collectionId){ + this.collectionId = collectionId; + } + + public void setCollectionUrls(List collectionUrls){ + this.collectionUrls = collectionUrls; + } + + public void setCollectionName(String collectionName) { + this.collectionName = collectionName; + } + + public String 
getCollectiondataUrl() { + return collectionDataUrl; + } + + public void setCollectionDataUrl(String collectiondataUrl) { + this.collectionDataUrl = collectiondataUrl; + } + + public List getFormats() { + return formats; + } + + public void setFormats(List formats) { + this.formats = formats; + } + + public List getExcludeField() { + return excludeField; + } + + public void setExcludeField(List excludeField) { + this.excludeField = excludeField; + } + + public String getMainLanguage() { + return mainLanguage; + } + + public void setMainLanguage(String mainLanguage) { + this.mainLanguage = mainLanguage; + } + + public ArrayList getFields() { + return fields; + } + + public void setFields(ArrayList fields) { + this.fields = fields; + } + +} diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/document/Document.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/document/Document.java Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,38 @@ +package de.mpg.mpiwg.berlin.mpdl.cms.document; + +import java.util.ArrayList; + +import org.apache.lucene.document.Fieldable; + +public class Document { + private org.apache.lucene.document.Document document; + private ArrayList hitFragments; + + public Document(org.apache.lucene.document.Document luceneDocument) { + this.document = luceneDocument; + } + + public org.apache.lucene.document.Document getDocument() { + return document; + } + + public Fieldable getFieldable(String field) { + if (document != null) + return document.getFieldable(field); + else + return null; + } + + public void setDocument(org.apache.lucene.document.Document document) { + this.document = document; + } + + public ArrayList getHitFragments() { + return hitFragments; + } + + public void setHitFragments(ArrayList hitFragments) { + this.hitFragments = hitFragments; + } + +} diff -r e845310098ba -r e9fe3186670c 
software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/document/DocumentHandler.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/document/DocumentHandler.java Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,927 @@ +package de.mpg.mpiwg.berlin.mpdl.cms.document; + +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.StringReader; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.util.Arrays; +import java.util.Date; +import java.util.Hashtable; +import java.util.Iterator; +import java.util.List; +import java.util.logging.Logger; + +import net.sf.saxon.s9api.Axis; +import net.sf.saxon.s9api.QName; +import net.sf.saxon.s9api.XdmNode; +import net.sf.saxon.s9api.XdmNodeKind; +import net.sf.saxon.s9api.XdmSequenceIterator; + +import org.apache.commons.httpclient.HttpClient; +import org.apache.commons.httpclient.HttpException; +import org.apache.commons.httpclient.methods.GetMethod; +import org.apache.commons.io.FileUtils; +import org.apache.http.HttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.impl.client.DefaultHttpClient; +import org.apache.http.params.BasicHttpParams; +import org.apache.http.params.HttpConnectionParams; +import org.apache.http.params.HttpParams; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import org.xml.sax.XMLReader; + +import com.sun.org.apache.xerces.internal.parsers.SAXParser; + +import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; +import de.mpg.mpiwg.berlin.mpdl.lt.general.Language; +import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.WordContentHandler; +import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.XmlTokenizer; +import 
de.mpg.mpiwg.berlin.mpdl.util.StringUtils; +import de.mpg.mpiwg.berlin.mpdl.util.Util; +import de.mpg.mpiwg.berlin.mpdl.xml.xquery.XQueryEvaluator; +import de.mpg.mpiwg.berlin.mpdl.cms.general.Constants; +import de.mpg.mpiwg.berlin.mpdl.cms.lucene.IndexHandler; +import de.mpg.mpiwg.berlin.mpdl.cms.scheduler.CmsDocOperation; +import de.mpg.mpiwg.berlin.mpdl.cms.transform.GetFragmentsContentHandler; +import de.mpg.mpiwg.berlin.mpdl.cms.transform.PageTransformer; +import de.mpg.mpiwg.berlin.mpdl.cms.transform.XslResourceTransformer; + +/** + * Handler for documents (singleton). + */ +public class DocumentHandler { + private static Logger LOGGER = Logger.getLogger(DocumentHandler.class.getName()); + private static List EXCLUDED_PROJECT_DOCS = + Arrays.asList("/echo/zh/Min_chan_luyi_1_7MCGW0WG.xml", // the Saxon transfomer has heavy problems with some characters in CJK Unified Ideographs Extension B, e.g.: line 309 (second reg on page 16) + "/echo/zh/Min_chan_luyi_2_U7Y9NQ9V.xml", + "/echo/zh/Min_chan_luyi_3_2FP9M172.xml", + "/echo/zh/Min_chan_luyi_4_FXA6FSFH.xml", + "/echo/zh/Min_chan_luyi_5_VG6NY5XD.xml", + "/echo/zh/Xifa_shenji.xml", + "/echo/zh/Yulei_tushuo_1_HXX4MGZW.xml", + "/echo/zh/Yulei_tushuo_2_FN1CTY5C.xml"); + private long beginOfOperation; + private long endOfOperation; + + public void doOperation(CmsDocOperation docOperation) throws ApplicationException { + String operationName = docOperation.getName(); + if (operationName.equals("create")) { + create(docOperation); + } else if (operationName.equals("delete")) { + delete(docOperation); + } else if (operationName.equals("importDirectory")) { + importDirectory(docOperation); + } else if (operationName.equals("createPdf")) { + createPdf(docOperation); + } else if (operationName.equals("createAllPdfInDirectory")) { + createAllPdfInDirectory(docOperation); + } + } + + private void importDirectory(CmsDocOperation docOperation) throws ApplicationException { + try { + LOGGER.info("Start of DocumentHandler. 
This operation could be time consuming because documents are indexed (normal indexing times are 1-10 minutes for a document)"); + beginOperation(); + String localDocumentsUrlStr = docOperation.getSrcUrl(); // start directory: file:/a/local/directory + String collectionNames = docOperation.getCollectionNames(); // e.g. "echo" + File localDocumentsDir = new File(new URI(localDocumentsUrlStr)); + boolean docDirExists = localDocumentsDir.exists(); + if (! docDirExists) + throw new ApplicationException("Document directory:" + localDocumentsUrlStr + " does not exists. Please use a directory that exists and perform the operation again."); + String[] fileExtensions = {"xml"}; + Iterator iterFiles = FileUtils.iterateFiles(localDocumentsDir, fileExtensions, true); + int i = 0; + while(iterFiles.hasNext()) { + i++; + File xmlFile = iterFiles.next(); + String xmlFileStr = xmlFile.getPath(); + int relativePos = (int) localDocumentsDir.getPath().length(); + String docId = xmlFileStr.substring(relativePos); // relative path name starting from localDocumentsDir, e.g. /tei/de/Test_1789.xml + String xmlFileUrlStr = xmlFile.toURI().toURL().toString(); + CmsDocOperation createDocOperation = new CmsDocOperation("create", xmlFileUrlStr, null, docId); + createDocOperation.setCollectionNames(collectionNames); + try { + doOperation(createDocOperation); + Date now = new Date(); + LOGGER.info("Document " + i + ": " + docId + " successfully imported (" + now.toString() + ")"); + } catch (Exception e) { + LOGGER.info("Document " + i + ": " + docId + " has problems:"); + e.printStackTrace(); + } + } + endOperation(); + LOGGER.info("The DocumentHandler needed: " + (endOfOperation - beginOfOperation) + " ms" ); + } catch (Exception e) { + throw new ApplicationException(e); + } + } + + private void createAllPdfInDirectory(CmsDocOperation docOperation) throws ApplicationException { + try { + LOGGER.info("Start of generating Pdf-Documents. 
This operation could be time consuming because Pdf generation needs some time."); + beginOperation(); + String localDocumentsUrlStr = docOperation.getSrcUrl(); // start directory: file:/a/local/directory + String collectionNames = docOperation.getCollectionNames(); // e.g. "echo" + File localDocumentsDir = new File(new URI(localDocumentsUrlStr)); + boolean docDirExists = localDocumentsDir.exists(); + if (! docDirExists) + throw new ApplicationException("Document directory:" + localDocumentsUrlStr + " does not exists. Please use a directory that exists and perform the operation again."); + String[] fileExtensions = {"xml"}; + Iterator iterFiles = FileUtils.iterateFiles(localDocumentsDir, fileExtensions, true); + int i = 0; + while(iterFiles.hasNext()) { + i++; + File xmlFile = iterFiles.next(); + String xmlFileStr = xmlFile.getPath(); + int relativePos = (int) localDocumentsDir.getPath().length(); + String docId = xmlFileStr.substring(relativePos); // relative path name starting from localDocumentsDir, e.g. 
/tei/de/Test_1789.xml + CmsDocOperation createPdfOperation = new CmsDocOperation("createPdf", null, null, docId); + createPdfOperation.setCollectionNames(collectionNames); + try { + doOperation(createPdfOperation); + Date now = new Date(); + LOGGER.info("Pdf document " + i + ": " + docId + " successfully created (" + now.toString() + ")"); + } catch (Exception e) { + LOGGER.info("Pdf document " + i + ": " + docId + " has problems:"); + e.printStackTrace(); + } + } + endOperation(); + LOGGER.info("The Pdf generation needed: " + (endOfOperation - beginOfOperation) + " ms" ); + } catch (Exception e) { + throw new ApplicationException(e); + } + } + + private boolean isProjectDoc(String docId) { + boolean isProjectDoc = true; + if (EXCLUDED_PROJECT_DOCS.contains(docId)) + return false; + return isProjectDoc; + } + + private void create(CmsDocOperation docOperation) throws ApplicationException { + try { + String operationName = docOperation.getName(); + String srcUrlStr = docOperation.getSrcUrl(); + String docId = docOperation.getDocIdentifier(); + if (! isProjectDoc(docId)) { + LOGGER.info("Operation: " + operationName + " not performed on: " + docId + ". Cause: document is excluded as project doc"); + return; + } + String mainLanguage = docOperation.getMainLanguage(); + if (mainLanguage == null) { + mainLanguage = getMainLanguage(docId); + } + String[] elementNames = docOperation.getElementNames(); + if (elementNames == null) { + String[] defaultElementNames = {"s", "head", "caption", "variables", "description"}; + docOperation.setElementNames(defaultElementNames); // default + } + String docDirName = getDocDir(docId); + String docDestFileName = getDocFullFileName(docId); + URL srcUrl = null; + String protocol = null; + if (srcUrlStr != null && ! 
srcUrlStr.equals("empty")) { + srcUrl = new URL(srcUrlStr); + protocol = srcUrl.getProtocol(); + } + File docDestFile = new File(docDestFileName); + // parse validation on file + XQueryEvaluator xQueryEvaluator = new XQueryEvaluator(); + XdmNode docNode = xQueryEvaluator.parse(srcUrl); // if it is not parseable an exception with a detail message is thrown + String docType = getNodeType(docNode); // archimedes, echo, TEI, html ... + docType = docType.trim(); + if (docType == null) { + docOperation.setErrorMessage("file type of: " + srcUrlStr + "is not supported"); + return; + } + // perform operation on file system + if (protocol.equals("file")) { + docOperation.setStatus("upload file: " + srcUrlStr + " to CMS"); + } else { + docOperation.setStatus("download file from: " + srcUrlStr + " to CMS"); + } + FileUtils.copyURLToFile(srcUrl, docDestFile, 100000, 100000); + + // replace anchor in echo documents and also add the number attribute to figures + String docDestFileNameUpgrade = docDestFileName + ".upgrade"; + File docDestFileUpgrade = new File(docDestFileNameUpgrade); + XslResourceTransformer replaceAnchorTransformer = new XslResourceTransformer("replaceAnchor.xsl"); + String docDestFileUrlStr = docDestFile.getPath(); + String result = replaceAnchorTransformer.transform(docDestFileUrlStr); + FileUtils.writeStringToFile(docDestFileUpgrade, result, "utf-8"); + + MetadataRecord mdRecord = new MetadataRecord(); + mdRecord.setDocId(docId); + mdRecord.setCollectionNames(docOperation.getCollectionNames()); + mdRecord.setType("text/xml"); + + // generate toc file (toc, figure, handwritten) + XslResourceTransformer tocTransformer = new XslResourceTransformer("toc.xsl"); + File tocFile = new File(docDirName + "/toc.xml"); + String tocResult = tocTransformer.transform(docDestFileNameUpgrade); + FileUtils.writeStringToFile(tocFile, tocResult, "utf-8"); + + // Get metadata info of the xml document + docOperation.setStatus("extract metadata of: " + srcUrlStr + " to CMS"); + 
XQueryEvaluator xQueryEvaluator2 = new XQueryEvaluator(); + mdRecord = getMetadataRecord(docDestFileUpgrade, docType, mdRecord, xQueryEvaluator2); + String mdRecordLanguage = mdRecord.getLanguage(); + if (mdRecordLanguage == null && mainLanguage != null) + mdRecord.setLanguage(mainLanguage); + + // save all pages as single xml files (untokenized and tokenized) + docOperation.setStatus("extract page fragments of: " + srcUrlStr + " to CMS"); + File docDir = new File(docDirName + "/pages"); + FileUtils.deleteQuietly(docDir); // first delete pages directory + Hashtable pageFragments = getFragments(docDestFileNameUpgrade, "pb"); + int pageCount = pageFragments.size(); + if (pageCount == 0) { + // no pb element is found: then the whole document is the first page + String docXmlStr = FileUtils.readFileToString(docDestFileUpgrade, "utf-8"); + docXmlStr = docXmlStr.replaceAll("<\\?xml.*?\\?>", ""); // remove the xml declaration if it exists + pageFragments = new Hashtable(); + pageFragments.put(new Integer(1), new StringBuilder(docXmlStr)); + pageCount = 1; + } + PageTransformer pageTransformer = new PageTransformer(); + for (int page=1; page<=pageCount; page++) { + String fragment = pageFragments.get(new Integer(page)).toString(); + fragment = "\n" + fragment; + String docPageFileName = docDirName + "/pages/page-" + page + ".xml"; + File docPageFile = new File(docPageFileName); + FileUtils.writeStringToFile(docPageFile, fragment, "utf-8"); + String language = mdRecord.getLanguage(); + String tokenizedXmlStr = tokenizeWithLemmas(fragment, language); // xml fragment enriched with elements + tokenizedXmlStr = "" + tokenizedXmlStr; + tokenizedXmlStr = enrichWordsOrigRegNorm(tokenizedXmlStr); // xml string: enrich elements with normalization info (orig, reg, norm) + String docPageTokenizedFileName = docDirName + "/pages/page-" + page + "-morph.xml"; + File docPageTokenizedFile = new File(docPageTokenizedFileName); + FileUtils.writeStringToFile(docPageTokenizedFile, 
tokenizedXmlStr, "utf-8"); + String docPageHtmlFileName = docDirName + "/pages/page-" + page + ".html"; + File docPageHtmlFile = new File(docPageHtmlFileName); + String htmlStr = pageTransformer.transform(tokenizedXmlStr, mdRecord, page, "html"); + FileUtils.writeStringToFile(docPageHtmlFile, htmlStr, "utf-8"); + } + + // perform operation on Lucene + docOperation.setStatus(operationName + " document: " + docId + " in CMS"); + docOperation.setMdRecord(mdRecord); + IndexHandler indexHandler = IndexHandler.getInstance(); + indexHandler.indexDocument(docOperation); + + } catch (IOException e) { + throw new ApplicationException(e); + } + } + + private void delete(CmsDocOperation docOperation) throws ApplicationException { + String operationName = docOperation.getName(); + String docIdentifier = docOperation.getDocIdentifier(); + if (docIdentifier == null || docIdentifier.trim().equals("")) + throw new ApplicationException("Your document identifier is empty. Please specify a document identifier for your document."); + String docDirStr = getDocDir(docIdentifier); + File docDir = new File(docDirStr); + boolean docExists = docDir.exists(); + if (! docExists) { + throw new ApplicationException("Document:" + docIdentifier + " does not exists. Please use a name that exists and perform the operation \"Delete\" again."); + } + // perform operation on file system + docOperation.setStatus(operationName + " document: " + docIdentifier + " in CMS"); + FileUtils.deleteQuietly(docDir); + + // perform operation on Lucene + IndexHandler indexHandler = IndexHandler.getInstance(); + indexHandler.deleteDocument(docOperation); + + } + + private void createPdf(CmsDocOperation docOperation) throws ApplicationException { + String docId = docOperation.getDocIdentifier(); + String operationName = docOperation.getName(); + if (docId == null || docId.trim().equals("")) + throw new ApplicationException("Your document identifier is empty. 
Please specify a document identifier for your document."); + if (! isProjectDoc(docId)) { + LOGGER.info("Operation: " + operationName + " not performed on: " + docId + ". Cause: document is excluded as project doc"); + return; + } + IndexHandler indexHandler = IndexHandler.getInstance(); + MetadataRecord mdRecord = indexHandler.getDocMetadata(docId); + docOperation.setStatus("create PDF and HTML versions of the document: " + docId); + PdfHandler pdfHandler = PdfHandler.getInstance(); + pdfHandler.createFile(true, true, mdRecord); // generate Pdf + Html document + } + + private MetadataRecord getMetadataRecord(File xmlFile, String schemaName, MetadataRecord mdRecord, XQueryEvaluator xQueryEvaluator) throws ApplicationException { + if (schemaName == null) + return mdRecord; + try { + URL srcUrl = xmlFile.toURI().toURL(); + if (schemaName.equals("archimedes")) + mdRecord = getMetadataRecordArch(xQueryEvaluator, srcUrl, mdRecord); + else if (schemaName.equals("echo")) + mdRecord = getMetadataRecordEcho(xQueryEvaluator, srcUrl, mdRecord); + else if (schemaName.equals("TEI")) + mdRecord = getMetadataRecordTei(xQueryEvaluator, srcUrl, mdRecord); + else if (schemaName.equals("html")) + mdRecord = getMetadataRecordHtml(xQueryEvaluator, srcUrl, mdRecord); + else + mdRecord.setSchemaName("diverse"); // all other cases: set docType to schemaName + } catch (MalformedURLException e) { + throw new ApplicationException(e); + } + mdRecord.setLastModified(new Date()); + return mdRecord; + } + + private MetadataRecord getMetadataRecordArch(XQueryEvaluator xQueryEvaluator, URL srcUrl, MetadataRecord mdRecord) throws ApplicationException { + String metadataXmlStr = xQueryEvaluator.evaluateAsString(srcUrl, "/archimedes//info"); + if (metadataXmlStr != null) { + String identifier = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/info/locator"); + if (identifier != null) + identifier = StringUtils.deresolveXmlEntities(identifier); + String creator = 
xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/info/author"); + if (creator != null) + creator = StringUtils.deresolveXmlEntities(creator); + String title = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/info/title"); + if (title != null) + title = StringUtils.deresolveXmlEntities(title); + String language = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/info/lang[1]"); + if (language != null) + language = StringUtils.deresolveXmlEntities(language); + String place = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/info/place"); + if (place != null) + place = StringUtils.deresolveXmlEntities(place); + String yearStr = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/info/date"); + Date date = null; + if (yearStr != null && ! yearStr.equals("")) { + yearStr = StringUtils.deresolveXmlEntities(yearStr); + yearStr = new Util().toYearStr(yearStr); // test if possible etc + if (yearStr != null) { + try { + date = new Util().toDate(yearStr + "-01-01T00:00:00.000Z"); + } catch (Exception e) { + // nothing + } + } + } + String rights = "open access"; + String license = "http://echo.mpiwg-berlin.mpg.de/policy/oa_basics/declaration"; + String accessRights = "free"; + + mdRecord.setIdentifier(identifier); + mdRecord.setLanguage(language); + mdRecord.setCreator(creator); + mdRecord.setTitle(title); + mdRecord.setPublisher(place); + mdRecord.setRights(rights); + mdRecord.setDate(date); + mdRecord.setLicense(license); + mdRecord.setAccessRights(accessRights); + + // get echo metadata + String echoDir = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/info/echodir"); + String docId = mdRecord.getDocId(); + String echoIdTmp = docId; + if (docId != null && ! 
docId.isEmpty()) { + int start = docId.lastIndexOf("/"); + if (start != -1) + start = start + 1; + else + start = 0; + int end = docId.lastIndexOf("."); + if (end == -1) + end = docId.length(); + echoIdTmp = docId.substring(start, end); + } + String echoId = "/permanent/archimedes/" + echoIdTmp; + if (echoIdTmp == null || echoIdTmp.isEmpty()) + echoId = null; + if (echoDir != null && ! echoDir.isEmpty()) { + echoId = echoDir; + } + mdRecord = getEchoMetadata(xQueryEvaluator, echoId, mdRecord); + } + String pageCountStr = xQueryEvaluator.evaluateAsString(srcUrl, "count(//pb)"); + int pageCount = Integer.valueOf(pageCountStr); + mdRecord.setPageCount(pageCount); + mdRecord.setSchemaName("archimedes"); + return mdRecord; + } + + private MetadataRecord getMetadataRecordEcho(XQueryEvaluator xQueryEvaluator, URL srcUrl, MetadataRecord mdRecord) throws ApplicationException { + String metadataXmlStr = xQueryEvaluator.evaluateAsString(srcUrl, "/*:echo/*:metadata"); + if (metadataXmlStr != null) { + String identifier = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:metadata/*:identifier"); + if (identifier != null) { + identifier = StringUtils.deresolveXmlEntities(identifier); + } + String creator = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:metadata/*:creator"); + if (creator != null) + creator = StringUtils.deresolveXmlEntities(creator); + String title = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:metadata/*:title"); + if (title != null) + title = StringUtils.deresolveXmlEntities(title); + String language = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:metadata/*:language[1]"); + if (language != null) + language = StringUtils.deresolveXmlEntities(language); + String yearStr = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:metadata/*:date"); + Date date = null; + if (yearStr != null && ! 
yearStr.equals("")) { + yearStr = StringUtils.deresolveXmlEntities(yearStr); + yearStr = new Util().toYearStr(yearStr); // test if possible etc + if (yearStr != null) { + try { + date = new Util().toDate(yearStr + "-01-01T00:00:00.000Z"); + } catch (Exception e) { + // nothing + } + } + } + String rights = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:metadata/*:rights"); + if (rights != null) + rights = StringUtils.deresolveXmlEntities(rights); + String license = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:metadata/*:license"); + if (license != null) + license = StringUtils.deresolveXmlEntities(license); + String accessRights = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:metadata/*:accessRights"); + if (accessRights != null) + accessRights = StringUtils.deresolveXmlEntities(accessRights); + + mdRecord.setIdentifier(identifier); + mdRecord.setLanguage(language); + mdRecord.setCreator(creator); + mdRecord.setTitle(title); + mdRecord.setRights(rights); + mdRecord.setDate(date); + mdRecord.setLicense(license); + mdRecord.setAccessRights(accessRights); + + // get echo metadata + String echoDir = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:metadata/*:echodir"); + String echoIdTmp = identifier; + if (identifier != null && ! identifier.isEmpty()) { + int start = identifier.indexOf("ECHO:"); + if (start != -1) + start = start + 5; + else + start = 0; + int end = identifier.lastIndexOf("."); + if (end == -1) + end = identifier.length(); + echoIdTmp = identifier.substring(start, end); + } + String echoId = "/permanent/library/" + echoIdTmp; + if (echoIdTmp == null || echoIdTmp.isEmpty()) + echoId = null; + if (echoDir != null && ! 
echoDir.isEmpty()) { + echoId = echoDir; + } + mdRecord = getEchoMetadata(xQueryEvaluator, echoId, mdRecord); + } + String pageCountStr = xQueryEvaluator.evaluateAsString(srcUrl, "count(//*:pb)"); + int pageCount = Integer.valueOf(pageCountStr); + mdRecord.setPageCount(pageCount); + mdRecord.setSchemaName("echo"); + return mdRecord; + } + + private MetadataRecord getMetadataRecordTei(XQueryEvaluator xQueryEvaluator, URL srcUrl, MetadataRecord mdRecord) throws ApplicationException { + String metadataXmlStr = xQueryEvaluator.evaluateAsString(srcUrl, "/*:TEI/*:teiHeader"); + if (metadataXmlStr != null) { + String identifier = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:teiHeader/*:fileDesc/*:publicationStmt/*:idno"); + if (identifier != null) { + identifier = StringUtils.deresolveXmlEntities(identifier); + identifier = deleteSpecialChars(identifier); + } + String creator = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:teiHeader/*:fileDesc/*:titleStmt/*:author"); + if (creator != null) + creator = StringUtils.deresolveXmlEntities(creator); + String title = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:teiHeader/*:fileDesc/*:titleStmt/*:title"); + if (title != null) + title = StringUtils.deresolveXmlEntities(title); + String language = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/*:teiHeader/*:profileDesc/*:langUsage/*:language[1]/@ident)"); + if (language != null && language.isEmpty()) + language = null; + if (language != null) { + language = language.toLowerCase(); + if (language.length() == 5) { // e.g. 
"de-DE or en-US" + if (language.substring(2, 3).equals("-")) { + String lang = language.substring(0, 2); + language = Language.getInstance().getISO639Code(lang); + } + } + } + String place = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:teiHeader/*:fileDesc/*:publicationStmt/*:pubPlace"); + if (place != null) + place = StringUtils.deresolveXmlEntities(place); + String yearStr = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:teiHeader/*:fileDesc/*:publicationStmt/*:date"); + Date date = null; + if (yearStr != null && ! yearStr.equals("")) { + yearStr = StringUtils.deresolveXmlEntities(yearStr); + yearStr = new Util().toYearStr(yearStr); // test if possible etc + if (yearStr != null) { + try { + date = new Util().toDate(yearStr + "-01-01T00:00:00.000Z"); + } catch (Exception e) { + // nothing + } + } + } + String subject = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/*:teiHeader/*:profileDesc/*:textClass/*:keywords/*:term)"); + if (subject != null) + subject = StringUtils.deresolveXmlEntities(subject); + String rights = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:teiHeader/*:fileDesc/*:publicationStmt/*:availability"); + if (rights == null) + rights = "open access"; + rights = StringUtils.deresolveXmlEntities(rights); + String license = "http://echo.mpiwg-berlin.mpg.de/policy/oa_basics/declaration"; + String accessRights = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/*:teiHeader/*:fileDesc/*:publicationStmt/*:availability/@status)"); + if (accessRights == null) + accessRights = "free"; + accessRights = StringUtils.deresolveXmlEntities(accessRights); + + mdRecord.setIdentifier(identifier); + mdRecord.setLanguage(language); + mdRecord.setCreator(creator); + mdRecord.setTitle(title); + mdRecord.setPublisher(place); + mdRecord.setRights(rights); + mdRecord.setDate(date); + mdRecord.setSubject(subject); + mdRecord.setLicense(license); + 
mdRecord.setAccessRights(accessRights); + + // get echo metadata + mdRecord = getEchoMetadata(xQueryEvaluator, identifier, mdRecord); // identifier is echoDir + } + String pageCountStr = xQueryEvaluator.evaluateAsString(srcUrl, "count(//*:pb)"); + int pageCount = Integer.valueOf(pageCountStr); + mdRecord.setPageCount(pageCount); + mdRecord.setSchemaName("TEI"); + return mdRecord; + } + + private MetadataRecord getMetadataRecordHtml(XQueryEvaluator xQueryEvaluator, URL srcUrl, MetadataRecord mdRecord) throws ApplicationException { + String metadataXmlStr = xQueryEvaluator.evaluateAsString(srcUrl, "/html/head"); + if (metadataXmlStr != null) { + String identifier = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/meta[@name = 'DC.identifier']/@content)"); + if (identifier != null && ! identifier.isEmpty()) + identifier = StringUtils.deresolveXmlEntities(identifier); + String creator = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/meta[@name = 'DC.creator']/@content)"); + if (creator != null && ! creator.isEmpty()) + creator = StringUtils.deresolveXmlEntities(creator); + String title = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/meta[@name = 'DC.title']/@content)"); + if (title != null && ! title.isEmpty()) + title = StringUtils.deresolveXmlEntities(title); + String language = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/meta[@name = 'DC.language']/@content)"); + if (language != null && language.isEmpty()) + language = null; + if (language != null && ! 
language.isEmpty()) + language = StringUtils.deresolveXmlEntities(language); + String publisher = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/meta[@name = 'DC.publisher']/@content)"); + if (publisher != null) + publisher = StringUtils.deresolveXmlEntities(publisher); + String yearStr = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/meta[@name = 'DC.date']/@content)"); + Date date = null; + if (yearStr != null && ! yearStr.equals("")) { + yearStr = StringUtils.deresolveXmlEntities(yearStr); + yearStr = new Util().toYearStr(yearStr); // test if possible etc + if (yearStr != null) { + try { + date = new Util().toDate(yearStr + "-01-01T00:00:00.000Z"); + } catch (Exception e) { + // nothing + } + } + } + String subject = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/meta[@name = 'DC.subject']/@content)"); + if (subject != null) + subject = StringUtils.deresolveXmlEntities(subject); + String rights = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/meta[@name = 'DC.rights']/@content)"); + if (rights != null && ! rights.isEmpty()) + rights = StringUtils.deresolveXmlEntities(rights); + String license = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/meta[@name = 'DC.license']/@content)"); + if (license != null && ! license.isEmpty()) + license = StringUtils.deresolveXmlEntities(license); + String accessRights = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/meta[@name = 'DC.accessRights']/@content)"); + if (accessRights != null && ! 
accessRights.isEmpty()) + accessRights = StringUtils.deresolveXmlEntities(accessRights); + + mdRecord.setIdentifier(identifier); + mdRecord.setLanguage(language); + mdRecord.setCreator(creator); + mdRecord.setTitle(title); + mdRecord.setPublisher(publisher); + mdRecord.setRights(rights); + mdRecord.setDate(date); + mdRecord.setSubject(subject); + mdRecord.setLicense(license); + mdRecord.setAccessRights(accessRights); + + // get echo metadata + mdRecord = getEchoMetadata(xQueryEvaluator, identifier, mdRecord); // identifier is echoDir + } + String pageCountStr = xQueryEvaluator.evaluateAsString(srcUrl, "count(//pb)"); + int pageCount = Integer.valueOf(pageCountStr); + mdRecord.setPageCount(pageCount); + mdRecord.setSchemaName("html"); + return mdRecord; + } + + private MetadataRecord getEchoMetadata(XQueryEvaluator xQueryEvaluator, String echoDir, MetadataRecord mdRecord) throws ApplicationException { + if (echoDir == null || echoDir.isEmpty()) { + String docId = mdRecord.getDocId(); + echoDir = getEchoDir(xQueryEvaluator, docId); + if (echoDir == null) + return mdRecord; + } + String urLTexter = "http://digilib.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn=" + echoDir + "/index.meta"; + String echoIndexMetaStr = performGetRequest(urLTexter); + String echoPageImageDir = null; + String echoFiguresDir = null; + String mpiwgDocId = null; + if (echoIndexMetaStr != null) { + if (echoIndexMetaStr.equals("XXXXTimeoutXXXX")) + return null; + else if (echoIndexMetaStr.equals("XXXXUrlErrorXXXX")) + return mdRecord; + echoPageImageDir = xQueryEvaluator.evaluateAsStringValueJoined(echoIndexMetaStr, "/resource/meta/texttool/image"); + if (echoPageImageDir != null) + echoPageImageDir = echoDir + "/" + echoPageImageDir; + else + echoPageImageDir = echoDir + "/" + "pageimg"; // default + echoFiguresDir = xQueryEvaluator.evaluateAsStringValueJoined(echoIndexMetaStr, "/resource/meta/texttool/figures"); + if (echoFiguresDir != null) + echoFiguresDir = echoDir + "/" + 
echoFiguresDir; + else + echoFiguresDir = echoDir + "/" + "figures"; // default + mpiwgDocId = xQueryEvaluator.evaluateAsStringValueJoined(echoIndexMetaStr, "/resource/meta/dri[@type = 'mpiwg']"); + } + mdRecord.setEchoId(echoDir); + mdRecord.setEchoPageImageDir(echoPageImageDir); + mdRecord.setEchoFiguresDir(echoFiguresDir); + mdRecord.setMpiwgDocId(mpiwgDocId); + return mdRecord; + } + + private String getEchoDir(XQueryEvaluator xQueryEvaluator, String docId) throws ApplicationException { + String echoDir = null; + String urLTextUrlPath = "http://md.mpiwg-berlin.mpg.de/purls/searchSolr?text-url-path=" + docId + "&format=short"; + String resultXmlStr = performGetRequest(urLTextUrlPath); + if (resultXmlStr != null) { + if (resultXmlStr.equals("XXXXTimeoutXXXX")) + return null; + else if (resultXmlStr.equals("XXXXUrlErrorXXXX")) + return null; + String archivePath = xQueryEvaluator.evaluateAsStringValueJoined(resultXmlStr, "//archive-path"); + if (archivePath != null) { + archivePath = archivePath.replaceAll("/mpiwg/online", ""); + if (archivePath.isEmpty()) + echoDir = null; + else + echoDir = archivePath; + } + } + return echoDir; + } + + private String getNodeType(XdmNode node) { + String nodeType = null; + XdmSequenceIterator iter = node.axisIterator(Axis.CHILD); + if (iter != null) { + while (iter.hasNext()) { + XdmNode firstChild = (XdmNode) iter.next(); + if (firstChild != null) { + XdmNodeKind nodeKind = firstChild.getNodeKind(); + if (nodeKind.ordinal() == XdmNodeKind.ELEMENT.ordinal()) { + QName nodeQName = firstChild.getNodeName(); + nodeType = nodeQName.getLocalName(); + } + } + } + } + return nodeType; + } + + public String getDocFullFileName(String docId) { + String docDir = getDocDir(docId); + String docFileName = getDocFileName(docId); + String docFullFileName = docDir + "/" + docFileName; + return docFullFileName; + } + + public String getFullFileName(String docId, String type) { + String docDir = getDocDir(docId); + String docFileName = 
getDocFileName(docId); + int lastDot = docFileName.lastIndexOf("."); + String docFileNameWithoutExtension = docFileName.substring(0, lastDot); + String fullFileName = docDir + "/" + docFileNameWithoutExtension + ".xml"; + if (type != null && ! type.equals("toc")) { + fullFileName = docDir + "/" + docFileNameWithoutExtension + "." + type; + } else if (type != null && type.equals("toc")) { + fullFileName = docDir + "/toc.xml"; + } + return fullFileName; + } + + public String getDocDir(String docId) { + String documentsDirectory = Constants.getInstance().getDocumentsDir(); + String subDir = docId; + if (docId.contains(".")) { + int index = docId.lastIndexOf("."); + subDir = docId.substring(0, index); + } + if (! subDir.startsWith("/")) + subDir = "/" + subDir; + String docDir = documentsDirectory + subDir; + return docDir; + } + + public String getDocFileName(String docId) { + String docFileName = docId; + int index = docId.lastIndexOf("/"); + if (index != -1) { + docFileName = docId.substring(index + 1); + } + return docFileName; + } + + private String getMainLanguage(String docId) { + String mainLang = null; + int to = docId.lastIndexOf("/"); + if (to != -1) { + String preStr = docId.substring(0, to); + int from = preStr.lastIndexOf("/"); + if (from != -1) + mainLang = preStr.substring(from + 1, to); + } + return mainLang; + } + + private String deleteSpecialChars(String inputStr) { + StringBuilder buf = new StringBuilder(); + for (int i = 0; i < inputStr.length(); i++) { + char c = inputStr.charAt(i); + String replace = new String(); + switch (c) { + case '@': replace = ""; break; + case ' ': replace = ""; break; + case ';': replace = ""; break; + default: replace += c; break; + } + buf.append(replace); + } + return buf.toString(); + } + + private Hashtable getFragments(String fileName, String milestoneElementName) throws ApplicationException { + try { + GetFragmentsContentHandler getFragmentsContentHandler = new GetFragmentsContentHandler(milestoneElementName); + 
XMLReader xmlParser = new SAXParser(); + xmlParser.setContentHandler(getFragmentsContentHandler); + StringReader bla = new StringReader(FileUtils.readFileToString(new File(fileName), "utf-8")); + InputSource inputSource = new InputSource(bla); + xmlParser.parse(inputSource); + Hashtable resultFragments = getFragmentsContentHandler.getResultPages(); + return resultFragments; + } catch (SAXException e) { + throw new ApplicationException(e); + } catch (IOException e) { + throw new ApplicationException(e); + } + } + + private String tokenizeWithLemmas(String xmlStr, String language) throws ApplicationException { + StringReader strReader = new StringReader(xmlStr); + XmlTokenizer xmlTokenizer = new XmlTokenizer(strReader); + xmlTokenizer.setLanguage(language); + String[] outputOptionsWithLemmas = {"withLemmas"}; // so all tokens are fetched with lemmas (costs performance) + // non word breaking elements; + // TODO examine bugs with emph, figure, hi : + // e.g. "... der Capi-talist. Es ..." + // e.g. 
page 30 in /echo/la/Cataneo_1600.xml + String[] nwbElements = {"lb", "br", "cb"}; + xmlTokenizer.setNWBElements(nwbElements); + xmlTokenizer.setOutputOptions(outputOptionsWithLemmas); + xmlTokenizer.tokenize(); + String retStr = xmlTokenizer.getXmlResult(); + return retStr; + } + + private String enrichWordsOrigRegNorm(String xmlStr) throws ApplicationException { + try { + WordContentHandler wordContentHandler = new WordContentHandler(); + XMLReader xmlParser = new SAXParser(); + xmlParser.setContentHandler(wordContentHandler); + StringReader strReader = new StringReader(xmlStr); + InputSource inputSource = new InputSource(strReader); + xmlParser.parse(inputSource); + String result = wordContentHandler.getResult(); + return result; + } catch (SAXException e) { + throw new ApplicationException(e); + } catch (IOException e) { + throw new ApplicationException(e); + } + } + + private String performGetRequest(String url) throws ApplicationException { + String resultStr = null; + try { + boolean urlIsOk = checkUri(url, 2000); // if url doesn't answer after 2 seconds + if (! urlIsOk) + return "XXXXTimeoutXXXX"; + HttpClient httpClient = new HttpClient(); + GetMethod method = new GetMethod(url); + httpClient.executeMethod(method); + int statusCode = method.getStatusCode(); + if (statusCode >= 400) + return "XXXXUrlErrorXXXX"; + byte[] resultBytes = method.getResponseBody(); + resultStr = new String(resultBytes, "utf-8"); + method.releaseConnection(); + } catch (HttpException e) { + throw new ApplicationException(e); + } catch (IOException e) { + throw new ApplicationException(e); + } + return resultStr; + } + + private boolean checkUri(String uriStr, int timeoutMilliseconds) throws ApplicationException { + boolean isOk = true; + try { + URI uri = new URI(uriStr); + HttpGet httpGet = new HttpGet(uri); + HttpParams httpParameters = new BasicHttpParams(); + // Set the timeout in milliseconds until a connection is established. 
+ // The default value is zero, that means the timeout is not used. + int timeoutConnection = 2000; + HttpConnectionParams.setConnectionTimeout(httpParameters, timeoutConnection); + // Set the default socket timeout (SO_TIMEOUT) + // in milliseconds which is the timeout for waiting for data. + int timeoutSocket = 2000; + HttpConnectionParams.setSoTimeout(httpParameters, timeoutSocket); + DefaultHttpClient httpClient = new DefaultHttpClient(httpParameters); + HttpResponse response = httpClient.execute(httpGet); + } catch (IOException e) { + isOk = false; // if timeout exception is thrown + } catch (URISyntaxException e) { + throw new ApplicationException(e); + } + return isOk; + } + + /** + * Write string into destFile. If directory for that destFile does not exist + * it creates this directory including parent directories. + * @param str string to write + * @param destFileName destination file name + * @throws ApplicationException + */ + private void saveFile(String str, String destFileName) throws ApplicationException { + OutputStreamWriter out = null; + try { + if (str == null) + return; // do nothing + File destFile = new File(destFileName); + File destDir = new File(destFile.getParent()); + if (! 
destDir.exists()) { + destDir.mkdirs(); // create the directory including parent directories which do not exist + } + out = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(destFile)), "utf-8"); + out.write(str); + out.flush(); + } catch (FileNotFoundException e) { + throw new ApplicationException(e); + } catch (IOException e) { + throw new ApplicationException(e); + } finally { + try { + if (out != null) + out.close(); + } catch (Exception e) { + // nothing: always close the stream at the end of the method + } + } + } + + private void beginOperation() { + beginOfOperation = new Date().getTime(); + } + + private void endOperation() { + endOfOperation = new Date().getTime(); + } + +} \ No newline at end of file diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/document/Hits.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/document/Hits.java Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,60 @@ +package de.mpg.mpiwg.berlin.mpdl.cms.document; + +import java.util.ArrayList; + +import org.apache.lucene.search.Query; + +public class Hits { + private ArrayList hits; + private Query query; + private int from; + private int to; + private int size = 0; + + public Hits(ArrayList hits, int from, int to) { + this.hits = hits; + this.from = from; + this.to = to; + } + + public int getSize() { + return size; + } + + public void setSize(int size) { + this.size = size; + } + + public Query getQuery() { + return query; + } + + public void setQuery(Query query) { + this.query = query; + } + + public ArrayList getHits() { + return hits; + } + + public void setHits(ArrayList hits) { + this.hits = hits; + } + + public int getFrom() { + return from; + } + + public void setFrom(int from) { + this.from = from; + } + + public int getTo() { + return to; + } + + public void setTo(int to) { + this.to = to; + } + +} diff -r e845310098ba -r 
/**
 * Metadata of one document: Dublin Core like fields plus index and echo specific fields.
 * Plain mutable bean; the two Date fields are copied on the way in and out so that callers
 * cannot change the record's state through a Date instance they still hold.
 */
public class MetadataRecord {
  private String docId; // local id: document identifier in index system, e.g. /echo/la/Benedetti_1585.xml
  private String identifier; // local id: identifier field in documents metadata: e.g. /echo:echo/echo:metadata/dcterms:identifier
  private String uri; // global id: document URI (uniform resource identifier), e.g. http://de.wikipedia.org/wiki/Ramones
  private String language;
  private String creator; // author
  private String title;
  private String description; // abstract etc.
  private String subject; // subject keywords from the title or description or content or subject lists (thesaurus etc.)
  private String publisher; // publisher with place: e.g. Springer, New York
  private String type; // mime type: e.g. text/xml // TODO this is actually the field "format" --> define an additional instance variable "format"
  private String rights; // e.g. open access
  private Date date; // creation date, modification date, etc.
  private String license; // e.g. http://echo.mpiwg-berlin.mpg.de/policy/oa_basics/declaration
  private String accessRights; // e.g. free
  private String collectionNames; // e.g. "collection1 collection7"
  private String schemaName; // e.g. TEI, echo, html, or archimedes
  private Date lastModified;
  private int pageCount;
  private String echoId; // document identifier in echo system: directory name, e.g. /permanent/library/163127KK
  private String echoPageImageDir; // document page image directory in echo system: directory name, e.g. /permanent/library/163127KK/pageimg
  private String echoFiguresDir; // document figures directory in echo system: directory name, e.g. /permanent/library/163127KK/figures
  private String mpiwgDocId; // mpiwg docId in echo system: e.g. MPIWG:U7FWNX06

  /** @return an independent copy of the given date, or null for null */
  private static Date copyOf(Date source) {
    return (source == null) ? null : new Date(source.getTime());
  }

  public String getDocId() {
    return docId;
  }

  public void setDocId(String docId) {
    this.docId = docId;
  }

  public String getUri() {
    return uri;
  }

  public void setUri(String uri) {
    this.uri = uri;
  }

  public String getRights() {
    return rights;
  }

  public void setRights(String rights) {
    this.rights = rights;
  }

  public int getPageCount() {
    return pageCount;
  }

  public void setPageCount(int pageCount) {
    this.pageCount = pageCount;
  }

  public String getLicense() {
    return license;
  }

  public void setLicense(String license) {
    this.license = license;
  }

  public String getAccessRights() {
    return accessRights;
  }

  public void setAccessRights(String accessRights) {
    this.accessRights = accessRights;
  }

  public String getCreator() {
    return creator;
  }

  public void setCreator(String creator) {
    this.creator = creator;
  }

  public String getTitle() {
    return title;
  }

  public void setTitle(String title) {
    this.title = title;
  }

  /** @return a copy of the date, or null if none is set */
  public Date getDate() {
    return copyOf(date);
  }

  /** @param date date to store (copied), may be null */
  public void setDate(Date date) {
    this.date = copyOf(date);
  }

  /**
   * @return the year of the date in the default time zone as string, or null if no date is set
   */
  public String getYear() {
    if (date == null)
      return null;
    Calendar cal = Calendar.getInstance();
    cal.setTime(date);
    return String.valueOf(cal.get(Calendar.YEAR));
  }

  public String getDescription() {
    return description;
  }

  public void setDescription(String description) {
    this.description = description;
  }

  public String getSubject() {
    return subject;
  }

  public void setSubject(String subject) {
    this.subject = subject;
  }

  public String getCollectionNames() {
    return collectionNames;
  }

  public void setCollectionNames(String collectionNames) {
    this.collectionNames = collectionNames;
  }

  public String getIdentifier() {
    return identifier;
  }

  public void setIdentifier(String identifier) {
    this.identifier = identifier;
  }

  public String getLanguage() {
    return language;
  }

  public void setLanguage(String language) {
    this.language = language;
  }

  public String getPublisher() {
    return publisher;
  }

  public void setPublisher(String publisher) {
    this.publisher = publisher;
  }

  public String getType() {
    return type;
  }

  public void setType(String type) {
    this.type = type;
  }

  public String getSchemaName() {
    return schemaName;
  }

  public void setSchemaName(String schemaName) {
    this.schemaName = schemaName;
  }

  /** @return a copy of the last modification date, or null if none is set */
  public Date getLastModified() {
    return copyOf(lastModified);
  }

  /** @param lastModified last modification date to store (copied), may be null */
  public void setLastModified(Date lastModified) {
    this.lastModified = copyOf(lastModified);
  }

  public String getEchoId() {
    return echoId;
  }

  public void setEchoId(String echoId) {
    this.echoId = echoId;
  }

  public String getEchoPageImageDir() {
    return echoPageImageDir;
  }

  public void setEchoPageImageDir(String echoPageImageDir) {
    this.echoPageImageDir = echoPageImageDir;
  }

  public String getEchoFiguresDir() {
    return echoFiguresDir;
  }

  public void setEchoFiguresDir(String echoFiguresDir) {
    this.echoFiguresDir = echoFiguresDir;
  }

  public String getMpiwgDocId() {
    return mpiwgDocId;
  }

  public void setMpiwgDocId(String mpiwgDocId) {
    this.mpiwgDocId = mpiwgDocId;
  }

}
java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.io.UnsupportedEncodingException; +import java.util.Hashtable; + +import org.apache.commons.httpclient.HttpClient; +import org.apache.commons.httpclient.HttpException; +import org.apache.commons.httpclient.methods.GetMethod; +import org.apache.commons.io.FileUtils; +import org.xhtmlrenderer.layout.SharedContext; +import org.xhtmlrenderer.pdf.ITextFontResolver; +import org.xhtmlrenderer.pdf.ITextRenderer; +import org.xhtmlrenderer.util.XRRuntimeException; + +import com.lowagie.text.DocumentException; +import com.lowagie.text.pdf.BaseFont; + +import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; +import de.mpg.mpiwg.berlin.mpdl.cms.general.Constants; +import de.mpg.mpiwg.berlin.mpdl.cms.transform.PageTransformer; +import de.mpg.mpiwg.berlin.mpdl.cms.transform.TocTransformer; + +public class PdfHandler { + private static PdfHandler instance; + private static String CSS_SHOW_WORD_URL = "http://thrax.rz-berlin.mpg.de/mpiwg-mpdl-cms-web/css/pageOrig.css"; + private static String CSS_URL = "http://thrax.rz-berlin.mpg.de/mpiwg-mpdl-cms-web/css/page.css"; + // private static String CSS_DOCUVIEWER_URL = "http://echo.mpiwg-berlin.mpg.de/ECHOdocuView/template/docuviewer_css"; + private ITextRenderer renderer = new ITextRenderer(); + private Hashtable fontFileNames; + private DocumentHandler docHandler; + private TocTransformer tocTransformer; + private PageTransformer pageTransformer; + + public static PdfHandler getInstance() throws ApplicationException { + if (instance == null) { + instance = new PdfHandler(); + instance.init(); + } + return instance; + } + + public void init() throws ApplicationException { + renderer = new ITextRenderer(); + SharedContext rendererSharedContext = renderer.getSharedContext(); + PdfHandlerUserAgent mpdlUserAgent = new PdfHandlerUserAgent(); // user agent to get a callback handle to the web access of images (getImageResource(url)) + 
mpdlUserAgent.setSharedContext(rendererSharedContext); + rendererSharedContext.setUserAgentCallback(mpdlUserAgent); + fontFileNames = new Hashtable(); + String fontJunicodeFileName = Constants.getInstance().getDocumentsDir() + "/../fonts/Junicode-Regular.ttf"; + String fontJunicodeBoldFileName = Constants.getInstance().getDocumentsDir() + "/../fonts/Junicode-Bold.ttf"; + String fontJunicodeItalicFileName = Constants.getInstance().getDocumentsDir() + "/../fonts/Junicode-Italic.ttf"; + String fontJunicodeBoldItalicFileName = Constants.getInstance().getDocumentsDir() + "/../fonts/Junicode-BoldItalic.ttf"; + String fontSunExtAFileName = Constants.getInstance().getDocumentsDir() + "/../fonts/Sun-ExtA.ttf"; // chinese symbols + String fontSunExtBFileName = Constants.getInstance().getDocumentsDir() + "/../fonts/Sun-ExtB.ttf"; // chinese symbols + String fontDejaVuFileName = Constants.getInstance().getDocumentsDir() + "/../fonts/DejaVuSans.ttf"; // arabic symbols + setFont(fontJunicodeFileName); + setFont(fontJunicodeBoldFileName); + setFont(fontJunicodeItalicFileName); + setFont(fontJunicodeBoldItalicFileName); // if set then some not bold italic characters are shown bold (e.g. 
in Benedetti_1585.xml) + setFont(fontSunExtAFileName); + setFont(fontSunExtBFileName); + setFont(fontDejaVuFileName); + docHandler = new DocumentHandler(); + tocTransformer = new TocTransformer(); + pageTransformer = new PageTransformer(); + } + + public void createFile(boolean pdf, boolean html, MetadataRecord mdRecord) throws ApplicationException { + OutputStream osPdf = null; + OutputStream osHtml = null; + OutputStream osHtmlPdf = null; + String docId = mdRecord.getDocId(); + String language = mdRecord.getLanguage(); + if (docId == null) + throw new ApplicationException("Pdf/Html-Generation failed: no docId given in mdRecord"); + String docDir = docHandler.getDocDir(docId); + String docFileName = docHandler.getDocFileName(docId); + int lastDot = docFileName.lastIndexOf("."); + String docFileNameWithoutExtension = docFileName.substring(0, lastDot); + String docIdExtension = docFileName.substring(lastDot + 1); + String destFileNamePdf = docDir + "/" + docFileNameWithoutExtension + ".pdf"; + String destFileNameHtml = docDir + "/" + docFileNameWithoutExtension + ".html"; + if (docIdExtension != null && docIdExtension.equals("html")) { + destFileNameHtml = docDir + "/" + docFileNameWithoutExtension + "-gen.html"; + } + String destFileNameHtmlPdfTmp = docDir + "/" + docFileNameWithoutExtension + "-4Pdf.html"; + try { + // start document + if (pdf) { + osPdf = new FileOutputStream(new File(destFileNamePdf)); + osHtmlPdf = new FileOutputStream(new File(destFileNameHtmlPdfTmp)); + } + if (html) + osHtml = new FileOutputStream(new File(destFileNameHtml)); + int countPages = mdRecord.getPageCount(); + // style page + String pageStyleHtml = "float:left; clear:both; border: thin solid #808080; width: 21.0cm; margin-top: 0.2cm; margin-bottom: 1cm; margin-left: 0.7cm; margin-right: 0.7cm; padding: 0.2cm;"; + // firstPage + String firstPageHtml = getFirstPageHtmlByEchodocuView(mdRecord); + String mdRecordStr = getMdRecordString(mdRecord); + String htmlHeadStr = 
getHtmlHead(null, mdRecordStr); + String fontStyle = getFontStyle(language); + if(pdf) { + write("" + htmlHeadStr + "", osHtmlPdf); + // first page + if (firstPageHtml == null) + firstPageHtml = getFirstPageHtml(mdRecord, false); // long first page + write(firstPageHtml, osHtmlPdf); + } + if (html) { + write("" + htmlHeadStr + "", osHtml); + // first page + write("
            ", osHtml); + if (firstPageHtml == null) + firstPageHtml = getFirstPageHtml(mdRecord, true); // short first page + write(firstPageHtml, osHtml); + write("
            ", osHtml); + } + // table of content of document + String htmlToc = getTocHtml(mdRecord); + if (html && htmlToc != null) { + write("
            ", osHtml); + write(htmlToc, osHtml); + write("
            ", osHtml); + } + if(pdf && htmlToc != null) { + write(htmlToc, osHtmlPdf); + } + // all pages of the document + for(int i=1; i<=countPages; i++) { + String htmlPageFragment = getPageFragmentHtml(mdRecord, i, pageTransformer); + htmlPageFragment = "
            " + htmlPageFragment + "
            "; + if (html) { + write("
            ", osHtml); + write("
            ", osHtml); + write("
            ", osHtml); + write(htmlPageFragment, osHtml); + write("
            ", osHtml);
+        }
+        if(pdf) {
+          write(htmlPageFragment, osHtmlPdf);
+        }
+      }
+      if (html) {
+        write("", osHtml);
+      }
+      // create PDF document
+      if(pdf) {
+        write("", osHtmlPdf);
+        osHtmlPdf.close();
+        renderer.setDocument(new File(destFileNameHtmlPdfTmp));
+        renderer.layout();  // takes the most time
+        renderer.createPDF(osPdf);
+      }
+    } catch (Exception e) {
+      init();
+      String message = e.getMessage();
+      if (message != null && message.indexOf("digilib") > 0 && message.indexOf("500") > 0) {
+        throw new ApplicationException("fetch image is not possible: " + message);
+      }
+      throw new ApplicationException(e);
+    } finally {
+      // A stream is only opened when its output format was requested, so any of the three may
+      // still be null here. Each one is closed on its own so that a failing close neither leaks
+      // the remaining streams nor skips the removal of the temporary HTML file.
+      closeQuietly(osHtmlPdf);
+      closeQuietly(osPdf);
+      closeQuietly(osHtml);
+      FileUtils.deleteQuietly(new File(destFileNameHtmlPdfTmp));
+    }
+  }
+
+  /**
+   * Closes the given stream if it was opened. A failure to close is ignored on purpose:
+   * the caller is cleaning up and must not replace its own result or exception.
+   *
+   * @param out the stream to close, may be null
+   */
+  private void closeQuietly(OutputStream out) {
+    if (out == null)
+      return;
+    try {
+      out.close();
+    } catch (IOException ignored) {
+      // best effort cleanup
+    }
+  }
+
+  /**
+   * Tries to fetch the first page of the document from the ECHO docuView service.
+   *
+   * @param mdRecord metadata record; its echoId is sent as the url parameter of the request
+   * @return the HTML fragment cut out of the response, or null if the record has no echoId,
+   *         the request fails or the expected markers are not found in the response
+   */
+  private String getFirstPageHtmlByEchodocuView(MetadataRecord mdRecord) {
+    String firstPageHtml = null;
+    try {
+      // Url to Echo viewer
+      String echoId = mdRecord.getEchoId();
+      if (echoId == null)
+        return null;
+      String urlDocuView = "http://echo.mpiwg-berlin.mpg.de/ECHOdocuView?url=" + echoId + "&viewMode=indexonly";
+      String wholeFirstPageHtml = performGetRequest(urlDocuView);
+      // "XXXXUrlErrorXXXX" is the marker performGetRequest returns for a status code >= 400
+      if (wholeFirstPageHtml == null || wholeFirstPageHtml.equals("XXXXUrlErrorXXXX"))
+        return null;
+      int from = wholeFirstPageHtml.indexOf("");
+      int to = wholeFirstPageHtml.indexOf("");
+      if (from != -1 && to != -1) {
+        firstPageHtml = "
            " + wholeFirstPageHtml.substring(from + 6, to) + "
            "; + } + } catch (ApplicationException e) { + // nothing + } + return firstPageHtml; + } + + private String getFirstPageHtml(MetadataRecord mdRecord, boolean shortPage) { + String author = mdRecord.getCreator(); + String title = mdRecord.getTitle(); + String year = mdRecord.getYear(); + String firstPageHtml = "
            "; + firstPageHtml = firstPageHtml + "

            " + "Max Planck Institute for the History of Science" + "

            "; + firstPageHtml = firstPageHtml + "

            " + /* u-umlaut written as a Unicode escape so the literal does not depend on the source file encoding */ "Max-Planck-Institut f\u00fcr Wissenschaftsgeschichte" + "

            "; + firstPageHtml = firstPageHtml + "

            "; + firstPageHtml = firstPageHtml + "

            "; + if (! shortPage) { + firstPageHtml = firstPageHtml + "

            "; + firstPageHtml = firstPageHtml + "

            "; + firstPageHtml = firstPageHtml + "

            "; + firstPageHtml = firstPageHtml + "

            "; + } + if (author != null) { + firstPageHtml = firstPageHtml + "

            " + author + "

            "; + } + if (title != null) { + firstPageHtml = firstPageHtml + "

            " + title + "

            "; + } + if (year != null) { + firstPageHtml = firstPageHtml + "

            " + year + "

            "; + } + if (! shortPage) { + firstPageHtml = firstPageHtml + "

            "; + firstPageHtml = firstPageHtml + "

            "; + firstPageHtml = firstPageHtml + "

            "; + firstPageHtml = firstPageHtml + "

            "; + firstPageHtml = firstPageHtml + "

            "; + firstPageHtml = firstPageHtml + "

            "; + firstPageHtml = firstPageHtml + "

            "; + firstPageHtml = firstPageHtml + "

            "; + firstPageHtml = firstPageHtml + "

            "; + firstPageHtml = firstPageHtml + "

            "; + firstPageHtml = firstPageHtml + "

            "; + firstPageHtml = firstPageHtml + "

            "; + firstPageHtml = firstPageHtml + "

            "; + firstPageHtml = firstPageHtml + "

            "; + firstPageHtml = firstPageHtml + "

            "; + firstPageHtml = firstPageHtml + "

            "; + firstPageHtml = firstPageHtml + "

            "; + firstPageHtml = firstPageHtml + "

            "; + } + firstPageHtml = firstPageHtml + "

            "; + firstPageHtml = firstPageHtml + "

            "; + firstPageHtml = firstPageHtml + "

            "; + firstPageHtml = firstPageHtml + "

            "; + // Url to Echo viewer + String echoId = mdRecord.getEchoId(); + String urlDocuView = "http://echo.mpiwg-berlin.mpg.de/ECHOdocuView"; + String document = "?url=" + echoId; + String urlDoc = urlDocuView + document; + String echoLink = "" + urlDocuView + "

            " + document + "
            "; + if (echoId == null) + echoLink = "" + "http://echo.mpiwg-berlin.mpg.de" + ""; + firstPageHtml = firstPageHtml + "

            Document link:

            " + echoLink + "

            "; + firstPageHtml = firstPageHtml + "
            "; + return firstPageHtml; + } + + private String getTocHtml(MetadataRecord mdRecord) throws ApplicationException { + String htmlStr = null; + try { + String docId = mdRecord.getDocId(); + String tocFileName = docHandler.getFullFileName(docId, "toc"); + File tocFile = new File(tocFileName); + String tocStr = FileUtils.readFileToString(tocFile, "utf-8"); + String htmlToc = tocTransformer.transform(tocStr, "toc", "html"); + if (htmlToc != null && ! htmlToc.isEmpty()) { + htmlStr = "
            " + "Table of contents" + htmlToc + "
            "; + } + } catch (IOException e) { + throw new ApplicationException(e); + } + return htmlStr; + } + + private String getPageFragmentHtml(MetadataRecord mdRecord, int pageNumber, PageTransformer pageTransformer) throws ApplicationException { + String pageHtmlStrFragment = null; + try { + String docId = mdRecord.getDocId(); + String docDir = docHandler.getDocDir(docId); + String docPageTokenizedFileName = docDir + "/pages/page-" + pageNumber + "-morph.xml"; + File docPageTokenizedFile = new File(docPageTokenizedFileName); + String tokenizedXmlStr = FileUtils.readFileToString(docPageTokenizedFile, "utf-8"); + pageTransformer.setDisplayWordOptions("orig"); // only orig word spans are build so that the HTML is not too huge for PDF generation + pageHtmlStrFragment = pageTransformer.transform(tokenizedXmlStr, mdRecord, pageNumber, "html"); + } catch (IOException e) { + throw new ApplicationException(e); + } + return pageHtmlStrFragment; + } + + private String getMdRecordString(MetadataRecord mdRecord) { + String author = mdRecord.getCreator(); + String title = mdRecord.getTitle(); + String year = mdRecord.getYear(); + String mdRecordStr = ""; + if (mdRecord != null) { + if (author != null && ! author.equals("")) { + mdRecordStr = mdRecordStr + author; + } + if (title != null && ! title.equals("")) { + mdRecordStr = mdRecordStr + ". " + title; + } + if (year != null && ! year.equals("")) { + mdRecordStr = mdRecordStr + ". 
" + year + ".";
+      }
+      if (mdRecordStr.isEmpty()) {
+        String docId = mdRecord.getDocId();
+        mdRecordStr = mdRecordStr + docId;
+      }
+    }
+    return mdRecordStr;
+  }
+
+  private String getHtmlHead(String stylePageStr, String titleStr) {
+    String htmlStr = "";
+    if (stylePageStr != null)
+      htmlStr = htmlStr + "";
+    htmlStr = htmlStr + "" + titleStr + "";
+    htmlStr = htmlStr + "";
+    htmlStr = htmlStr + "";
+    htmlStr = htmlStr + "";
+    return htmlStr;
+  }
+
+  private String getFontStyle(String language) {
+    String fontFamily = "Junicode";
+    if (language != null && language.equals("ar"))
+      fontFamily = "DejaVu Sans";
+    else if (language != null && (language.equals("zh") || language.equals("zho-Hant")))
+      fontFamily = "Sun-ExtA, Sun-ExtB";
+    return "font-size:11pt; font-family:" + fontFamily + ";";
+  }
+
+  private void write(String str, OutputStream out) throws ApplicationException {
+    try {
+      byte[] bytes = str.getBytes("utf-8");
+      out.write(bytes, 0, bytes.length);
+      out.flush();
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);
+    } catch (FileNotFoundException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }
+  }
+
+  /**
+   * Registers a font file with the font resolver of the renderer, once per file name.
+   *
+   * This method is called by init() only. It therefore must not call init() itself when a font
+   * cannot be loaded: init() would register the same font again and fail again, without end.
+   *
+   * @param fontFileName full file name of the font
+   * @throws ApplicationException if the font cannot be added to the font resolver
+   */
+  private void setFont(String fontFileName) throws ApplicationException {
+    if (fontFileNames.containsKey(fontFileName))
+      return;  // already registered
+    try {
+      ITextFontResolver fontResolver = renderer.getFontResolver();
+      fontResolver.addFont(fontFileName, BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);  // Identity_H is Unicode Horizontal; not_embedded means not embedded in the PDF doc
+      // remembered only after the font was really added, so that a failed font is not taken as registered
+      fontFileNames.put(fontFileName, fontFileName);
+    } catch (XRRuntimeException e) {
+      String message = e.getMessage();  // may be null
+      if (message != null && message.indexOf("digilib") > 0 && message.indexOf("500") > 0) {
+        throw new ApplicationException("fetch image is not possible: please try again later");
+      }
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      String message = e.getMessage();  // may be null
+      if (message != null && message.indexOf("digilib") > 0 && message.indexOf("500") > 0) {
+        throw new ApplicationException("fetch image is not possible: " + message);
+      }
+      throw new ApplicationException(e);
+    } catch (DocumentException e) {
+      String message = e.getMessage();  // may be null
+      if (message != null && message.indexOf("digilib") > 0 && message.indexOf("500") > 0) {
+        throw new ApplicationException("fetch image is not possible: " + message);
+      }
+      throw new ApplicationException(e);
+    }
+  }
+
+  /**
+   * Performs a HTTP GET request and returns the response body decoded as UTF-8.
+   *
+   * @param url the URL to request
+   * @return the response body, the marker "XXXXUrlErrorXXXX" if the status code is 400 or higher,
+   *         or null if the response carries no body
+   * @throws ApplicationException if the request cannot be executed or the body cannot be read
+   */
+  private String performGetRequest(String url) throws ApplicationException {
+    String resultStr = null;
+    GetMethod method = new GetMethod(url);
+    try {
+      HttpClient httpClient = new HttpClient();
+      httpClient.executeMethod(method);
+      int statusCode = method.getStatusCode();
+      if (statusCode >= 400)
+        return "XXXXUrlErrorXXXX";
+      byte[] resultBytes = method.getResponseBody();
+      if (resultBytes != null)
+        resultStr = new String(resultBytes, "utf-8");
+    } catch (HttpException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    } finally {
+      // the connection has to be given back on every path, also on the early return and on exceptions
+      method.releaseConnection();
+    }
+    return resultStr;
+  }
+}
diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/document/PdfHandlerUserAgent.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/document/PdfHandlerUserAgent.java Tue May 21 10:19:32 2013 +0200
@@ -0,0 +1,149 @@
+package de.mpg.mpiwg.berlin.mpdl.cms.document;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.util.logging.Logger;
+
+import org.xhtmlrenderer.layout.SharedContext;
+import org.xhtmlrenderer.pdf.ITextFSImage;
+import org.xhtmlrenderer.pdf.ITextOutputDevice;
+import org.xhtmlrenderer.pdf.PDFAsImage;
+import org.xhtmlrenderer.resource.ImageResource;
+import org.xhtmlrenderer.swing.NaiveUserAgent;
+
+import com.lowagie.text.Image;
+import com.lowagie.text.Rectangle;
+import 
com.lowagie.text.pdf.PdfReader; + +public class PdfHandlerUserAgent extends NaiveUserAgent { + private static final int IMAGE_CACHE_CAPACITY = 32; + private static final float DEFAULT_DOTS_PER_POINT = 20f * 4f / 3f; + private static Logger LOGGER = Logger.getLogger(PdfHandlerUserAgent.class.getName()); + private SharedContext sharedContext; + private ITextOutputDevice outputDevice; + + public PdfHandlerUserAgent() { + super(IMAGE_CACHE_CAPACITY); + outputDevice = new ITextOutputDevice(DEFAULT_DOTS_PER_POINT); + } + + @SuppressWarnings("unchecked") + public ImageResource getImageResource(String inputUri) { + ImageResource resource = null; + String uri = resolveURI(inputUri); + resource = (ImageResource) _imageCache.get(uri); + if (resource == null) { + InputStream is = resolveAndOpenStream(uri); + if (is != null) { + try { + URL url = new URL(uri); + if (url.getPath() != null && url.getPath().toLowerCase().endsWith(".pdf")) { + PdfReader reader = outputDevice.getReader(url); + PDFAsImage image = new PDFAsImage(url); + Rectangle rect = reader.getPageSizeWithRotation(1); + image.setInitialWidth(rect.getWidth()*outputDevice.getDotsPerPoint()); + image.setInitialHeight(rect.getHeight()*outputDevice.getDotsPerPoint()); + resource = new ImageResource(image); + } else { + Image image = getImage(url); + if (image == null) + return null; + scaleToOutputResolution(image); + resource = new ImageResource(new ITextFSImage(image)); + } + _imageCache.put(uri, resource); + } catch (IOException e) { + LOGGER.severe("Can't get image file: unexpected problem for URI: '" + uri + "': " + e.getMessage()); + } finally { + try { + if (is != null) + is.close(); + } catch (IOException e) { + // ignore + } + } + } + } + if (resource == null) { + resource = new ImageResource(null); + } + return resource; + } + + private void scaleToOutputResolution(Image image) { + float factor = sharedContext.getDotsPerPixel(); + image.scaleAbsolute(image.getPlainWidth() * factor, image.getPlainHeight() * 
factor);
+  }
+
+  public SharedContext getSharedContext() {
+    return sharedContext;
+  }
+
+  public void setSharedContext(SharedContext sharedContext) {
+    this.sharedContext = sharedContext;
+  }
+
+  /**
+   * Loads the image of the given URL. Up to three attempts are made, with a pause of one
+   * second after the first and after the second failed attempt.
+   *
+   * @param url the URL of the image
+   * @return the image, or null if all three attempts failed
+   */
+  private Image getImage(URL url) {
+    String[] attempts = { "first", "second", "third" };
+    for (int i = 0; i < attempts.length; i++) {
+      try {
+        return Image.getInstance(url);
+      } catch (Exception e) {
+        if (i < attempts.length - 1)
+          pauseBeforeRetry();
+        // log the failure of this attempt, not the one of the first attempt
+        LOGGER.severe(attempts[i] + " retry to get image for URL '" + url.toString() + "': " + e.getMessage());
+      }
+    }
+    return null;
+  }
+
+  /**
+   * Opens a stream on the resolved form of the given URI. Up to three attempts are made, with
+   * a pause of one second after the first and after the second failed attempt.
+   *
+   * @param inputUri the URI to resolve and open
+   * @return the opened stream, or null if all three attempts failed
+   */
+  protected InputStream resolveAndOpenStream(String inputUri) {
+    String uri = resolveURI(inputUri);
+    String[] attempts = { "first", "second", "third" };
+    for (int i = 0; i < attempts.length; i++) {
+      try {
+        return new URL(uri).openStream();
+      } catch (Exception e) {
+        if (i < attempts.length - 1)
+          pauseBeforeRetry();
+        // log the failure of this attempt, not the one of the first attempt
+        LOGGER.severe(attempts[i] + " retry to open stream for URL '" + uri + "': " + e.getMessage());
+      }
+    }
+    return null;
+  }
+
+  /**
+   * Waits one second before the next attempt. If the thread is interrupted while waiting, the
+   * interrupt flag is set again so that the caller can still see the interruption.
+   */
+  private void pauseBeforeRetry() {
+    try {
+      Thread.sleep(1000);
+    } catch (InterruptedException e) {
+      Thread.currentThread().interrupt();
+    }
+  }
+}
diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/document/Token.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ 
b/software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/document/Token.java Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,29 @@ +package de.mpg.mpiwg.berlin.mpdl.cms.document; + +import org.apache.lucene.index.Term; + +public class Token { + private Term term; + private int freq = -1; + + public Token(Term term) { + this.term = term; + } + + public Term getTerm() { + return term; + } + + public void setTerm(Term term) { + this.term = term; + } + + public int getFreq() { + return freq; + } + + public void setFreq(int freq) { + this.freq = freq; + } + +} diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/general/Constants.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/general/Constants.java Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,55 @@ +package de.mpg.mpiwg.berlin.mpdl.cms.general; + +import java.net.URL; +import java.util.Properties; + +import de.mpg.mpiwg.berlin.mpdl.util.Util; + +public class Constants { + private static Constants instance; + private Properties properties; + + public static Constants getInstance() { + if (instance == null) { + instance = new Constants(); + instance.init(); + } + return instance; + } + + private void init() { + URL url = Constants.class.getClassLoader().getResource("constants.properties"); + if (url != null) { + String propertiesFileName = url.toString().substring(5); + properties = (new Util()).getProperties(propertiesFileName); + } + } + + public String getDocumentsDir() { + if (properties != null) + return properties.getProperty("documentsDir"); + else + return "no properties file"; + } + + public String getLuceneDocumentsDir() { + if (properties != null) + return properties.getProperty("luceneDocumentsDir"); + else + return "no properties file"; + } + + public String getLuceneNodesDir() { + if (properties != null) + return properties.getProperty("luceneNodesDir"); + 
else
+      return "no properties file";
+  }
+
+  public String getConfDir() {
+    if (properties != null)
+      return properties.getProperty("confDir");
+    else
+      return "no properties file";
+  }
+}
diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/harvester/PathExtractor.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/harvester/PathExtractor.java Tue May 21 10:19:32 2013 +0200
@@ -0,0 +1,137 @@
+package de.mpg.mpiwg.berlin.mpdl.cms.harvester;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.XMLStreamConstants;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.XMLStreamReader;
+
+import org.apache.http.HttpEntity;
+import org.apache.http.HttpResponse;
+import org.apache.http.client.HttpClient;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.impl.client.DefaultHttpClient;
+
+public class PathExtractor {
+  private List ressourceLoc;
+  String excludes;
+
+  public PathExtractor() {
+
+  }
+
+  public List initExtractor(String startingUri, String excludes) {
+    this.excludes = excludes;
+    ressourceLoc = new ArrayList();
+    // parameter necessary, because it's recursive, thus changing the uri
+    extractDocLocations(startingUri);
+    System.out.println("extracing resource locations done.");
+    return this.ressourceLoc;
+  }
+
+  /**
+   * Recursive method to extract the paths of the resources below the given URL.
+   *
+   * The response of startUrl is read as XML. For every "collection" element with a name
+   * attribute the method calls itself with the URL of that collection, unless startUrl
+   * already ends with the name or the lower-cased name is contained in excludes. For every
+   * "resource" element with a name attribute the URL of the resource is added to ressourceLoc.
+   * If the request or the parsing fails, the stack trace is printed and the listing of this
+   * URL ends; locations collected so far are kept.
+   *
+   * @param startUrl URL of the collection to list
+   */
+  private void extractDocLocations(String startUrl) {
+    HttpClient client = new DefaultHttpClient();
+    XMLStreamReader reader = null;
+    try {
+      HttpResponse resp = client.execute(new HttpGet(startUrl));
+      HttpEntity entity = resp.getEntity();
+      if (entity == null)
+        return;
+      reader = XMLInputFactory.newInstance().createXMLStreamReader(entity.getContent());
+      while (true) {
+        int event = reader.next();
+        if (event == XMLStreamConstants.END_DOCUMENT)
+          break;
+        if (event != XMLStreamConstants.START_ELEMENT)
+          continue;
+        String name = reader.getAttributeValue(null, "name");
+        if (name == null)
+          continue;
+        String localName = reader.getLocalName();
+        if (localName.equals("collection") && !startUrl.endsWith(name) && !this.excludes.contains(name.toLowerCase())) {
+          client.getConnectionManager().closeExpiredConnections();
+          // a separating slash is only inserted if neither side brings one along
+          if (name.startsWith("/") || startUrl.endsWith("/")) {
+            extractDocLocations(startUrl + name);
+          } else {
+            extractDocLocations(startUrl + "/" + name);
+          }
+        }
+        if (localName.equals("resource")) {
+          if (!startUrl.endsWith("/")) {
+            ressourceLoc.add(startUrl + "/" + name);
+          } else {
+            ressourceLoc.add(startUrl + name);
+          }
+        }
+      }
+    } catch (IOException e) {
+      e.printStackTrace();
+    } catch (IllegalStateException e) {
+      e.printStackTrace();
+    } catch (XMLStreamException e) {
+      e.printStackTrace();
+    } finally {
+      if (reader != null) {
+        try {
+          reader.close();
+        } catch (XMLStreamException e) {
+          e.printStackTrace();
+        }
+      }
+      // every call creates its own client, so its connections have to be released here
+      client.getConnectionManager().shutdown();
+    }
+  }
+
+  /**
+   * Extracts paths just like extractDocLocations(String startUri), but does so
+   * locally and not via HTTP
+   *
+   * @return
+   */
+  public List extractPathLocally(String startUrl) {
+    List pathList = new ArrayList();
+
+    // path of the home directory via system variable
+    // String loc = 
System.getenv("HOME")+"/wsp/configs"; + // out.println("hom variable + conf datei : "+loc); + File f = new File(startUrl); + // out.println("readable : "+Boolean.toString(f.canRead())); + // out.println("readable : "+f.isDirectory()); + if (f.isDirectory()) { + File[] filelist = f.listFiles(); + for (File file : filelist) { + if (file.getName().toLowerCase().contains("config")) { + if (!startUrl.endsWith("/")) { + pathList.add(startUrl + "/" + file.getName()); + } else { + pathList.add(startUrl + file.getName()); + } + } + } + } + return pathList; + } + +} diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/lucene/IndexHandler.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/lucene/IndexHandler.java Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,1463 @@ +package de.mpg.mpiwg.berlin.mpdl.cms.lucene; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.StringReader; +import java.util.ArrayList; +import java.util.Date; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; + +import org.apache.commons.io.FileUtils; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.KeywordAnalyzer; +import org.apache.lucene.analysis.PerFieldAnalyzerWrapper; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldSelector; +import org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.SetBasedFieldSelector; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import 
org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermEnum; +import org.apache.lucene.index.TermFreqVector; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.SearcherManager; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TermRangeQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.highlight.Highlighter; +import org.apache.lucene.search.highlight.QueryScorer; +import org.apache.lucene.search.highlight.SimpleHTMLFormatter; +import org.apache.lucene.search.highlight.TextFragment; +import org.apache.lucene.search.highlight.TokenSources; +import org.apache.lucene.search.similar.MoreLikeThis; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.util.Version; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import org.xml.sax.XMLReader; + +import com.sun.org.apache.xerces.internal.parsers.SAXParser; + +import de.mpg.mpiwg.berlin.mpdl.cms.confmanager.CollectionReader; +import de.mpg.mpiwg.berlin.mpdl.cms.confmanager.ConfManagerResultWrapper; +import de.mpg.mpiwg.berlin.mpdl.cms.document.DocumentHandler; +import de.mpg.mpiwg.berlin.mpdl.cms.document.Hits; +import de.mpg.mpiwg.berlin.mpdl.cms.document.MetadataRecord; +import de.mpg.mpiwg.berlin.mpdl.cms.document.Token; +import de.mpg.mpiwg.berlin.mpdl.cms.general.Constants; +import de.mpg.mpiwg.berlin.mpdl.cms.scheduler.CmsDocOperation; +import 
de.mpg.mpiwg.berlin.mpdl.cms.transform.XslResourceTransformer; +import de.mpg.mpiwg.berlin.mpdl.cms.translator.MicrosoftTranslator; +import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; +import de.mpg.mpiwg.berlin.mpdl.lt.dict.db.LexHandler; +import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Form; +import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Lemma; +import de.mpg.mpiwg.berlin.mpdl.lt.text.norm.Normalizer; +import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.WordContentHandler; +import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.XmlTokenizer; +import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.XmlTokenizerContentHandler; +import de.mpg.mpiwg.berlin.mpdl.util.StringUtils; +import de.mpg.mpiwg.berlin.mpdl.util.Util; + +public class IndexHandler { + private static IndexHandler instance; + private IndexWriter documentsIndexWriter; + private IndexWriter nodesIndexWriter; + private SearcherManager documentsSearcherManager; + private SearcherManager nodesSearcherManager; + private IndexReader documentsIndexReader; + private PerFieldAnalyzerWrapper documentsPerFieldAnalyzer; + private PerFieldAnalyzerWrapper nodesPerFieldAnalyzer; + + + public static IndexHandler getInstance() throws ApplicationException { + if (instance == null) { + instance = new IndexHandler(); + instance.init(); + } + return instance; + } + + private void init() throws ApplicationException { + documentsIndexWriter = getDocumentsWriter(); + documentsIndexWriter.setMaxFieldLength(1000000); + nodesIndexWriter = getNodesWriter(); + nodesIndexWriter.setMaxFieldLength(1000000); + documentsSearcherManager = getNewSearcherManager(documentsIndexWriter); + nodesSearcherManager = getNewSearcherManager(nodesIndexWriter); + documentsIndexReader = getDocumentsReader(); + } + + public void indexDocument(CmsDocOperation docOperation) throws ApplicationException { + try { + // first delete document in documentsIndex and nodesIndex + deleteDocumentLocal(docOperation); + indexDocumentLocal(docOperation); + 
documentsIndexWriter.commit(); + nodesIndexWriter.commit(); + } catch (Exception e) { + try { + documentsIndexWriter.rollback(); + nodesIndexWriter.rollback(); + } catch (Exception ex) { + // nothing + } + throw new ApplicationException(e); + } + } + + private void indexDocumentLocal(CmsDocOperation docOperation) throws ApplicationException { + FileReader fr = null; + try { + MetadataRecord mdRecord = docOperation.getMdRecord(); + String docId = mdRecord.getDocId(); + DocumentHandler docHandler = new DocumentHandler(); + String docFileName = docHandler.getDocFullFileName(docId) + ".upgrade"; + // add document to documentsIndex + Document doc = new Document(); + Field docIdField = new Field("docId", docId, Field.Store.YES, Field.Index.ANALYZED); + doc.add(docIdField); + String docIdSortedStr = docId.toLowerCase(); // so that sorting is lower case + Field docIdFieldSorted = new Field("docIdSorted", docIdSortedStr, Field.Store.YES, Field.Index.NOT_ANALYZED); + doc.add(docIdFieldSorted); + String identifier = mdRecord.getIdentifier(); + if (identifier != null) { + Field identifierField = new Field("identifier", identifier, Field.Store.YES, Field.Index.ANALYZED); + doc.add(identifierField); + } + String uri = docOperation.getSrcUrl(); + if (uri != null) { + Field uriField = new Field("uri", uri, Field.Store.YES, Field.Index.ANALYZED); + doc.add(uriField); + } + String collectionNames = docOperation.getCollectionNames(); + if (collectionNames != null) { + Field collectionNamesField = new Field("collectionNames", collectionNames, Field.Store.YES, Field.Index.ANALYZED); + doc.add(collectionNamesField); + } + if (mdRecord.getCreator() != null) { + String authorStr = mdRecord.getCreator(); + Field authorField = new Field("author", authorStr, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + doc.add(authorField); + if (authorStr != null) + authorStr = authorStr.toLowerCase(); // so that sorting is lower case + Field authorFieldSorted = new 
Field("authorSorted", authorStr, Field.Store.YES, Field.Index.NOT_ANALYZED); + doc.add(authorFieldSorted); + } + if (mdRecord.getTitle() != null) { + String titleStr = mdRecord.getTitle(); + Field titleField = new Field("title", titleStr, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + doc.add(titleField); + if (titleStr != null) + titleStr = titleStr.toLowerCase(); // so that sorting is lower case + Field titleFieldSorted = new Field("titleSorted", titleStr, Field.Store.YES, Field.Index.NOT_ANALYZED); + doc.add(titleFieldSorted); + } + if (mdRecord.getLanguage() != null) { + String langStr = mdRecord.getLanguage(); + if (langStr != null) + langStr = langStr.toLowerCase(); // all language codes are lower case + Field languageField = new Field("language",langStr, Field.Store.YES, Field.Index.ANALYZED); + doc.add(languageField); + Field languageFieldSorted = new Field("languageSorted", langStr, Field.Store.YES, Field.Index.NOT_ANALYZED); + doc.add(languageFieldSorted); + } + if (mdRecord.getPublisher() != null) { + String publisherStr = mdRecord.getPublisher(); + Field publisherField = new Field("publisher", publisherStr, Field.Store.YES, Field.Index.ANALYZED); + doc.add(publisherField); + if (publisherStr != null) + publisherStr = publisherStr.toLowerCase(); // so that sorting is lower case + Field publisherFieldSorted = new Field("publisherSorted", publisherStr, Field.Store.YES, Field.Index.NOT_ANALYZED); + doc.add(publisherFieldSorted); + } + if (mdRecord.getYear() != null) { + Field dateField = new Field("date", mdRecord.getYear(), Field.Store.YES, Field.Index.ANALYZED); + doc.add(dateField); + Field dateFieldSorted = new Field("dateSorted", mdRecord.getYear(), Field.Store.YES, Field.Index.NOT_ANALYZED); + doc.add(dateFieldSorted); + } + if (mdRecord.getSubject() != null) { + Field subjectField = new Field("subject", mdRecord.getSubject(), Field.Store.YES, Field.Index.ANALYZED); + doc.add(subjectField); + } + if 
(mdRecord.getRights() != null) { + Field rightsField = new Field("rights", mdRecord.getRights(), Field.Store.YES, Field.Index.ANALYZED); + doc.add(rightsField); + } + if (mdRecord.getLicense() != null) { + Field licenseField = new Field("license", mdRecord.getLicense(), Field.Store.YES, Field.Index.ANALYZED); + doc.add(licenseField); + } + if (mdRecord.getAccessRights() != null) { + Field accessRightsField = new Field("accessRights", mdRecord.getAccessRights(), Field.Store.YES, Field.Index.ANALYZED); + doc.add(accessRightsField); + } + String echoId = mdRecord.getEchoId(); + if (echoId != null) { + Field echoIdField = new Field("echoId", echoId, Field.Store.YES, Field.Index.ANALYZED); + doc.add(echoIdField); + } + String echoPageImageDir = mdRecord.getEchoPageImageDir(); + if (echoPageImageDir != null) { + Field echoPageImageDirField = new Field("echoPageImageDir", echoPageImageDir, Field.Store.YES, Field.Index.ANALYZED); + doc.add(echoPageImageDirField); + } + String echoFiguresDir = mdRecord.getEchoFiguresDir(); + if (echoFiguresDir != null) { + Field echoFiguresDirField = new Field("echoFiguresDir", echoFiguresDir, Field.Store.YES, Field.Index.ANALYZED); + doc.add(echoFiguresDirField); + } + String mpiwgDocId = mdRecord.getMpiwgDocId(); + if (mpiwgDocId != null) { + Field mpiwgDocIdField = new Field("mpiwgDocId", mpiwgDocId, Field.Store.YES, Field.Index.ANALYZED); + doc.add(mpiwgDocIdField); + } + if (mdRecord.getLastModified() != null) { + Date lastModified = mdRecord.getLastModified(); + String xsDateStr = new Util().toXsDate(lastModified); + Field lastModifiedField = new Field("lastModified", xsDateStr, Field.Store.YES, Field.Index.ANALYZED); + doc.add(lastModifiedField); + long time = lastModified.getTime(); + String timeStr = String.valueOf(time); + Field lastModifiedFieldSorted = new Field("lastModifiedSorted", timeStr, Field.Store.YES, Field.Index.NOT_ANALYZED); + doc.add(lastModifiedFieldSorted); + } + if (mdRecord.getSchemaName() != null) { + String 
schemNameStr = mdRecord.getSchemaName(); + Field schemaField = new Field("schemaName", schemNameStr, Field.Store.YES, Field.Index.ANALYZED); + doc.add(schemaField); + if (schemNameStr != null) + schemNameStr = schemNameStr.toLowerCase(); // so that sorting is lower case + Field schemaFieldSorted = new Field("schemaNameSorted", schemNameStr, Field.Store.YES, Field.Index.NOT_ANALYZED); + doc.add(schemaFieldSorted); + } + + String language = mdRecord.getLanguage(); + InputStreamReader docFileReader = new InputStreamReader(new FileInputStream(docFileName), "utf-8"); + // to guarantee that utf-8 is used (if not done, it does not work on Tomcat which has another default charset) + XmlTokenizer docXmlTokenizer = new XmlTokenizer(docFileReader); + docXmlTokenizer.setDocIdentifier(docId); + docXmlTokenizer.setLanguage(language); + docXmlTokenizer.setOutputFormat("string"); + String[] outputOptionsWithLemmas = { "withLemmas" }; // so all tokens are + // fetched with lemmas (costs performance) + docXmlTokenizer.setOutputOptions(outputOptionsWithLemmas); + String[] normFunctionNone = { "none" }; + docXmlTokenizer.setNormFunctions(normFunctionNone); + docXmlTokenizer.tokenize(); + + int pageCount = docXmlTokenizer.getPageCount(); + if (pageCount == 0) + pageCount = 1; // each document at least has one page + String pageCountStr = String.valueOf(pageCount); + Field pageCountField = new Field("pageCount", pageCountStr, Field.Store.YES, Field.Index.ANALYZED); + doc.add(pageCountField); + + String[] outputOptionsEmpty = {}; + docXmlTokenizer.setOutputOptions(outputOptionsEmpty); + // must be set to null so that the normalization function works + String docTokensOrig = docXmlTokenizer.getStringResult(); + String[] normFunctionReg = { "reg" }; + docXmlTokenizer.setNormFunctions(normFunctionReg); + String docTokensReg = docXmlTokenizer.getStringResult(); + String[] normFunctionNorm = { "norm" }; + docXmlTokenizer.setNormFunctions(normFunctionNorm); + String docTokensNorm = 
docXmlTokenizer.getStringResult(); + docXmlTokenizer.setOutputOptions(outputOptionsWithLemmas); + String docTokensMorph = docXmlTokenizer.getStringResult(); + + Field tokenOrigField = new Field("tokenOrig", docTokensOrig, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + Field tokenRegField = new Field("tokenReg", docTokensReg, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + Field tokenNormField = new Field("tokenNorm", docTokensNorm, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + Field tokenMorphField = new Field("tokenMorph", docTokensMorph, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + doc.add(tokenOrigField); + doc.add(tokenRegField); + doc.add(tokenNormField); + doc.add(tokenMorphField); + + // save original content of the doc file + File docFile = new File(docFileName); + String contentXml = FileUtils.readFileToString(docFile, "utf-8"); + Field contentXmlField = new Field("xmlContent", contentXml, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + doc.add(contentXmlField); + + // generate original chars content + XslResourceTransformer charsTransformer = new XslResourceTransformer("chars.xsl"); + String content = charsTransformer.transform(docFileName); + Field contentField = new Field("content", content, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + doc.add(contentField); + + documentsIndexWriter.addDocument(doc); + + // add all elements with the specified names of the document to nodesIndex + String[] elementNamesArray = docOperation.getElementNames(); + String elementNames = ""; + for (int i = 0; i < elementNamesArray.length; i++) { + String elemName = elementNamesArray[i]; + elementNames = elementNames + elemName + " "; + } + elementNames = elementNames.substring(0, elementNames.length() - 1); + ArrayList elements = 
docXmlTokenizer.getElements(elementNames); + for (int i = 0; i < elements.size(); i++) { + XmlTokenizerContentHandler.Element element = elements.get(i); + Document nodeDoc = new Document(); + nodeDoc.add(docIdField); + String nodeLanguage = element.lang; + if (nodeLanguage == null) + nodeLanguage = language; + String nodePageNumber = String.valueOf(element.pageNumber); + String nodeLineNumber = String.valueOf(element.lineNumber); + String nodeElementName = String.valueOf(element.name); + String nodeElementDocPosition = String.valueOf(element.docPosition); + String nodeElementAbsolutePosition = String.valueOf(element.position); + String nodeElementPagePosition = String.valueOf(element.pagePosition); + String nodeElementPosition = String.valueOf(element.elemPosition); + String nodeXmlId = element.xmlId; + String nodeXpath = element.xpath; + String nodeXmlContent = element.toXmlString(); + String nodeTokensOrig = element.getTokensStr("orig"); + String nodeTokensReg = element.getTokensStr("reg"); + String nodeTokensNorm = element.getTokensStr("norm"); + String nodeTokensMorph = element.getTokensStr("morph"); + if (nodeLanguage != null) { + Field nodeLanguageField = new Field("language", nodeLanguage, Field.Store.YES, Field.Index.ANALYZED); + nodeDoc.add(nodeLanguageField); + } + Field nodePageNumberField = new Field("pageNumber", nodePageNumber, Field.Store.YES, Field.Index.ANALYZED); + nodeDoc.add(nodePageNumberField); + Field nodeLineNumberField = new Field("lineNumber", nodeLineNumber, Field.Store.YES, Field.Index.ANALYZED); + nodeDoc.add(nodeLineNumberField); + Field nodeElementNameField = new Field("elementName", nodeElementName, Field.Store.YES, Field.Index.ANALYZED); + nodeDoc.add(nodeElementNameField); + Field nodeElementDocPositionField = new Field("elementDocPosition", nodeElementDocPosition, Field.Store.YES, Field.Index.ANALYZED); + nodeDoc.add(nodeElementDocPositionField); + Field nodeElementDocPositionFieldSorted = new Field("elementDocPositionSorted", 
nodeElementDocPosition, Field.Store.YES, Field.Index.NOT_ANALYZED); + nodeDoc.add(nodeElementDocPositionFieldSorted); + Field nodeElementAbsolutePositionField = new Field("elementAbsolutePosition", nodeElementAbsolutePosition, Field.Store.YES, Field.Index.ANALYZED); + nodeDoc.add(nodeElementAbsolutePositionField); + Field nodeElementPagePositionField = new Field("elementPagePosition", nodeElementPagePosition, Field.Store.YES, Field.Index.ANALYZED); + nodeDoc.add(nodeElementPagePositionField); + Field nodeElementPositionField = new Field("elementPosition", nodeElementPosition, Field.Store.YES, Field.Index.ANALYZED); + nodeDoc.add(nodeElementPositionField); + if (nodeXmlId != null) { + Field nodeXmlIdField = new Field("xmlId", nodeXmlId, Field.Store.YES, Field.Index.ANALYZED); + nodeDoc.add(nodeXmlIdField); + } + if (nodeXpath != null) { + Field nodeXpathField = new Field("xpath", nodeXpath, Field.Store.YES, Field.Index.ANALYZED); + nodeDoc.add(nodeXpathField); + } + if (nodeXmlContent != null) { + Field nodeXmlContentField = new Field("xmlContent", nodeXmlContent, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + nodeDoc.add(nodeXmlContentField); + } + if (nodeXmlContent != null) { + String nodeXmlContentTokenized = toTokenizedXmlString(nodeXmlContent, nodeLanguage); + byte[] blabla = nodeXmlContentTokenized.getBytes("utf-8"); // TODO why is tokenizedXmlStr not already utf-8 on page 444 Benedetti ? 
+ nodeXmlContentTokenized = new String(blabla, "utf-8"); + nodeXmlContentTokenized = enrichWordsOrigRegNorm(nodeXmlContentTokenized); + Field nodeXmlContentTokenizedField = new Field("xmlContentTokenized", nodeXmlContentTokenized, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + nodeDoc.add(nodeXmlContentTokenizedField); + } + if (nodeTokensOrig != null) { + Field nodeTokenOrigField = new Field("tokenOrig", nodeTokensOrig, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + nodeDoc.add(nodeTokenOrigField); + } + if (nodeTokensReg != null) { + Field nodeTokenRegField = new Field("tokenReg", nodeTokensReg, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + nodeDoc.add(nodeTokenRegField); + } + if (nodeTokensNorm != null) { + Field nodeTokenNormField = new Field("tokenNorm", nodeTokensNorm, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + nodeDoc.add(nodeTokenNormField); + } + if (nodeTokensMorph != null) { + Field nodeTokenMorphField = new Field("tokenMorph", nodeTokensMorph, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + nodeDoc.add(nodeTokenMorphField); + } + + nodesIndexWriter.addDocument(nodeDoc); + } + } catch (Exception e) { + throw new ApplicationException(e); + } finally { + try { + if (fr != null) + fr.close(); + } catch (Exception e) { + // nothing + } + } + } + + public void deleteDocument(CmsDocOperation docOperation) throws ApplicationException { + try { + deleteDocumentLocal(docOperation); + documentsIndexWriter.commit(); + nodesIndexWriter.commit(); + } catch (Exception e) { + try { + documentsIndexWriter.rollback(); + nodesIndexWriter.rollback(); + } catch (Exception ex) { + // nothing + } + throw new ApplicationException(e); + } + } + + private void deleteDocumentLocal(CmsDocOperation docOperation) throws ApplicationException { + String docId = docOperation.getDocIdentifier(); + try { + 
Term termIdentifier = new Term("docId", docId); + documentsIndexWriter.deleteDocuments(termIdentifier); + nodesIndexWriter.deleteDocuments(termIdentifier); + } catch (Exception e) { + throw new ApplicationException(e); + } + } + + public Hits queryDocuments(String queryStr, String[] sortFieldNames, String language, int from, int to, boolean withHitFragments, boolean translate) throws ApplicationException { + Hits hits = null; + IndexSearcher searcher = null; + try { + makeDocumentsSearcherManagerUpToDate(); + searcher = documentsSearcherManager.acquire(); + String defaultQueryFieldName = "tokenOrig"; + QueryParser queryParser = new QueryParser(Version.LUCENE_35, defaultQueryFieldName, documentsPerFieldAnalyzer); + Query query = null; + if (queryStr.equals("*")) { + query = new MatchAllDocsQuery(); + } else { + query = queryParser.parse(queryStr); + } + Query morphQuery = buildMorphQuery(query, language, false, translate); + Query highlighterQuery = buildMorphQuery(query, language, true, translate); + if (query instanceof PhraseQuery || query instanceof PrefixQuery || query instanceof FuzzyQuery || query instanceof TermRangeQuery) { + highlighterQuery = query; // TODO wenn sie rekursiv enthalten sind + } + String beginHitMark = "!!!BEGIN_HIT!!!"; + String endHitMark = "!!!END_HIT!!!"; + SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(beginHitMark, endHitMark); // marks should not contain xml special chars + QueryScorer queryScorer = new QueryScorer(highlighterQuery); + Highlighter highlighter = new Highlighter(htmlFormatter, queryScorer); + TopDocs resultDocs = null; + if (sortFieldNames != null) { + Sort sort = buildSort(sortFieldNames, "doc"); // build sort criteria + resultDocs = searcher.search(morphQuery, 10000, sort); + } else { + resultDocs = searcher.search(morphQuery, 10000); + } + resultDocs.setMaxScore(1); + int toTmp = to; + if (resultDocs.scoreDocs.length <= to) + toTmp = resultDocs.scoreDocs.length - 1; + if (resultDocs != null) { + 
ArrayList docs = new ArrayList(); + for (int i=from; i<=toTmp; i++) { + int docID = resultDocs.scoreDocs[i].doc; + FieldSelector docFieldSelector = getDocFieldSelector(); + Document luceneDoc = searcher.doc(docID, docFieldSelector); + de.mpg.mpiwg.berlin.mpdl.cms.document.Document doc = new de.mpg.mpiwg.berlin.mpdl.cms.document.Document(luceneDoc); + if (withHitFragments) { + ArrayList hitFragments = new ArrayList(); + Fieldable docContentField = luceneDoc.getFieldable("content"); + if (docContentField != null) { + String docContent = docContentField.stringValue(); + TokenStream tokenStream = TokenSources.getAnyTokenStream(this.documentsIndexReader, docID, docContentField.name(), luceneDoc, documentsPerFieldAnalyzer); + // highlighter.setMaxDocCharsToAnalyze(100000); // the first 100000 chars are fetched maximal, but performance is not really better + TextFragment[] textfragments = highlighter.getBestTextFragments(tokenStream, docContent, false, 5); + if (textfragments.length > 0) { + for (int j=0; j"); + textFragment = textFragment.replaceAll(endHitMark, ""); + hitFragments.add(checkHitFragment(textFragment)); + } + } + } + if (! hitFragments.isEmpty()) + doc.setHitFragments(hitFragments); + } + docs.add(doc); + } + if (docs != null) { + hits = new Hits(docs, from, to); + hits.setSize(resultDocs.scoreDocs.length); + hits.setQuery(morphQuery); + } + } + } catch (Exception e) { + throw new ApplicationException(e); + } finally { + try { + if (searcher != null) + documentsSearcherManager.release(searcher); + } catch (IOException e) { + // nothing + } + } + // Do not use searcher after this! 
+ searcher = null; + return hits; + } + + public Hits queryDocument(String docId, String queryStr, int from, int to) throws ApplicationException { + Hits hits = null; + IndexSearcher searcher = null; + MetadataRecord docMetadataRecord = getDocMetadata(docId); + if (docMetadataRecord == null) + return null; // no document with that docId is in index + try { + makeNodesSearcherManagerUpToDate(); + searcher = nodesSearcherManager.acquire(); + String fieldNameDocId = "docId"; + Query queryDocId = new QueryParser(Version.LUCENE_35, fieldNameDocId, nodesPerFieldAnalyzer).parse(docId); + String defaultQueryFieldName = "tokenOrig"; + Query query = new QueryParser(Version.LUCENE_35, defaultQueryFieldName, nodesPerFieldAnalyzer).parse(queryStr); + String language = docMetadataRecord.getLanguage(); + if (language == null || language.equals("")) { + String collectionNames = docMetadataRecord.getCollectionNames(); + ConfManagerResultWrapper collectionInfo = CollectionReader.getInstance().getResultWrapper(collectionNames); + if (collectionInfo != null) { + String mainLang = collectionInfo.getMainLanguage(); + if (mainLang != null) + language = mainLang; + } + } + Query morphQuery = buildMorphQuery(query, language); + BooleanQuery queryDoc = new BooleanQuery(); + queryDoc.add(queryDocId, BooleanClause.Occur.MUST); + queryDoc.add(morphQuery, BooleanClause.Occur.MUST); + String[] sortFieldNames = {"elementDocPosition"}; + Sort sortByPosition = buildSort(sortFieldNames, "node"); + TopDocs topDocs = searcher.search(queryDoc, 100000, sortByPosition); + topDocs.setMaxScore(1); + int toTmp = to; + if (topDocs.scoreDocs.length <= to) + toTmp = topDocs.scoreDocs.length - 1; + if (topDocs != null) { + ArrayList docs = new ArrayList(); + for (int i=from; i<=toTmp; i++) { + int docID = topDocs.scoreDocs[i].doc; + FieldSelector nodeFieldSelector = getNodeFieldSelector(); + Document luceneDoc = searcher.doc(docID, nodeFieldSelector); + de.mpg.mpiwg.berlin.mpdl.cms.document.Document doc = new 
de.mpg.mpiwg.berlin.mpdl.cms.document.Document(luceneDoc); + docs.add(doc); + } + if (docs != null) { + hits = new Hits(docs, from, to); + hits.setSize(topDocs.scoreDocs.length); + } + } + searcher.close(); + } catch (Exception e) { + throw new ApplicationException(e); + } finally { + try { + if (searcher != null) + documentsSearcherManager.release(searcher); + } catch (IOException e) { + // nothing + } + } + // Do not use searcher after this! + searcher = null; + return hits; + } + + public Hits moreLikeThis(String docId, int from, int to) throws ApplicationException { + Hits hits = null; + ArrayList wspDocs = null; + IndexSearcher searcher1 = null; + IndexSearcher searcher2 = null; + try { + makeDocumentsSearcherManagerUpToDate(); + searcher1 = documentsSearcherManager.acquire(); + String fieldNameDocId = "docId"; + Query queryDocId = new QueryParser(Version.LUCENE_35, fieldNameDocId, documentsPerFieldAnalyzer).parse(docId); + TopDocs topDocs = searcher1.search(queryDocId, 100000); + topDocs.setMaxScore(1); + int docID = -1; + if (topDocs != null && topDocs.scoreDocs != null && topDocs.scoreDocs.length > 0) { + docID = topDocs.scoreDocs[0].doc; + } + makeDocumentsSearcherManagerUpToDate(); + searcher2 = documentsSearcherManager.acquire(); + MoreLikeThis mlt = new MoreLikeThis(documentsIndexReader); // TODO documentsIndexReader is ok ? 
+ mlt.setFieldNames(new String[]{"content"}); // similarity function works against these fields + mlt.setMinWordLen(2); + mlt.setBoost(true); + Query queryMoreLikeThis = mlt.like(docID); + TopDocs moreLikeThisDocs = searcher2.search(queryMoreLikeThis, 10); + moreLikeThisDocs.setMaxScore(10); + if (moreLikeThisDocs != null) { + if (wspDocs == null) + wspDocs = new ArrayList(); + for (int i=0; i getToken(String fieldName, String value, int count) throws ApplicationException { + ArrayList retToken = null; + int counter = 0; + TermEnum terms = null; + try { + if (value == null) + value = ""; + Term term = new Term(fieldName, value); + makeIndexReaderUpToDate(); + terms = documentsIndexReader.terms(term); + while (terms != null && fieldName != null && fieldName.equals(terms.term().field()) && counter < count) { + if (retToken == null) + retToken = new ArrayList(); + Term termContent = terms.term(); + Token token = new Token(termContent); + retToken.add(token); + counter++; + if (!terms.next()) + break; + } + } catch (Exception e) { + throw new ApplicationException(e); + } finally { + if (terms != null) { + try { + terms.close(); + } catch (IOException e) { + // nothing + } + } + } + return retToken; + } + + public ArrayList getToken(String docId, String fieldName, String value, int count) throws ApplicationException { + ArrayList retToken = null; + if (value == null) + value = ""; + int counter = 0; + IndexSearcher searcher = null; + try { + makeDocumentsSearcherManagerUpToDate(); + makeIndexReaderUpToDate(); + searcher = documentsSearcherManager.acquire(); + Query queryDocId = new TermQuery(new Term("docId", docId)); + TopDocs topDocs = searcher.search(queryDocId, 1); + if (topDocs != null) { + int docIdInt = topDocs.scoreDocs[0].doc; + TermFreqVector termFreqVector = documentsIndexReader.getTermFreqVector(docIdInt, fieldName); + if (termFreqVector != null) { + String[] terms = termFreqVector.getTerms(); + int[] freqs = termFreqVector.getTermFrequencies(); + boolean 
success = false; + if (terms != null) { + retToken = new ArrayList(); + for (int i = 0; i < terms.length; i++) { + String termStr = terms[i]; + if (termStr.startsWith(value)) + success = true; + if (success) { + counter++; + int freq = freqs[i]; + Term t = new Term(fieldName, termStr); + Token tok = new Token(t); + tok.setFreq(freq); + retToken.add(tok); + } + if (counter >= count) + break; + } + } + } + } + } catch (Exception e) { + throw new ApplicationException(e); + } finally { + try { + if (searcher != null) + documentsSearcherManager.release(searcher); + } catch (IOException e) { + // nothing + } + } + // Do not use searcher after this! + searcher = null; + return retToken; + } + + public void end() throws ApplicationException { + try { + if (documentsIndexWriter != null) + documentsIndexWriter.close(); + if (nodesIndexWriter != null) + nodesIndexWriter.close(); + if (documentsSearcherManager != null) + documentsSearcherManager.close(); + if (nodesSearcherManager != null) + nodesSearcherManager.close(); + if (documentsIndexReader != null) + documentsIndexReader.close(); + } catch (IOException e) { + throw new ApplicationException(e); + } + } + + private Query buildMorphQuery(Query query, String language) throws ApplicationException { + return buildMorphQuery(query, language, false, false); + } + + private Query buildMorphQuery(Query query, String language, boolean withAllForms, boolean translate) throws ApplicationException { + Query morphQuery = null; + if (query instanceof TermQuery) { + TermQuery termQuery = (TermQuery) query; + morphQuery = buildMorphQuery(termQuery, language, withAllForms, translate); + } else if (query instanceof BooleanQuery) { + BooleanQuery booleanQuery = (BooleanQuery) query; + morphQuery = buildMorphQuery(booleanQuery, language, withAllForms, translate); + } else { + morphQuery = query; // all other cases: PrefixQuery, PhraseQuery, FuzzyQuery, TermRangeQuery, ... 
+ } + return morphQuery; + } + + private Query buildMorphQuery(TermQuery inputTermQuery, String fromLang, boolean withAllForms, boolean translate) throws ApplicationException { + String[] toLanguages = {"deu", "eng", "fra"}; // TODO + String fromLanguage = null; + String inputTerm = inputTermQuery.getTerm().text(); + if (fromLang == null) { + String detectedLang = MicrosoftTranslator.detectLanguageCode(inputTerm); + if (detectedLang != null) + fromLanguage = detectedLang; + } else { + fromLanguage = fromLang; + } + LexHandler lexHandler = LexHandler.getInstance(); + String fieldName = inputTermQuery.getTerm().field(); + ArrayList queryTerms = new ArrayList(); + if (fieldName != null && fieldName.equals("tokenMorph")) { + ArrayList lemmas = lexHandler.getLemmas(inputTerm, "form", fromLanguage, Normalizer.DICTIONARY, true); + if (lemmas == null) { // if no lemmas are found then do a query in tokenOrig TODO should this really be done ? + if (translate) { + String[] terms = {inputTerm}; + ArrayList translatedTerms = MicrosoftTranslator.translate(terms, fromLanguage, toLanguages); + for (int i=0; i morphTerms = new ArrayList(); + for (int i=0; i forms = lemma.getFormsList(); + for (int j=0; j translatedMorphTerms = MicrosoftTranslator.translate(morphTermsArray, fromLanguage, toLanguages); + for (int i=0; i forms = lemma.getFormsList(); + for (int j=0; j translatedTerms = MicrosoftTranslator.translate(terms, fromLanguage, toLanguages); + for (int i=0; i queryTerms) throws ApplicationException { + BooleanQuery retBooleanQuery = new BooleanQuery(); + for (int i = 0; i < queryTerms.size(); i++) { + TermQuery termQuery = queryTerms.get(i); + retBooleanQuery.add(termQuery, BooleanClause.Occur.SHOULD); + } + return retBooleanQuery; + } + + private Query buildMorphQuery(BooleanQuery query, String language, boolean withAllForms, boolean translate) throws ApplicationException { + BooleanQuery morphBooleanQuery = new BooleanQuery(); + BooleanClause[] booleanClauses = 
query.getClauses(); + for (int i = 0; i < booleanClauses.length; i++) { + BooleanClause boolClause = booleanClauses[i]; + Query q = boolClause.getQuery(); + Query morphQuery = buildMorphQuery(q, language, withAllForms, translate); + BooleanClause.Occur occur = boolClause.getOccur(); + morphBooleanQuery.add(morphQuery, occur); + } + return morphBooleanQuery; + } + + public ArrayList fetchTerms(String queryStr) throws ApplicationException { + ArrayList terms = null; + String defaultQueryFieldName = "tokenOrig"; + try { + Query query = new QueryParser(Version.LUCENE_35, defaultQueryFieldName, nodesPerFieldAnalyzer).parse(queryStr); + terms = fetchTerms(query); + } catch (Exception e) { + throw new ApplicationException(e); + } + return terms; + } + + /** + * recursively fetch all terms of the query + * + * @param query + * @return + */ + private ArrayList fetchTerms(Query query) throws ApplicationException { + ArrayList terms = new ArrayList(); + if (query instanceof TermQuery) { + TermQuery termQuery = (TermQuery) query; + String termQueryStr = termQuery.getTerm().text(); + terms.add(termQueryStr); + } else if (query instanceof BooleanQuery) { + BooleanQuery booleanQuery = (BooleanQuery) query; + terms = fetchTerms(booleanQuery); + } else { + String queryStr = query.toString(); + terms.add(queryStr); // all other cases: PrefixQuery, PhraseQuery, + // FuzzyQuery, TermRangeQuery, ... 
+ } + return terms; + } + + private ArrayList fetchTerms(BooleanQuery query) throws ApplicationException { + ArrayList terms = new ArrayList(); + BooleanClause[] booleanClauses = query.getClauses(); + for (int i = 0; i < booleanClauses.length; i++) { + BooleanClause boolClause = booleanClauses[i]; + Query q = boolClause.getQuery(); + ArrayList qTerms = fetchTerms(q); + BooleanClause.Occur occur = boolClause.getOccur(); + if (occur == BooleanClause.Occur.SHOULD || occur == BooleanClause.Occur.MUST) + terms.addAll(qTerms); + } + return terms; + } + + public ArrayList fetchTerms(String queryStr, String language) throws ApplicationException { + ArrayList terms = null; + String defaultQueryFieldName = "tokenOrig"; + try { + Query query = new QueryParser(Version.LUCENE_35, defaultQueryFieldName, nodesPerFieldAnalyzer).parse(queryStr); + terms = fetchTerms(query, language); + } catch (Exception e) { + throw new ApplicationException(e); + } + return terms; + } + + /** + * recursively fetch all terms of the query + * + * @param query + * @return + */ + private ArrayList fetchTerms(Query query, String language) throws ApplicationException { + ArrayList terms = new ArrayList(); + if (query instanceof TermQuery) { + TermQuery termQuery = (TermQuery) query; + terms = fetchTerms(termQuery, language); + } else if (query instanceof BooleanQuery) { + BooleanQuery booleanQuery = (BooleanQuery) query; + terms = fetchTerms(booleanQuery, language); + } else { + String queryStr = query.toString(); + terms.add(queryStr); + // all other cases: PrefixQuery, PhraseQuery, FuzzyQuery, TermRangeQuery, ... 
+ } + return terms; + } + + private ArrayList fetchTerms(TermQuery termQuery, String language) throws ApplicationException { + if (language == null) + language = "eng"; + ArrayList terms = new ArrayList(); + Term termQueryTerm = termQuery.getTerm(); + String term = termQuery.getTerm().text(); + String fieldName = termQueryTerm.field(); + if (fieldName != null && fieldName.equals("tokenMorph")) { + LexHandler lexHandler = LexHandler.getInstance(); + ArrayList lemmas = lexHandler.getLemmas(term, "form", language, Normalizer.DICTIONARY, true); + // TODO : language √ºber den translator service holen + if (lemmas == null) { + terms.add(term); + } else { + for (int i = 0; i < lemmas.size(); i++) { + Lemma lemma = lemmas.get(i); + ArrayList forms = lemma.getFormsList(); + for (int j = 0; j < forms.size(); j++) { + Form form = forms.get(j); + String formName = form.getFormName(); + terms.add(formName); + } + } + } + } else { + terms.add(term); + } + return terms; + } + + private ArrayList fetchTerms(BooleanQuery query, String language) throws ApplicationException { + ArrayList terms = new ArrayList(); + BooleanClause[] booleanClauses = query.getClauses(); + for (int i = 0; i < booleanClauses.length; i++) { + BooleanClause boolClause = booleanClauses[i]; + Query q = boolClause.getQuery(); + ArrayList qTerms = fetchTerms(q, language); + BooleanClause.Occur occur = boolClause.getOccur(); + if (occur == BooleanClause.Occur.SHOULD || occur == BooleanClause.Occur.MUST) + terms.addAll(qTerms); + } + return terms; + } + + private Document getDocument(String docId) throws ApplicationException { + Document doc = null; + IndexSearcher searcher = null; + try { + makeDocumentsSearcherManagerUpToDate(); + searcher = documentsSearcherManager.acquire(); + String fieldNameDocId = "docId"; + Query queryDocId = new QueryParser(Version.LUCENE_35, fieldNameDocId, documentsPerFieldAnalyzer).parse(docId); + TopDocs topDocs = searcher.search(queryDocId, 100000); + topDocs.setMaxScore(1); + if 
(topDocs != null && topDocs.scoreDocs != null && topDocs.scoreDocs.length > 0) { + int docID = topDocs.scoreDocs[0].doc; + FieldSelector docFieldSelector = getDocFieldSelector(); + doc = searcher.doc(docID, docFieldSelector); + } + searcher.close(); + } catch (Exception e) { + throw new ApplicationException(e); + } finally { + try { + if (searcher != null) + documentsSearcherManager.release(searcher); + } catch (IOException e) { + // nothing + } + } + // Do not use searcher after this! + searcher = null; + return doc; + } + + private IndexWriter getDocumentsWriter() throws ApplicationException { + IndexWriter writer = null; + String luceneDocsDirectoryStr = Constants.getInstance().getLuceneDocumentsDir(); + File luceneDocsDirectory = new File(luceneDocsDirectoryStr); + try { + Map documentsFieldAnalyzers = new HashMap(); + documentsFieldAnalyzers.put("docId", new KeywordAnalyzer()); + documentsFieldAnalyzers.put("identifier", new KeywordAnalyzer()); + documentsFieldAnalyzers.put("uri", new KeywordAnalyzer()); + documentsFieldAnalyzers.put("collectionNames", new StandardAnalyzer(Version.LUCENE_35)); + documentsFieldAnalyzers.put("author", new StandardAnalyzer(Version.LUCENE_35)); + documentsFieldAnalyzers.put("title", new StandardAnalyzer(Version.LUCENE_35)); + documentsFieldAnalyzers.put("language", new StandardAnalyzer(Version.LUCENE_35)); + documentsFieldAnalyzers.put("publisher", new StandardAnalyzer(Version.LUCENE_35)); + documentsFieldAnalyzers.put("date", new StandardAnalyzer(Version.LUCENE_35)); + documentsFieldAnalyzers.put("subject", new StandardAnalyzer(Version.LUCENE_35)); + documentsFieldAnalyzers.put("rights", new StandardAnalyzer(Version.LUCENE_35)); + documentsFieldAnalyzers.put("license", new StandardAnalyzer(Version.LUCENE_35)); + documentsFieldAnalyzers.put("accessRights", new StandardAnalyzer(Version.LUCENE_35)); + documentsFieldAnalyzers.put("echoId", new KeywordAnalyzer()); + documentsFieldAnalyzers.put("echoPageImageDir", new 
KeywordAnalyzer()); + documentsFieldAnalyzers.put("echoFiguresDir", new KeywordAnalyzer()); + documentsFieldAnalyzers.put("mpiwgDocId", new KeywordAnalyzer()); + documentsFieldAnalyzers.put("type", new KeywordAnalyzer()); // e.g. mime type "text/xml" + documentsFieldAnalyzers.put("pageCount", new KeywordAnalyzer()); + documentsFieldAnalyzers.put("schemaName", new StandardAnalyzer(Version.LUCENE_35)); + documentsFieldAnalyzers.put("lastModified", new KeywordAnalyzer()); + documentsFieldAnalyzers.put("tokenOrig", new StandardAnalyzer(Version.LUCENE_35)); + documentsFieldAnalyzers.put("tokenReg", new StandardAnalyzer(Version.LUCENE_35)); + documentsFieldAnalyzers.put("tokenNorm", new StandardAnalyzer(Version.LUCENE_35)); + documentsFieldAnalyzers.put("tokenMorph", new StandardAnalyzer(Version.LUCENE_35)); + documentsFieldAnalyzers.put("xmlContent", new StandardAnalyzer(Version.LUCENE_35)); + documentsFieldAnalyzers.put("content", new StandardAnalyzer(Version.LUCENE_35)); + documentsPerFieldAnalyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(Version.LUCENE_35), documentsFieldAnalyzers); + IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_35, documentsPerFieldAnalyzer); + conf.setOpenMode(OpenMode.CREATE_OR_APPEND); + conf.setRAMBufferSizeMB(300); // 300 MB because some documents are big; 16 MB is default + FSDirectory fsDirectory = FSDirectory.open(luceneDocsDirectory); + writer = new IndexWriter(fsDirectory, conf); + writer.commit(); // when directory is empty this creates init files + } catch (IOException e) { + throw new ApplicationException(e); + } + return writer; + } + + private IndexWriter getNodesWriter() throws ApplicationException { + IndexWriter writer = null; + String luceneNodesDirectoryStr = Constants.getInstance().getLuceneNodesDir(); + File luceneNodesDirectory = new File(luceneNodesDirectoryStr); + try { + Map nodesFieldAnalyzers = new HashMap(); + nodesFieldAnalyzers.put("docId", new KeywordAnalyzer()); + 
nodesFieldAnalyzers.put("language", new StandardAnalyzer(Version.LUCENE_35)); // language (through xml:id): e.g. "lat" + nodesFieldAnalyzers.put("pageNumber", new KeywordAnalyzer()); // page number (through element pb): e.g. "13" + nodesFieldAnalyzers.put("lineNumber", new KeywordAnalyzer()); // line number on the page (through element lb): e.g. "17" + nodesFieldAnalyzers.put("elementName", new KeywordAnalyzer()); // element name: e.g. "tei:s" + nodesFieldAnalyzers.put("elementDocPosition", new KeywordAnalyzer()); // absolute position in document: e.g. "4711" + nodesFieldAnalyzers.put("elementPosition", new KeywordAnalyzer()); // position in parent node (in relation to other nodes of the same name): e.g. "5" + nodesFieldAnalyzers.put("elementAbsolutePosition", new KeywordAnalyzer()); // absolute position in document (in relation to other nodes of the same name): e.g. "213" + nodesFieldAnalyzers.put("elementPagePosition", new KeywordAnalyzer()); // position in relation to other nodes of the same name: e.g. "213" + nodesFieldAnalyzers.put("xmlId", new KeywordAnalyzer()); // xml id: e.g. "4711bla" + nodesFieldAnalyzers.put("xpath", new KeywordAnalyzer()); // xpath: e.g. 
"/echo[1]/text[1]/p[1]/s[5]" + nodesFieldAnalyzers.put("tokenOrig", new StandardAnalyzer(Version.LUCENE_35)); + nodesFieldAnalyzers.put("tokenReg", new StandardAnalyzer(Version.LUCENE_35)); + nodesFieldAnalyzers.put("tokenNorm", new StandardAnalyzer(Version.LUCENE_35)); + nodesFieldAnalyzers.put("tokenMorph", new StandardAnalyzer(Version.LUCENE_35)); + nodesFieldAnalyzers.put("xmlContent", new StandardAnalyzer(Version.LUCENE_35)); + nodesPerFieldAnalyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(Version.LUCENE_35), nodesFieldAnalyzers); + IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_35, nodesPerFieldAnalyzer); + conf.setOpenMode(OpenMode.CREATE_OR_APPEND); + conf.setRAMBufferSizeMB(300); // 300 MB because some documents are big; 16 MB is default + FSDirectory fsDirectory = FSDirectory.open(luceneNodesDirectory); + writer = new IndexWriter(fsDirectory, conf); + writer.commit(); + } catch (IOException e) { + throw new ApplicationException(e); + } + return writer; + } + + private Sort buildSort(String[] sortFieldNames, String type) { + Sort sort = new Sort(); + ArrayList sortFields = new ArrayList(); + for (int i=0; i fields = new HashSet(); + fields.add("docId"); + fields.add("identifier"); + fields.add("uri"); + fields.add("collectionNames"); + fields.add("author"); + fields.add("title"); + fields.add("language"); + fields.add("publisher"); + fields.add("date"); + fields.add("subject"); + fields.add("rights"); + fields.add("license"); + fields.add("echoId"); + fields.add("echoPageImageDir"); + fields.add("echoFiguresDir"); + fields.add("mpiwgDocId"); + fields.add("type"); + fields.add("pageCount"); + fields.add("schemaName"); + fields.add("lastModified"); + fields.add("content"); + FieldSelector fieldSelector = new SetBasedFieldSelector(fields, fields); + return fieldSelector; + } + + private FieldSelector getNodeFieldSelector() { + HashSet fields = new HashSet(); + fields.add("docId"); + fields.add("language"); + fields.add("pageNumber"); 
+ fields.add("lineNumber"); + fields.add("elementName"); + fields.add("elementDocPosition"); + fields.add("elementPosition"); + fields.add("elementAbsolutePosition"); + fields.add("elementPagePosition"); + fields.add("xmlId"); + fields.add("xpath"); + fields.add("xmlContent"); + fields.add("xmlContentTokenized"); + FieldSelector fieldSelector = new SetBasedFieldSelector(fields, fields); + return fieldSelector; + } + + private SearcherManager getNewSearcherManager(IndexWriter indexWriter) throws ApplicationException { + SearcherManager searcherManager = null; + try { + searcherManager = new SearcherManager(indexWriter, true, null, null); + } catch (IOException e) { + throw new ApplicationException(e); + } + return searcherManager; + } + + private IndexReader getDocumentsReader() throws ApplicationException { + IndexReader reader = null; + String luceneDocsDirectoryStr = Constants.getInstance().getLuceneDocumentsDir(); + File luceneDocsDirectory = new File(luceneDocsDirectoryStr); + try { + FSDirectory fsDirectory = FSDirectory.open(luceneDocsDirectory); + reader = IndexReader.open(fsDirectory, true); + } catch (IOException e) { + throw new ApplicationException(e); + } + return reader; + } + + private void makeIndexReaderUpToDate() throws ApplicationException { + try { + boolean isCurrent = documentsIndexReader.isCurrent(); + if (!isCurrent) { + documentsIndexReader = IndexReader.openIfChanged(documentsIndexReader); + } + } catch (IOException e) { + throw new ApplicationException(e); + } + } + + private void makeDocumentsSearcherManagerUpToDate() throws ApplicationException { + try { + boolean isCurrent = documentsSearcherManager.isSearcherCurrent(); + if (!isCurrent) { + documentsSearcherManager.maybeReopen(); + } + } catch (IOException e) { + throw new ApplicationException(e); + } + } + + private void makeNodesSearcherManagerUpToDate() throws ApplicationException { + try { + boolean isCurrent = nodesSearcherManager.isSearcherCurrent(); + if (!isCurrent) { + 
nodesSearcherManager.maybeReopen(); + } + } catch (IOException e) { + throw new ApplicationException(e); + } + } + + private String toTokenizedXmlString(String xmlStr, String language) throws ApplicationException { + String xmlPre = ""; + String xmlPost = ""; + String xmlStrTmp = xmlPre + xmlStr + xmlPost; + StringReader xmlInputStringReader = new StringReader(xmlStrTmp); + XmlTokenizer xmlTokenizer = new XmlTokenizer(xmlInputStringReader); + xmlTokenizer.setLanguage(language); + String[] outputOptions = { "withLemmas" }; + xmlTokenizer.setOutputOptions(outputOptions); + xmlTokenizer.tokenize(); + String result = xmlTokenizer.getXmlResult(); + return result; + } + + private String enrichWordsOrigRegNorm(String xmlStr) throws ApplicationException { + try { + WordContentHandler wordContentHandler = new WordContentHandler(); + XMLReader xmlParser = new SAXParser(); + xmlParser.setContentHandler(wordContentHandler); + StringReader strReader = new StringReader(xmlStr); + InputSource inputSource = new InputSource(strReader); + xmlParser.parse(inputSource); + String result = wordContentHandler.getResult(); + return result; + } catch (SAXException e) { + throw new ApplicationException(e); + } catch (IOException e) { + throw new ApplicationException(e); + } + } + + private String escapeLuceneChars(String inputStr) { + String luceneCharsStr = "+-&|!(){}[]^~*?:\\"; // Lucene escape symbols + StringBuilder retStrBuilder = new StringBuilder(); + for (int i = 0; i < inputStr.length(); i++) { + char c = inputStr.charAt(i); + if (luceneCharsStr.contains(String.valueOf(c))) + retStrBuilder.append("\\"); + retStrBuilder.append(c); + } + return retStrBuilder.toString(); + } + + /** + * sorgt f√ºr sinnvolle satzanf√§nge + * + * @param fragment + */ + private String checkHitFragment(String fragment) { + if (fragment.startsWith(".") + || fragment.startsWith(":") + || fragment.startsWith(",") + || fragment.startsWith("-") + || fragment.startsWith(";") + || fragment.startsWith("?") + || 
fragment.startsWith(")") + || fragment.startsWith("!")) { + fragment = fragment.substring(1, fragment.length()); + // finds first occurence of a given string out.println("first index of point : "+StringUtils.indexOfAny(fragment, ".")); + } + return fragment; + } + +} \ No newline at end of file diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/scheduler/CmsChainScheduler.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/scheduler/CmsChainScheduler.java Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,243 @@ +package de.mpg.mpiwg.berlin.mpdl.cms.scheduler; + +import java.net.URL; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Date; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.PriorityQueue; +import java.util.Queue; + +import java.util.logging.Logger; +import org.quartz.JobDataMap; +import org.quartz.JobDetail; +import org.quartz.JobExecutionContext; +import org.quartz.JobListener; +import org.quartz.SchedulerException; +import org.quartz.SimpleTrigger; +import org.quartz.Trigger; +import org.quartz.impl.StdSchedulerFactory; + +import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; + +public class CmsChainScheduler { + private static CmsChainScheduler instance; + private static String CRUD_JOB = "MPDL_CRUD_JOB"; + private static String CRUD_TRIGGER = "MPDL_CRUD_TRIGGER"; + private static String CRUD_GROUP = "MPDL_CRUD_GROUP"; + private static Logger LOGGER = Logger.getLogger(CmsDocJob.class.getName()); + private org.quartz.Scheduler scheduler; + private JobListener jobListener; + private Queue docOperationQueue = new PriorityQueue(); + private HashMap finishedDocOperations = new HashMap(); + private boolean operationInProgress = false; + private int jobOrderId = 0; + + public static CmsChainScheduler getInstance() throws ApplicationException { 
+ if (instance == null) { + instance = new CmsChainScheduler(); + instance.init(); + } + return instance; + } + + public CmsDocOperation doOperation(CmsDocOperation docOperation) throws ApplicationException { + jobOrderId++; + docOperation.setOrderId(jobOrderId); + queueOperation(docOperation); + scheduleNextOperation(); + return docOperation; + } + + public void finishOperation(CmsDocOperation docOperation) throws ApplicationException { + operationInProgress = false; + Date now = new Date(); + docOperation.setEnd(now); + docOperation.setStatus("finished"); + int jobId = new Integer(docOperation.getOrderId()); + finishedDocOperations.put(jobId, docOperation); + log(docOperation); + // schedule next job if there is one + scheduleNextOperation(); + } + + private void log(CmsDocOperation docOperation) { + Date startTime = docOperation.getStart(); + Date endTime = docOperation.getEnd(); + long executionTime = -1; + if (startTime != null && endTime != null) + executionTime = (endTime.getTime() - startTime.getTime()); + String jobInfo = "Document operation " + docOperation.toString() + ": started at: " + startTime + + " and ended at: " + endTime + " (needed time: " + executionTime + " ms)"; + LOGGER.info(jobInfo); + } + + public synchronized void scheduleNextOperation() throws ApplicationException { + if (isOperationInProgress()) { + // nothing, operation has to wait + } else { + CmsDocOperation docOperation = docOperationQueue.poll(); + if (docOperation == null) { + // if queue is empty then do nothing (there are no more operations to execute) + } else { + Date now = new Date(); + operationInProgress = true; + docOperation.setStart(now); + scheduleJob(docOperation, now); + } + } + } + + public ArrayList getDocOperations() throws ApplicationException { + ArrayList docOperations = new ArrayList(); + try { + // first: all finished jobs + Collection finiDocOperations = finishedDocOperations.values(); + docOperations.addAll(finiDocOperations); + // second: all currently 
executed jobs + if (operationInProgress) { + List currentJobs = (List) scheduler.getCurrentlyExecutingJobs(); + Iterator iter = currentJobs.iterator(); + while (iter.hasNext()) { + JobExecutionContext jobExecutionContext = iter.next(); + CmsDocOperation docOperation = getDocOperation(jobExecutionContext); + if (docOperation != null) { + docOperations.add(docOperation); + } + } + } + // third: all queued jobs + Iterator iter = docOperationQueue.iterator(); + while (iter.hasNext()) { + CmsDocOperation docOperation = iter.next(); + docOperations.add(docOperation); + } + } catch (SchedulerException e) { + LOGGER.severe(e.getMessage()); + throw new ApplicationException(e); + } + return docOperations; + } + + public CmsDocOperation getDocOperation(int jobId) throws ApplicationException { + CmsDocOperation docOperation = null; + try { + // first try: looks into currently executing jobs + if (operationInProgress) { + List currentJobs = (List) scheduler.getCurrentlyExecutingJobs(); + Iterator iter = currentJobs.iterator(); + while (iter.hasNext()) { + JobExecutionContext jobExecutionContext = iter.next(); + docOperation = getDocOperation(jobExecutionContext); + if (docOperation != null) { + int dopOpJobId = docOperation.getOrderId(); + if (jobId == dopOpJobId) + return docOperation; + } + } + } + // second try: look into finished jobs + docOperation = finishedDocOperations.get(new Integer(jobId)); + if (docOperation != null) { + return docOperation; + } + // third try: look into queued jobs + Iterator iter = docOperationQueue.iterator(); + while (iter.hasNext()) { + docOperation = iter.next(); + if (docOperation.getOrderId() == jobId) + return docOperation; + } + } catch (SchedulerException e) { + LOGGER.severe(e.getMessage()); + throw new ApplicationException(e); + } + // if not found return null + return null; + } + + public CmsDocOperation getDocOperation(JobExecutionContext jobExecutionContext) { + CmsDocOperation docOperation = null; + if (jobExecutionContext != null) 
{ + JobDetail job = jobExecutionContext.getJobDetail(); + JobDataMap parameters = job.getJobDataMap(); + docOperation = (CmsDocOperation) parameters.get("operation"); + } + return docOperation; + } + + private void queueOperation(CmsDocOperation docOperation) { + int operationsBefore = docOperationQueue.size(); + if (operationsBefore == 0) + docOperation.setStatus("waiting in operation queue"); + else + docOperation.setStatus("waiting in operation queue: " + operationsBefore + " operations heve to be executed before this operation"); + docOperationQueue.offer(docOperation); + } + + private synchronized boolean isOperationInProgress() { + return operationInProgress; + } + + private void scheduleJob(CmsDocOperation docOperation, Date fireTime) throws ApplicationException { + try { + int jobId = docOperation.getOrderId(); + String jobName = CRUD_JOB + "-id-" + jobId + "-timeId-" + fireTime; + JobDetail job = new JobDetail(jobName, CRUD_GROUP, CmsDocJob.class); + JobDataMap parameters = new JobDataMap(); + parameters.put("operation", docOperation); + job.setJobDataMap(parameters); + job.addJobListener(jobListener.getName()); + String triggerName = CRUD_TRIGGER + "-id-" + jobId + "-timeId-" + fireTime; + Trigger trigger = new SimpleTrigger(triggerName, CRUD_GROUP, fireTime); + scheduler.scheduleJob(job, trigger); + String jobInfo = "Schedule document operation: " + docOperation.toString() + ": done at: " + fireTime.toString(); + LOGGER.info(jobInfo); + } catch (SchedulerException e) { + LOGGER.severe(e.getMessage()); + throw new ApplicationException(e); + } + } + + private void init() throws ApplicationException { + try { + if (scheduler == null) { + String quartzPath = getQuartzPath(); + StdSchedulerFactory schedulerFactory = new StdSchedulerFactory(quartzPath); + scheduler = schedulerFactory.getScheduler(); + jobListener = new CmsChainSchedulerListener(); + scheduler.addJobListener(jobListener); + scheduler.start(); + LOGGER.info("Started Quartz scheduler factory: " + 
quartzPath); + } + } catch (SchedulerException e) { + LOGGER.severe(e.getMessage()); + throw new ApplicationException(e); + } + } + + public void end() throws ApplicationException { + try { + if (scheduler != null) { + scheduler.shutdown(); + } + String quartzPath = getQuartzPath(); + LOGGER.info("Ended Quartz scheduler factory: " + quartzPath); + } catch (SchedulerException e) { + LOGGER.severe(e.getMessage()); + throw new ApplicationException(e); + } + } + + private String getQuartzPath() { + URL quartzUrl = CmsChainScheduler.class.getResource("quartz.properties"); + String quartzPath = quartzUrl.getPath(); + if (quartzPath.indexOf(".jar!") != -1) { + int beginIndex = quartzPath.indexOf(".jar!") + 6; + quartzPath = quartzPath.substring(beginIndex); + } + return quartzPath; + } +} \ No newline at end of file diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/scheduler/CmsChainSchedulerListener.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/scheduler/CmsChainSchedulerListener.java Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,53 @@ +package de.mpg.mpiwg.berlin.mpdl.cms.scheduler; + +import java.util.logging.Logger; +import org.quartz.JobDataMap; +import org.quartz.JobDetail; +import org.quartz.JobExecutionContext; +import org.quartz.JobExecutionException; +import org.quartz.JobListener; + +import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; + +public class CmsChainSchedulerListener implements JobListener { + private static Logger LOGGER = Logger.getLogger(CmsDocJob.class.getName()); + + public String getName() { + return "MpdlJobChainingListener"; + } + + public void jobToBeExecuted(JobExecutionContext inContext) { + } + + public void jobExecutionVetoed(JobExecutionContext inContext) { + String message = "Quartz: JobChainingListener: Job execution was vetoed."; + LOGGER.fine(message); + } + + public void 
jobWasExecuted(JobExecutionContext inContext, JobExecutionException inException) { + // after finishing his job it tries to schedule the next operation (if there is one in the queue) + CmsDocOperation docOperation = null; + try { + CmsChainScheduler mpdlChainScheduler = CmsChainScheduler.getInstance(); + docOperation = getDocOperation(inContext); + mpdlChainScheduler.finishOperation(docOperation); + } catch (ApplicationException e) { + if (docOperation != null) { + docOperation.setErrorMessage(e.getMessage()); + } + LOGGER.severe(e.getMessage()); + } + } + + private CmsDocOperation getDocOperation(JobExecutionContext context) { + CmsDocOperation docOperation = null; + if (context != null) { + JobDetail job = context.getJobDetail(); + JobDataMap parameters = job.getJobDataMap(); + docOperation = (CmsDocOperation) parameters.get("operation"); + } + return docOperation; + } + + +} diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/scheduler/CmsDocJob.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/scheduler/CmsDocJob.java Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,75 @@ +package de.mpg.mpiwg.berlin.mpdl.cms.scheduler; + +import java.util.Date; + +import java.util.logging.Logger; +import org.quartz.Job; +import org.quartz.JobDataMap; +import org.quartz.JobDetail; +import org.quartz.JobExecutionContext; +import org.quartz.JobExecutionException; + +import de.mpg.mpiwg.berlin.mpdl.cms.document.DocumentHandler; +import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; + +public class CmsDocJob implements Job { + public static String STATUS_BEGIN = "started"; + private static Logger LOGGER = Logger.getLogger(CmsDocJob.class.getName()); + private JobExecutionContext currentExecutedContext; + + public void execute(JobExecutionContext context) throws JobExecutionException { + this.currentExecutedContext = context; + 
CmsDocOperation docOperation = getDocOperation(); + try { + docOperation.setStatus(STATUS_BEGIN); + String operationName = docOperation.getName(); + if (operationName.equals("create")) { + DocumentHandler docHandler = new DocumentHandler(); + docHandler.doOperation(docOperation); + } else if (operationName.equals("delete")) { + DocumentHandler docHandler = new DocumentHandler(); + docHandler.doOperation(docOperation); + } else if (operationName.equals("importDirectory")) { + DocumentHandler docHandler = new DocumentHandler(); + docHandler.doOperation(docOperation); + } + Date startingTime = docOperation.getStart(); + String jobInfo = "Document operation " + docOperation.toString() + ": started at: " + startingTime; + LOGGER.info(jobInfo); + this.currentExecutedContext = null; + } catch (Exception e) { + try { + // Quartz will automatically unschedule all triggers associated with this job so that it does not run again + CmsChainScheduler mpdlChainScheduler = CmsChainScheduler.getInstance(); + mpdlChainScheduler.finishOperation(docOperation); + String errorMessage = e.getMessage(); + if (errorMessage == null) { + Throwable t = e.getCause(); + if (t == null) { + errorMessage = e.toString(); + } else { + errorMessage = t.getMessage(); + } + } + docOperation.setErrorMessage(errorMessage); + LOGGER.severe(errorMessage); + JobExecutionException jobExecutionException = new JobExecutionException(e); + jobExecutionException.setUnscheduleAllTriggers(true); + throw jobExecutionException; + } catch (ApplicationException ex) { + // nothing + } + } + } + + private CmsDocOperation getDocOperation() { + CmsDocOperation docOperation = null; + if (currentExecutedContext != null) { + JobDetail job = currentExecutedContext.getJobDetail(); + JobDataMap parameters = job.getJobDataMap(); + docOperation = (CmsDocOperation) parameters.get("operation"); + } + return docOperation; + } + +} diff -r e845310098ba -r e9fe3186670c 
software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/scheduler/CmsDocOperation.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/scheduler/CmsDocOperation.java Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,160 @@ +package de.mpg.mpiwg.berlin.mpdl.cms.scheduler; + +import java.util.Date; + +import de.mpg.mpiwg.berlin.mpdl.cms.document.MetadataRecord; + +public class CmsDocOperation implements Comparable { + private int id; + private Date start; + private Date end; + private String name; + private String status; + private String errorMessage; + private String uploadFileName; + private String srcUrl; + private String docIdentifier; + private String mainLanguage; + private String[] elementNames; // element names which should be indexed (e.g. "s head, caption") + private String collectionNames; // collections to which this document belongs separated by blanks (e.g. "collection1 collection7") + private MetadataRecord mdRecord; + + public CmsDocOperation(String name, String srcUrl, String uploadFileName, String docIdentifier) { + this.name = name; + this.srcUrl = srcUrl; + this.uploadFileName = uploadFileName; + this.docIdentifier = docIdentifier; + } + + public int compareTo(CmsDocOperation op) { + Integer opOrderId = new Integer(op.id); + Integer thisOrderId = new Integer(id); + return thisOrderId.compareTo(opOrderId); + } + + public boolean isFinished() { + if (status != null && status.equals("finished")) + return true; + else + return false; + } + + public boolean isError() { + if (errorMessage != null && errorMessage.length() > 0) + return true; + else + return false; + } + + public int getOrderId() { + return id; + } + + public void setOrderId(int orderId) { + this.id = orderId; + } + + public String getStatus() { + return status; + } + + public void setStatus(String status) { + this.status = status; + } + + public Date getStart() { + return start; + } + + public void 
setStart(Date start) { + this.start = start; + } + + public Date getEnd() { + return end; + } + + public void setEnd(Date end) { + this.end = end; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public String getErrorMessage() { + return errorMessage; + } + + public void setErrorMessage(String errorMessage) { + this.errorMessage = errorMessage; + } + + public String getDocIdentifier() { + return docIdentifier; + } + + public void setDocIdentifier(String docIdentifier) { + this.docIdentifier = docIdentifier; + } + + public String getMainLanguage() { + return mainLanguage; + } + + public void setMainLanguage(String mainLanguage) { + this.mainLanguage = mainLanguage; + } + + public String[] getElementNames() { + return elementNames; + } + + public void setElementNames(String[] elementNames) { + this.elementNames = elementNames; + } + + public String getCollectionNames() { + return collectionNames; + } + + public void setCollectionNames(String collectionNames) { + this.collectionNames = collectionNames; + } + + public String getSrcUrl() { + return srcUrl; + } + + public void setSrcUrl(String srcUrl) { + this.srcUrl = srcUrl; + } + + public String getUploadFileName() { + return uploadFileName; + } + + public void setUploadFileName(String uploadFileName) { + this.uploadFileName = uploadFileName; + } + + public MetadataRecord getMdRecord() { + return mdRecord; + } + + public void setMdRecord(MetadataRecord mdRecord) { + this.mdRecord = mdRecord; + } + + public String toString() { + if (name.equals("delete")) + return name + "(" + id + ", " + docIdentifier + ")"; + else + return name + "(" + id + ", " + uploadFileName + ", " + docIdentifier + ")"; + } + +} diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/scheduler/quartz.properties --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/scheduler/quartz.properties Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,9 @@ +org.quartz.scheduler.instanceName = MpdlScheduler +org.quartz.scheduler.instanceId = auto +org.quartz.scheduler.rmi.export = false +org.quartz.scheduler.rmi.proxy = false + +org.quartz.threadPool.class = org.quartz.simpl.SimpleThreadPool +org.quartz.threadPool.threadCount = 3 + +org.quartz.jobStore.class = org.quartz.simpl.RAMJobStore diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/test/TestLocal.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/test/TestLocal.java Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,451 @@ +package de.mpg.mpiwg.berlin.mpdl.cms.test; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.PrintWriter; +import java.io.StringReader; +import java.io.StringWriter; +import java.net.URL; +import java.util.ArrayList; +import java.util.Date; +import java.util.Hashtable; + +import javax.xml.transform.stream.StreamSource; + +import net.sf.saxon.s9api.Processor; +import net.sf.saxon.s9api.QName; +import net.sf.saxon.s9api.Serializer; +import net.sf.saxon.s9api.XdmNode; +import net.sf.saxon.s9api.XsltCompiler; +import net.sf.saxon.s9api.XsltExecutable; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Fieldable; +import org.apache.lucene.index.Term; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import org.xml.sax.XMLReader; + +import com.sun.org.apache.xerces.internal.parsers.SAXParser; + +import de.mpg.mpiwg.berlin.mpdl.cms.lucene.IndexHandler; +import de.mpg.mpiwg.berlin.mpdl.cms.scheduler.CmsChainScheduler; +import 
de.mpg.mpiwg.berlin.mpdl.cms.scheduler.CmsDocOperation; +import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; +import de.mpg.mpiwg.berlin.mpdl.lt.dict.db.LexHandler; +import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.MorphologyCache; +import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.WordContentHandler; +import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.XmlTokenizer; +import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.XmlTokenizerContentHandler; +import de.mpg.mpiwg.berlin.mpdl.util.Util; +import de.mpg.mpiwg.berlin.mpdl.xml.xquery.XQueryEvaluator; +import de.mpg.mpiwg.berlin.mpdl.cms.transform.GetFragmentsContentHandler; +import de.mpg.mpiwg.berlin.mpdl.cms.transform.HighlightContentHandler; +import de.mpg.mpiwg.berlin.mpdl.cms.transform.PageTransformer; +import de.mpg.mpiwg.berlin.mpdl.cms.transform.XslResourceTransformer; +import de.mpg.mpiwg.berlin.mpdl.cms.document.DocumentHandler; +import de.mpg.mpiwg.berlin.mpdl.cms.document.Hits; +import de.mpg.mpiwg.berlin.mpdl.cms.document.MetadataRecord; +import de.mpg.mpiwg.berlin.mpdl.cms.document.Token; + +public class TestLocal { + private IndexHandler indexer; + + public static void main(String[] args) throws ApplicationException { + try { + TestLocal test = new TestLocal(); + test.init(); + // test.importAllDocuments(); + // test.createAllPdfInDirectory(); + // test.testTransform(); + // test.testXml(); + // test.generateToc(); + test.testCalls(); + // test.generatePdf(); + // test.xquery(); + // test.createToc(); + // test.testScheduler(); + // test.getDocInfo(); + // test.testChars(); + test.end(); + } catch (Exception e) { + e.printStackTrace(); + } + } + + private void init() throws ApplicationException { + indexer = IndexHandler.getInstance(); + } + + private void end() throws ApplicationException { + indexer.end(); + } + + private void testXml() throws ApplicationException { + try { + DocumentHandler docHandler = new DocumentHandler(); + String docDirName = 
docHandler.getDocDir("/echo/la/Benedetti_1585_163127KK.xml"); + String pageXmlFileName = docDirName + "/pages" + "/page-" + "444" + ".xml"; + File pageXmlFile = new File(pageXmlFileName); + String pageXmlStr = null; + if (pageXmlFile.exists()) + pageXmlStr = FileUtils.readFileToString(pageXmlFile, "utf-8"); + System.out.println(pageXmlStr); + String tokStr = tokenizeWithLemmas(pageXmlStr, "lat"); + System.out.println(tokStr); + tokStr = "" + tokStr; + byte[] blablabla = tokStr.getBytes("utf-8"); + String blablu = new String(blablabla, "utf-8"); + String bla = enrichWordsOrigRegNorm(blablu); + System.out.println(bla); + + XQueryEvaluator xQueryEvaluator = new XQueryEvaluator(); + URL url = new URL("file:/var/yp/Test_1789.xml"); + XdmNode docNode = xQueryEvaluator.parse(url); // if it is not parseable an exception with a detail message is thrown + + File srcFile = new File("/Users/jwillenborg/mpdl/data/xml/documents/tei/de/dt-ptolemaeus-tei-merge2.xml"); + FileReader docFileReader = new FileReader(srcFile); + XmlTokenizer docXmlTokenizer = new XmlTokenizer(docFileReader); + docXmlTokenizer.setDocIdentifier("/tei/de/dt-ptolemaeus-tei-merge2.xml"); + docXmlTokenizer.tokenize(); + ArrayList elements = docXmlTokenizer.getElements("s"); + String blabla = ""; + } catch (Exception e) { + e.printStackTrace(); + } + } + + private void importAllDocuments() throws ApplicationException { + DocumentHandler docHandler = new DocumentHandler(); + CmsDocOperation docOperation = new CmsDocOperation("importDirectory", "file:/Users/jwillenborg/test/documents", null, null); + docOperation.setCollectionNames("echo"); + docHandler.doOperation(docOperation); + } + + private void createAllPdfInDirectory() throws ApplicationException { + DocumentHandler docHandler = new DocumentHandler(); + CmsDocOperation docOperation = new CmsDocOperation("createAllPdfInDirectory", "file:/Users/jwillenborg/test/documents", null, null); + docOperation.setCollectionNames("echo"); + 
docHandler.doOperation(docOperation); + } + + private void generatePdf() throws ApplicationException { + long begin = new Date().getTime(); + DocumentHandler docHandler = new DocumentHandler(); + // String docId = "/echo/la/Benedetti_1585_163127KK.xml"; + String docId = "/diverse/de/Einst_Ueber_de_1907_02.xml"; + // String docId = "/archimedes/it/caver_metod_020_it_1891.xml"; + CmsDocOperation docOperation = new CmsDocOperation("createPdf", null, null, docId); + docHandler.doOperation(docOperation); + long end = new Date().getTime(); + System.out.println("Needed time: " + (end - begin)); + } + + private void testChars() throws ApplicationException { + String docId = "/test/benedetti/page-444.xml"; + String docSrcUrlStr = "file:/Users/jwillenborg/texts/mpdl/documents" + docId; + DocumentHandler docHandler = new DocumentHandler(); + CmsDocOperation docOperation = new CmsDocOperation("create", docSrcUrlStr, null, docId); + docHandler.doOperation(docOperation); + } + + private void testCalls() throws ApplicationException { + Date before = new Date(); + System.out.println("Indexing start: " + before.getTime()); + String docIdGoerz = "/tei/de/dt-ptolemaeus-tei-merge2.xml"; + String docSrcUrlStr = "file:/Users/jwillenborg/texts/mpdl/documents" + docIdGoerz; + DocumentHandler docHandler = new DocumentHandler(); + CmsDocOperation docOperation = new CmsDocOperation("create", docSrcUrlStr, null, docIdGoerz); + // docHandler.doOperation(docOperation); + String docIdSchulz = "/tei/de/Schulz_2009.xml"; + docSrcUrlStr = "file:/Users/jwillenborg/texts/mpdl/documents" + docIdSchulz; + docHandler = new DocumentHandler(); + docOperation = new CmsDocOperation("create", docSrcUrlStr, null, docIdSchulz); + // docHandler.doOperation(docOperation); + String docIdBenedetti = "/echo/la/Benedetti_1585_163127KK.xml"; + docSrcUrlStr = "file:/Users/jwillenborg/texts/mpdl/documents" + docIdBenedetti; + docHandler = new DocumentHandler(); + docOperation = new CmsDocOperation("create", 
docSrcUrlStr, null, docIdBenedetti); + // docHandler.doOperation(docOperation); + String docIdSongYingxing = "/echo/zh/SongYingxing_1637.xml"; + docSrcUrlStr = "file:/Users/jwillenborg/texts/mpdl/documents" + docIdSongYingxing; + docHandler = new DocumentHandler(); + docOperation = new CmsDocOperation("create", docSrcUrlStr, null, docIdSongYingxing); + // docHandler.doOperation(docOperation); + String docIdMonte = "/archimedes/la/monte_mecha_036_la_1577.xml"; + docSrcUrlStr = "file:/Users/jwillenborg/texts/mpdl/documents" + docIdMonte; + docHandler = new DocumentHandler(); + docOperation = new CmsDocOperation("create", docSrcUrlStr, null, docIdMonte); + // docHandler.doOperation(docOperation); + String docIdEinstein = "/diverse/de/Einst_Antwo_de_1912.xml"; + docSrcUrlStr = "file:/Users/jwillenborg/texts/mpdl/documents" + docIdEinstein; + docHandler = new DocumentHandler(); + docOperation = new CmsDocOperation("create", docSrcUrlStr, null, docIdEinstein); + // docHandler.doOperation(docOperation); + String docIdEinsteinUeber = "/diverse/de/Einst_Ueber_de_1907_02.xml"; + docSrcUrlStr = "file:/Users/jwillenborg/texts/mpdl/documents" + docIdEinsteinUeber; + docHandler = new DocumentHandler(); + docOperation = new CmsDocOperation("create", docSrcUrlStr, null, docIdEinsteinUeber); + // docHandler.doOperation(docOperation); + String docIdTest = "/echo/zh/Yulei_tushuo_2_FN1CTY5C.xml"; + docSrcUrlStr = "file:/Users/jwillenborg/texts/mpdl/documents" + docIdTest; + docHandler = new DocumentHandler(); + docOperation = new CmsDocOperation("create", docSrcUrlStr, null, docIdTest); + // docHandler.doOperation(docOperation); + String docIdMega = "/test/mega/MEGA_A2_B013-00_ETX.xml"; + docSrcUrlStr = "file:/Users/jwillenborg/texts/mpdl/documents" + docIdMega; + docHandler = new DocumentHandler(); + docOperation = new CmsDocOperation("create", docSrcUrlStr, null, docIdMega); + // docHandler.doOperation(docOperation); + String docIdDiverse = "/diverse/en/078_A_1916.xml"; + 
docSrcUrlStr = "file:/Users/jwillenborg/texts/mpdl/documents" + docIdDiverse; + docHandler = new DocumentHandler(); + docOperation = new CmsDocOperation("create", docSrcUrlStr, null, docIdDiverse); + // docHandler.doOperation(docOperation); + String docIdEinstGrossmann = "/diverse/de/EinsteinGrossmann.xml"; + docSrcUrlStr = "file:/Users/jwillenborg/texts/mpdl/documents" + docIdEinstGrossmann; + docHandler = new DocumentHandler(); + docOperation = new CmsDocOperation("create", docSrcUrlStr, null, docIdEinstGrossmann); + // docHandler.doOperation(docOperation); + String docIdEinstGrund = "/diverse/en/078_A_1916.xml"; + docSrcUrlStr = "file:/Users/jwillenborg/texts/mpdl/documents" + docIdEinstGrund; + docHandler = new DocumentHandler(); + docOperation = new CmsDocOperation("create", docSrcUrlStr, null, docIdEinstGrund); + // docHandler.doOperation(docOperation); + String docIdVolta = "/archimedes/it/volta_nuoMemLetTerz_922_it_1795.xml"; + docSrcUrlStr = "file:/Users/jwillenborg/texts/mpdl/documents" + docIdVolta; + docHandler = new DocumentHandler(); + docOperation = new CmsDocOperation("create", docSrcUrlStr, null, docIdVolta); + // docHandler.doOperation(docOperation); + String docIdVitruv = "/echo/it/Vitruvius_1747_Y1G1TRCW.xml"; + docSrcUrlStr = "file:/Users/jwillenborg/texts/mpdl/documents" + docIdVitruv; + docHandler = new DocumentHandler(); + docOperation = new CmsDocOperation("create", docSrcUrlStr, null, docIdVitruv); + docHandler.doOperation(docOperation); + // indexer.deleteDocument(docIdGoerz); + // indexer.deleteDocument(docIdBenedetti); + MorphologyCache.getInstance().end(); + LexHandler.getInstance().end(); + } + + private Hashtable getFragments(String fileName) throws ApplicationException { + try { + GetFragmentsContentHandler getFragmentsContentHandler = new GetFragmentsContentHandler(); + XMLReader xmlParser = new SAXParser(); + xmlParser.setContentHandler(getFragmentsContentHandler); + InputSource inputSource = new InputSource(fileName); + 
xmlParser.parse(inputSource); + Hashtable resultFragments = getFragmentsContentHandler.getResultPages(); + return resultFragments; + } catch (SAXException e) { + throw new ApplicationException(e); + } catch (IOException e) { + throw new ApplicationException(e); + } + } + + private void testTransform() throws ApplicationException { + Date begin = new Date(); + XslResourceTransformer xslResourceTransformer = new XslResourceTransformer("pageXml.xsl"); + xslResourceTransformer = new XslResourceTransformer("pageTei.xsl"); + xslResourceTransformer = new XslResourceTransformer("pageArchimedes.xsl"); + xslResourceTransformer = new XslResourceTransformer("pageXhtml.xsl"); + xslResourceTransformer = new XslResourceTransformer("pageEcho.xsl"); + Date end = new Date(); + System.out.println("Needed time: " + (end.getTime() - begin.getTime()) + " ms"); + begin = new Date(); + String docFilePath = "/Users/jwillenborg/mpdl/data/xml/documents/echo/la/Benedetti_1585/pages/page-13-morph.xml"; + xslResourceTransformer = new XslResourceTransformer("pageEcho.xsl"); + String result = xslResourceTransformer.transform(docFilePath); + xslResourceTransformer = new XslResourceTransformer("pageEcho.xsl"); + result = xslResourceTransformer.transform(docFilePath); + xslResourceTransformer = new XslResourceTransformer("pageEcho.xsl"); + result = xslResourceTransformer.transform(docFilePath); + xslResourceTransformer = new XslResourceTransformer("pageEcho.xsl"); + result = xslResourceTransformer.transform(docFilePath); + xslResourceTransformer = new XslResourceTransformer("pageEcho.xsl"); + result = xslResourceTransformer.transform(docFilePath); + end = new Date(); + System.out.println("Needed time: " + (end.getTime() - begin.getTime()) + " ms"); + xslResourceTransformer = new XslResourceTransformer("pageEcho.xsl"); + begin = new Date(); + result = xslResourceTransformer.transform(docFilePath); + result = xslResourceTransformer.transform(docFilePath); + result = 
xslResourceTransformer.transform(docFilePath); + result = xslResourceTransformer.transform(docFilePath); + result = xslResourceTransformer.transform(docFilePath); + end = new Date(); + System.out.println("Needed time: " + (end.getTime() - begin.getTime()) + " ms"); + try { + Processor processor = new Processor(false); + XsltCompiler xsltCompiler = processor.newXsltCompiler(); + URL xslUrl = XslResourceTransformer.class.getResource("pageEcho.xsl"); + StreamSource xslStreamSource = new StreamSource(xslUrl.openStream()); + XsltExecutable xsltExecutable = xsltCompiler.compile(xslStreamSource); + net.sf.saxon.s9api.XsltTransformer xsltTransformer = xsltExecutable.load(); + Serializer serializer = new Serializer(); + serializer.setOutputWriter(new StringWriter()); + begin = new Date(); + for (int i=0; i<=5; i++) { + StreamSource xmlDoc = new StreamSource(docFilePath); + xsltTransformer.setSource(xmlDoc); // needs some time for bigger documents + xsltTransformer.setDestination(serializer); + xsltTransformer.transform(); // needs some time for bigger documents + result = serializer.getOutputDestination().toString(); + } + end = new Date(); + System.out.println("Needed time: " + (end.getTime() - begin.getTime()) + " ms"); + } catch (Exception e) { + + } + } + + private String tokenizeXmlFragment() throws ApplicationException { + String result = null; + try { + String xmlFragment = new String(FileUtils.readFileToByteArray(new File("/Users/jwillenborg/tmp/testFragment2.xml")), "utf-8"); + String srcUrlStr = "http://mpdl-system.mpiwg-berlin.mpg.de/mpdl/page-query-result.xql?document=/echo/la/Benedetti_1585.xml&mode=pureXml&pn=13"; + URL srcUrl = new URL(srcUrlStr); + InputStream inputStream = srcUrl.openStream(); + BufferedInputStream in = new BufferedInputStream(inputStream); + xmlFragment = IOUtils.toString(in, "utf-8"); + in.close(); + + XmlTokenizer xmlTokenizer = new XmlTokenizer(new StringReader(xmlFragment)); + xmlTokenizer.setLanguage("lat"); + String[] stopElements = 
{"var"}; + // xmlTokenizer.setOutputFormat("string"); + String[] outputOptions = {"withLemmas"}; + xmlTokenizer.setOutputOptions(outputOptions); + xmlTokenizer.setStopElements(stopElements); + xmlTokenizer.tokenize(); + result = xmlTokenizer.getXmlResult(); + System.out.println(result); + } catch (Exception e) { + throw new ApplicationException(e); + } + return result; + } + + private String normalizeWords(String xmlStr) throws ApplicationException { + try { + WordContentHandler wordContentHandler = new WordContentHandler(); + XMLReader xmlParser = new SAXParser(); + xmlParser.setContentHandler(wordContentHandler); + StringReader strReader = new StringReader(xmlStr); + InputSource inputSource = new InputSource(strReader); + xmlParser.parse(inputSource); + String result = wordContentHandler.getResult(); + return result; + } catch (SAXException e) { + throw new ApplicationException(e); + } catch (IOException e) { + throw new ApplicationException(e); + } + } + + private String tokenizeWithLemmas(String xmlStr, String language) throws ApplicationException { + StringReader strReader = new StringReader(xmlStr); + XmlTokenizer xmlTokenizer = new XmlTokenizer(strReader); + xmlTokenizer.setLanguage(language); + String[] outputOptionsWithLemmas = {"withLemmas"}; // so all tokens are fetched with lemmas (costs performance) + xmlTokenizer.setOutputOptions(outputOptionsWithLemmas); + xmlTokenizer.tokenize(); + String retStr = xmlTokenizer.getXmlResult(); + return retStr; + } + + private String enrichWordsOrigRegNorm(String xmlStr) throws ApplicationException { + try { + WordContentHandler wordContentHandler = new WordContentHandler(); + XMLReader xmlParser = new SAXParser(); + xmlParser.setContentHandler(wordContentHandler); + StringReader strReader = new StringReader(xmlStr); + InputSource inputSource = new InputSource(strReader); + xmlParser.parse(inputSource); + String result = wordContentHandler.getResult(); + return result; + } catch (SAXException e) { + throw new 
ApplicationException(e); + } catch (IOException e) { + throw new ApplicationException(e); + } + } + + private String highlight(String xmlStr, String highlightElem, int highlightElemPos, String highlightQueryType, String highlightQuery, String language) throws ApplicationException { + String result = null; + try { + xmlStr = normalizeWords(xmlStr); + HighlightContentHandler highlightContentHandler = new HighlightContentHandler(highlightElem, highlightElemPos, highlightQueryType, highlightQuery, language); + highlightContentHandler.setFirstPageBreakReachedMode(true); + XMLReader xmlParser = new SAXParser(); + xmlParser.setContentHandler(highlightContentHandler); + StringReader stringReader = new StringReader(xmlStr); + InputSource inputSource = new InputSource(stringReader); + xmlParser.parse(inputSource); + result = highlightContentHandler.getResult().toString(); + } catch (SAXException e) { + throw new ApplicationException(e); + } catch (IOException e) { + throw new ApplicationException(e); + } + return result; + } + + private void testScheduler() throws ApplicationException { + CmsDocOperation docOperation = new CmsDocOperation("update", "http://mpdl-system.mpiwg-berlin.mpg.de/mpdl/getDoc?doc=/echo/zh/SongYingxing_1637.xml", null, "/echo/zh/SongYingxing_1637.xml"); + String[] elemNames = {"s", "head"}; + docOperation.setElementNames(elemNames); + CmsChainScheduler scheduler = CmsChainScheduler.getInstance(); + docOperation = scheduler.doOperation(docOperation); + String bla = ""; + } + + private void xquery() throws ApplicationException { + try { + XQueryEvaluator xQueryEvaluator = new XQueryEvaluator(); + URL srcUrl = new URL("file:/Users/jwillenborg/tmp/blablabla/Benedetti_1585.xml"); + String getTocEntries = "let $tocEntries := //echo:div[@type = 'section' or @type = 'chapter']"; + String getFigures = + "let $allFigures := //*:figure " + + "let $figures := " + + " for $figure at $pos in $allFigures "+ + " let $caption := string-join($figure/*:caption/text(), ' 
') " + + " let $description := string-join($figure/*:description/text(), ' ') " + + " let $variables := string-join($figure/*:variables/text(), ' ') " + + " let $retFigure := " + + " element {'figure'}" + + " { attribute {'number'} {$pos}, " + + " element {'caption'} {$caption}, " + + " element {'description'} {$description}, " + + " element {'variables'} {$variables} }" + + " return " + + " $retFigure " + + "return $figures"; + String result = xQueryEvaluator.evaluateAsString(srcUrl, getFigures); + String bla = result; + } catch (Exception e) { + throw new ApplicationException(e); + } + } + + private void createToc() throws ApplicationException { + String docDirName = "/Users/jwillenborg/mpdl/data/xml/documents/echo/zh/SongYingxing_1637"; + XslResourceTransformer tocTransformer = new XslResourceTransformer("toc.xsl"); + File tocFile = new File(docDirName + "/toc.xml"); + String docDestFileName = docDirName + "/SongYingxing_1637.xml"; + String tocResult = tocTransformer.transform(docDestFileName); + String bla = ""; + } + + private void getDocInfo() throws ApplicationException { + IndexHandler indexHandler = IndexHandler.getInstance(); + MetadataRecord mdRecord = indexHandler.getDocMetadata("/echo/la/Benedetti_1585_163127KK.xml"); + } + +} diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/transform/GetFragmentsContentHandler.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/transform/GetFragmentsContentHandler.java Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,203 @@ +package de.mpg.mpiwg.berlin.mpdl.cms.transform; + +import java.util.ArrayList; +import java.util.Hashtable; + +import org.xml.sax.*; + +import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; +import de.mpg.mpiwg.berlin.mpdl.util.StringUtils; + +public class GetFragmentsContentHandler implements ContentHandler { + private String xmlnsString = ""; + private 
int currentMilestonePosition = 0; + private Element currentElement; + private Element currentMilestoneElement; + private ArrayList elementQueue; // queue of opened elements (before they were closed): to connect them to a parent hierarchy + private Hashtable resultFragments = new Hashtable(); + private String milestoneElemenName = "pb"; // default is pb + + public GetFragmentsContentHandler() throws ApplicationException { + } + + public GetFragmentsContentHandler(String milestoneElemenName) throws ApplicationException { + this.milestoneElemenName = milestoneElemenName; + } + + public Hashtable getResultPages() { + return resultFragments; + } + + public int getMilestoneCount() { + return currentMilestonePosition; + } + + public void startDocument() throws SAXException { + } + + public void endDocument() throws SAXException { + // write the closePath after the last minus 1 milestone element (the closing path after the last milestone element is automatically written by the normal closing tags) + if (currentMilestoneElement != null) { + String msClosePath = currentMilestoneElement.getClosePath(); + write(msClosePath, currentMilestoneElement.milestonePosition - 1); + } + resultFragments.remove(new Integer(0)); // this fragment is filled but should not + } + + public void characters(char[] c, int start, int length) throws SAXException { + char[] cCopy = new char[length]; + System.arraycopy(c, start, cCopy, 0, length); + String charactersStr = String.valueOf(cCopy); + if (charactersStr != null && ! 
charactersStr.equals("")) { + if (currentMilestonePosition > 0) { + charactersStr = StringUtils.deresolveXmlEntities(charactersStr); + write(charactersStr); + } + } + } + + public void ignorableWhitespace(char[] c, int start, int length) throws SAXException { + } + + public void processingInstruction(String target, String data) throws SAXException { + } + + public void setDocumentLocator(Locator locator) { + } + + public void startPrefixMapping(String prefix, String uri) throws SAXException { + if (prefix != null && prefix.equals("")) + xmlnsString += "xmlns" + "=\"" + uri + "\" "; + else + xmlnsString += "xmlns:" + prefix + "=\"" + uri + "\" "; + } + + public void endPrefixMapping(String prefix) throws SAXException { + } + + public void skippedEntity(String name) throws SAXException { + } + + public void startElement(String uri, String localName, String name, Attributes attrs) throws SAXException { + if (elementQueue == null) + elementQueue = new ArrayList(); + Element newElement = new Element(name); + if (currentElement != null) { + newElement.parent = currentElement; + } + currentElement = newElement; + if (localName != null && localName.equals(milestoneElemenName)) { + currentMilestonePosition++; + if (currentMilestoneElement != null) { + String msClosePath = currentMilestoneElement.getClosePath(); + write(msClosePath, currentMilestoneElement.milestonePosition - 1); + } + currentMilestoneElement = currentElement; + currentMilestoneElement.milestonePosition = currentMilestonePosition; + String msOpenPath = currentMilestoneElement.getOpenPath(); + write(msOpenPath); + } + int attrSize = attrs.getLength(); + String attrString = ""; + for (int i=0; i 0) { + write("<" + name); + if (xmlnsString != null && ! xmlnsString.isEmpty()) + write(" " + xmlnsString); + if (attrString != null && ! 
attrString.isEmpty()) + write(" " + attrString); + write(">"); + } + xmlnsString = ""; + elementQueue.add(currentElement); + } + + public void endElement(String uri, String localName, String name) throws SAXException { + if (currentMilestonePosition > 0) { + write(""); + } + if (elementQueue != null && elementQueue.size() > 0) { + int lastIndex = elementQueue.size() - 1; + elementQueue.remove(lastIndex); + } + if (elementQueue != null && elementQueue.size() > 0) { + int lastIndex = elementQueue.size() - 1; + currentElement = elementQueue.get(lastIndex); + } else { + currentElement = null; + } + } + + private void write(String outStr) throws SAXException { + StringBuilder resultFragment = resultFragments.get(new Integer(currentMilestonePosition)); + if (resultFragment == null) { + resultFragment = new StringBuilder(); + resultFragments.put(new Integer(currentMilestonePosition), resultFragment); + } + resultFragment.append(outStr); + } + + private void write(String outStr, int milestoneNumber) throws SAXException { + StringBuilder resultFragment = resultFragments.get(new Integer(milestoneNumber)); + if (resultFragment == null) { + resultFragment = new StringBuilder(); + resultFragments.put(new Integer(milestoneNumber), resultFragment); + } + resultFragment.append(outStr); + } + + public class Element { + public String name; + private String xmlnsString; + private String attrString; + private int milestonePosition; + private Element parent; + + private Element(String name) { + this.name = name; + } + + private String getOpenPath() { + StringBuilder ancestorsStrBuilder = new StringBuilder(); + if (parent != null) { + ancestorsStrBuilder.append(parent.getOpenPath()); + ancestorsStrBuilder.append("<"); + ancestorsStrBuilder.append(parent.name); + if (parent.xmlnsString != null && ! parent.xmlnsString.isEmpty()) { + ancestorsStrBuilder.append(" "); + ancestorsStrBuilder.append(parent.xmlnsString); + } + if (parent.attrString != null && ! 
parent.attrString.isEmpty()) { + ancestorsStrBuilder.append(" " + parent.attrString); + } + ancestorsStrBuilder.append(">"); + } + return ancestorsStrBuilder.toString(); + } + + private String getClosePath() { + StringBuilder ancestorsStrBuilder = new StringBuilder(); + if (parent != null) { + ancestorsStrBuilder.append(""); + ancestorsStrBuilder.append(parent.getClosePath()); + } + return ancestorsStrBuilder.toString(); + } + + } +} diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/transform/HighlightContentHandler.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/transform/HighlightContentHandler.java Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,215 @@ +package de.mpg.mpiwg.berlin.mpdl.cms.transform; + +import java.util.ArrayList; + +import org.xml.sax.*; + +import de.mpg.mpiwg.berlin.mpdl.cms.lucene.IndexHandler; +import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; +import de.mpg.mpiwg.berlin.mpdl.util.StringUtils; + +public class HighlightContentHandler implements ContentHandler { + private String xmlnsString = ""; + private String highlightElemName; + private int highlightElemPos = 1; + private int currentHighlightElemPos = 0; + private boolean highlightElemMode = false; + private int highlightElemModeOpenTags = 0; + private String highlightQueryType = "orig"; // orig, reg, norm or morph + private String highlightQuery; // complex Lucene query + private String highlightQueryForms; // highlight terms separated by a blank + private boolean highlightHitMode = false; + private int highlightHitModeOpenTags = 0; + private boolean firstPageBreakReachedMode = false; // in a page fragment: if a page break element is surrounded by an element (e.g. 
"s") then this element should not increment the currentHighlightElemPos + private boolean firstPageBreakReached = true; + private StringBuilder result = new StringBuilder(); + + public HighlightContentHandler() throws ApplicationException { + } + + public HighlightContentHandler(String highlightElemName, int highlightElemPos) throws ApplicationException { + this.highlightElemName = highlightElemName; + this.highlightElemPos = highlightElemPos; + } + + public HighlightContentHandler(String highlightElemName, int highlightElemPos, String highlightQueryType, String highlightQuery, String language) throws ApplicationException { + this.highlightElemName = highlightElemName; + this.highlightElemPos = highlightElemPos; + this.highlightQueryType = highlightQueryType; + this.highlightQuery = highlightQuery; + if (highlightQuery != null) { + IndexHandler indexHandler = IndexHandler.getInstance(); + ArrayList queryTerms = indexHandler.fetchTerms(highlightQuery, language); // all query terms in query (also morphological terms) + highlightQueryForms = toString(queryTerms); + } + } + + public void setFirstPageBreakReachedMode(boolean firstPageBreakReachedMode) { + this.firstPageBreakReachedMode = firstPageBreakReachedMode; + if (firstPageBreakReachedMode) + this.firstPageBreakReached = false; // is first set to false and later if a page break is found (by startElement) it is set to true + } + + public StringBuilder getResult() { + return result; + } + + public void startDocument() throws SAXException { + } + + public void endDocument() throws SAXException { + } + + public void characters(char[] c, int start, int length) throws SAXException { + char[] cCopy = new char[length]; + System.arraycopy(c, start, cCopy, 0, length); + String charactersStr = String.valueOf(cCopy); + if (charactersStr != null && ! 
charactersStr.equals("")) { + charactersStr = StringUtils.deresolveXmlEntities(charactersStr); + write(charactersStr); + } + } + + public void ignorableWhitespace(char[] c, int start, int length) throws SAXException { + } + + public void processingInstruction(String target, String data) throws SAXException { + } + + public void setDocumentLocator(Locator locator) { + } + + public void startPrefixMapping(String prefix, String uri) throws SAXException { + xmlnsString += "xmlns:" + prefix + "=\"" + uri + "\" "; + if (prefix != null && prefix.equals("")) + xmlnsString = "xmlns" + "=\"" + uri + "\" "; + } + + public void endPrefixMapping(String prefix) throws SAXException { + } + + public void skippedEntity(String name) throws SAXException { + } + + public void startElement(String uri, String localName, String name, Attributes attrs) throws SAXException { + int attrSize = attrs.getLength(); + String attrString = ""; + for (int i=0; i"); + } + } + if (highlightElemMode) { + highlightElemModeOpenTags++; + } + // start highlight query + if (highlightQuery != null && localName.equals("w")) { + boolean matched = false; + String attrQName = "form"; + if (highlightQueryType.equals("orig")) + attrQName = "form"; + else if (highlightQueryType.equals("reg")) + attrQName = "formRegularized"; + else if (highlightQueryType.equals("norm")) + attrQName = "formNormalized"; + else if (highlightQueryType.equals("morph")) + attrQName = "lemmas"; + String attrValue = getAttrValue(attrs, attrQName); + if (highlightQueryType.equals("reg") && attrValue == null) + attrValue = getAttrValue(attrs, "form"); // if no regularized form exist it takes the form + if (attrValue != null) { + String[] forms = highlightQueryForms.split(" "); + for (int i=0; i"); + } + } + if (highlightHitMode) { + highlightHitModeOpenTags++; + } + write("<" + name); + if (xmlnsString != null && ! xmlnsString.isEmpty()) + write(" " + xmlnsString); + if (attrString != null && ! 
attrString.isEmpty()) + write(" " + attrString); + write(">"); + xmlnsString = ""; + } + + public void endElement(String uri, String localName, String name) throws SAXException { + write(""); + // end highlight element at position + if (highlightElemMode) { + if (highlightElemModeOpenTags == 1) { + highlightElemMode = false; + write(""); + } + highlightElemModeOpenTags--; + } + // end highlight query + if (highlightHitMode) { + if (highlightHitModeOpenTags == 1) { + highlightHitMode = false; + write(""); + } + highlightHitModeOpenTags--; + } + } + + private String toString(ArrayList queryForms) { + String queryFormsStr = ""; + for (int i=0; i + + + + + + + + + diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/transform/pageArchimedes.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/transform/pageArchimedes.xsl Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,332 @@ + + + + + + + + + + + + +
            + + + +
            +
            + + + + + + + + + + + + + + +
            + + + + +
            + + +
            +
            + + +
            + + + + + + + + + + + + + +
            +
            +
            +
            + + + +
            + + + +
            +
            +
            + + + +
            + + + + + + + + + + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + +
            +
            + + +
            + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + +
            +
            + + + + + + +
            +
            + +
            diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/transform/pageEcho.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/transform/pageEcho.xsl Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,559 @@ + + + + + + + + + + + + +
            + + + +
            +
            + + + + + + +
            + + + + +
            + + +
            +
            + + +
            + + + + + + + + + + + + + +
            +
            +
            +
            + + + +
            + + + + + + + + + + + + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + + +
            +
            +
            + + + + +
            + + + + + + + + + + + + + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + +
            +
            + + +
            + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + +
            +
            + + + + + + +
            +
            + +
            diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/transform/pageTei.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/transform/pageTei.xsl Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,690 @@ + + + + + + + + + + + + +
            + + + +
            +
            + + + + + + + + + + + + + + + + + + + +
            + + + + +
            + + +
            +
            + + +
            + + + + + + + + + + + + + +
            +
            +
            + + + + + + + + + + + + + + + + +
            + Figure: {$figDesc} +
            + +
            +
            +
            +
            +
            +
            + + + + + +
          • +
            +
            +
            +
            +
            +
            +
            +
            + + + +
            + + + +
            +
            +
            + + +
            + + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + + +
            +
            + + + + + + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            +
            + + + + + + +01002 b + ghj565hghj23,b02,0 + 0 + bvcxvb4 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
          • +
            +
            +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + +
            +
            + + +
            + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/transform/pageXhtml.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/transform/pageXhtml.xsl Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,240 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + + +
            +
            +
            + + +
            + + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + +
            +
            + + +
            + + +
            +
            + + +
            + + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + +
            +
            + + + + + + +
            +
            + +
            diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/transform/pageXml.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/transform/pageXml.xsl Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,86 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
              + +
            +
            + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + +
            +
            + + + + + + +
            +
            + +
            diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/transform/queryDocument.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/transform/queryDocument.xsl Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,93 @@ + + + + + + + + + + +' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
            + + + + + + + +
            +
            + + + + + + + + 1 + + + + + + + + xmlId + 4711 + + false + + + + + + + +
            +
            + + + + +
            diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/transform/replaceAnchor.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/transform/replaceAnchor.xsl Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,82 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/transform/toc.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/transform/toc.xsl Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,147 @@ + + + + + + + + + + + + + + + + + Table of contents + + + + + + + Figures + + + + + + + Notes + + + + + + + Pages + + + + + + + Places + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/transform/tocOut.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/transform/tocOut.xsl Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,45 @@ + + + + + + + + + + + + + + + + + + + + + + + +
            + + + +
            +
            + + + + + + + + + + + + +
            diff -r e845310098ba -r e9fe3186670c software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/translator/MicrosoftTranslator.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/translator/MicrosoftTranslator.java Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,120 @@ +package de.mpg.mpiwg.berlin.mpdl.cms.translator; + +import java.util.ArrayList; + +import com.memetix.mst.language.Language; +import com.memetix.mst.translate.Translate; +import com.memetix.mst.detect.Detect; + +import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; + +public class MicrosoftTranslator { + private static final String KEY = "474A4E72DB217E37031EC190ACB4159378A6917C"; // free key to use with Microsoft server + private static String[] DEST_LANGUAGES = {"eng", "ger", "fra"}; + + private MicrosoftTranslator() { + // nothing: prevent instantiation + } + + public static String[] getDestLanguages() { + return DEST_LANGUAGES; + } + + public static String translate(String query, String toLanguageStr) throws ApplicationException { + if (toLanguageStr == null) + throw new ApplicationException("MicrosoftTranslator: toLanguageStr is null"); + String translation = null; + try { + Translate.setKey(KEY); // Set the API key once per JVM. It is set statically and applies to all services + String langId = de.mpg.mpiwg.berlin.mpdl.lt.general.Language.getInstance().getLanguageId(toLanguageStr); // e.g. 
"de" is delivered from "deu" + Language toLanguage = Language.fromString(langId); + translation = Translate.execute(query, toLanguage); + translation = translation.toLowerCase(); + } catch (Exception e) { + throw new ApplicationException(e); + } + return translation; + } + + public static String translate(String query, String fromLanguageStr, String toLanguageStr) throws ApplicationException { // translates query from fromLanguageStr into toLanguageStr and returns the result lower-cased; both language arguments must be non-null + if (fromLanguageStr == null) + throw new ApplicationException("MicrosoftTranslator: fromLanguageStr is null"); + if (toLanguageStr == null) + throw new ApplicationException("MicrosoftTranslator: toLanguageStr is null"); + String translation = null; + try { + Translate.setKey(KEY); + String fromLangId = de.mpg.mpiwg.berlin.mpdl.lt.general.Language.getInstance().getLanguageId(fromLanguageStr); // e.g. "de" is delivered from "deu" + String toLangId = de.mpg.mpiwg.berlin.mpdl.lt.general.Language.getInstance().getLanguageId(toLanguageStr); // e.g. "de" is delivered from "deu" + Language fromLanguage = Language.fromString(fromLangId); + Language toLanguage = Language.fromString(toLangId); + translation = Translate.execute(query, fromLanguage, toLanguage); + translation = translation.toLowerCase(); + } catch (Exception e) { + throw new ApplicationException(e); + } + return translation; + } + + public static ArrayList translate(String[] query, String fromLanguageStr, String[] toLanguagesStr) throws ApplicationException { + ArrayList translations = new ArrayList(); + for (int i=0; i