0
|
1 <?xml version="1.0"?>
|
|
2 <!--
|
|
3 Licensed to the Apache Software Foundation (ASF) under one or more
|
|
4 contributor license agreements. See the NOTICE file distributed with
|
|
5 this work for additional information regarding copyright ownership.
|
|
6 The ASF licenses this file to You under the Apache License, Version 2.0
|
|
7 (the "License"); you may not use this file except in compliance with
|
|
8 the License. You may obtain a copy of the License at
|
|
9
|
|
10 http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
|
12 Unless required by applicable law or agreed to in writing, software
|
|
13 distributed under the License is distributed on an "AS IS" BASIS,
|
|
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15 See the License for the specific language governing permissions and
|
|
16 limitations under the License.
|
|
17 -->
|
|
18 <!-- This is the configuration file for the RegexUrlNormalize Class.
|
|
19 This is intended so that users can specify substitutions to be
|
|
20 done on URLs. The regex engine that is used is Perl5 compatible.
|
|
21 The rules are applied to URLs in the order they occur in this file. -->
|
|
22
|
|
<!-- WATCH OUT: an XML parser reads this file, so ampersands must be
     written as &amp; -->
|
|
25
|
|
26 <!-- The following rules show how to strip out session IDs, default pages,
|
|
27 interpage anchors, etc. Order does matter! -->
|
|
<regex-normalize>

  <!-- Rules are applied to each URL in the order they appear below.
       This file is parsed as XML: inside <pattern> and <substitution> a
       literal ampersand is written as &amp; and a literal less-than sign
       as &lt;. -->

  <!-- Removes session ids from URLs (such as jsessionid and PHPSESSID).
       Matching is case-insensitive and includes an optional leading ';'.
       The delimiter that ends the id (?, &, # or end of URL) is captured
       as group 4 and kept. -->
  <regex>
    <pattern>(?i)(;?\b_?(l|j|bv_)?(sid|phpsessid|sessionid)=.*?)(\?|&amp;|#|$)</pattern>
    <substitution>$4</substitution>
  </regex>

  <!-- DISABLED RULE: rewrites default pages such as /index.html or
       /default.aspx to "/", keeping the delimiter captured as group 3.
       Remove the surrounding comment markers to enable it.
  <regex>
    <pattern>/((?i)index|default)\.((?i)js[pf]{1}?[afx]?|cgi|cfm|asp[x]?|[psx]?htm[l]?|php[3456]?)(\?|&amp;|#|$)</pattern>
    <substitution>/$3</substitution>
  </regex> -->

  <!-- Removes interpage href anchors such as site.com#location.
       Everything from '#' up to the next ?, & or end of URL is dropped;
       that terminator is captured as group 1 and kept. -->
  <regex>
    <pattern>#.*?(\?|&amp;|$)</pattern>
    <substitution>$1</substitution>
  </regex>

  <!-- Cleans ?&var=value into ?var=value. -->
  <regex>
    <pattern>\?&amp;</pattern>
    <substitution>\?</substitution>
  </regex>

  <!-- Collapses two or more sequential ampersands into a single ampersand. -->
  <regex>
    <pattern>&amp;{2,}</pattern>
    <substitution>&amp;</substitution>
  </regex>

  <!-- Removes one trailing '?', '&' or '.' at the end of the URL. -->
  <regex>
    <pattern>[\?&amp;\.]$</pattern>
    <substitution></substitution>
  </regex>

  <!-- Removes duplicate slashes: a run of two or more '/' becomes one.
       The negative lookbehind skips a run that starts right after ':',
       so the '//' following the scheme (http://) is left alone. -->
  <regex>
    <pattern>(?&lt;!:)/{2,}</pattern>
    <substitution>/</substitution>
  </regex>

</regex-normalize>
|