View Javadoc
/*
 * DavMail POP/IMAP/SMTP/CalDav/LDAP Exchange Gateway
 * Copyright (C) 2010  Mickael Guessant
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */
19  
20  package davmail.http;
21  
22  import org.apache.commons.codec.DecoderException;
23  import org.apache.commons.codec.net.URLCodec;
24  import org.apache.http.Consts;
25  
26  import java.io.IOException;
27  import java.util.BitSet;
28  
29  /**
30   * Implement encode/decode logic to replace HttpClient 3 URIUtil
31   */
32  public class URIUtil {
33  
34      /**
35       * The percent "%" character always has the reserved purpose of being the
36       * escape indicator, it must be escaped as "%25" in order to be used as
37       * data within a URI.
38       */
39      protected static final BitSet percent = new BitSet(256);
40      // Static initializer for percent
41      static {
42          percent.set('%');
43      }
44  
45  
46      /**
47       * BitSet for digit.
48       * <p><blockquote><pre>
49       * digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
50       *            "8" | "9"
51       * </pre></blockquote><p>
52       */
53      protected static final BitSet digit = new BitSet(256);
54      // Static initializer for digit
55      static {
56          for (int i = '0'; i <= '9'; i++) {
57              digit.set(i);
58          }
59      }
60  
61  
62      /**
63       * BitSet for alpha.
64       * <p><blockquote><pre>
65       * alpha         = lowalpha | upalpha
66       * </pre></blockquote><p>
67       */
68      protected static final BitSet alpha = new BitSet(256);
69      // Static initializer for alpha
70      static {
71          for (int i = 'a'; i <= 'z'; i++) {
72              alpha.set(i);
73          }
74          for (int i = 'A'; i <= 'Z'; i++) {
75              alpha.set(i);
76          }
77      }
78  
79  
80      /**
81       * BitSet for alphanum (join of alpha &amp; digit).
82       * <p><blockquote><pre>
83       *  alphanum      = alpha | digit
84       * </pre></blockquote><p>
85       */
86      protected static final BitSet alphanum = new BitSet(256);
87      // Static initializer for alphanum
88      static {
89          alphanum.or(alpha);
90          alphanum.or(digit);
91      }
92  
93  
94      /**
95       * BitSet for hex.
96       * <p><blockquote><pre>
97       * hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
98       *                         "a" | "b" | "c" | "d" | "e" | "f"
99       * </pre></blockquote><p>
100      */
101     protected static final BitSet hex = new BitSet(256);
102     // Static initializer for hex
103     static {
104         hex.or(digit);
105         for (int i = 'a'; i <= 'f'; i++) {
106             hex.set(i);
107         }
108         for (int i = 'A'; i <= 'F'; i++) {
109             hex.set(i);
110         }
111     }
112 
113 
114     /**
115      * BitSet for escaped.
116      * <p><blockquote><pre>
117      * escaped       = "%" hex hex
118      * </pre></blockquote><p>
119      */
120     protected static final BitSet escaped = new BitSet(256);
121     // Static initializer for escaped
122     static {
123         escaped.or(percent);
124         escaped.or(hex);
125     }
126 
127 
128     /**
129      * BitSet for mark.
130      * <p><blockquote><pre>
131      * mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
132      *                 "(" | ")"
133      * </pre></blockquote><p>
134      */
135     protected static final BitSet mark = new BitSet(256);
136     // Static initializer for mark
137     static {
138         mark.set('-');
139         mark.set('_');
140         mark.set('.');
141         mark.set('!');
142         mark.set('~');
143         mark.set('*');
144         mark.set('\'');
145         mark.set('(');
146         mark.set(')');
147     }
148 
149 
150     /**
151      * Data characters that are allowed in a URI but do not have a reserved
152      * purpose are called unreserved.
153      * <p><blockquote><pre>
154      * unreserved    = alphanum | mark
155      * </pre></blockquote><p>
156      */
157     protected static final BitSet unreserved = new BitSet(256);
158     // Static initializer for unreserved
159     static {
160         unreserved.or(alphanum);
161         unreserved.or(mark);
162     }
163 
164 
165     /**
166      * BitSet for reserved.
167      * <p><blockquote><pre>
168      * reserved      = ";" | "/" | "?" | ":" | "@" | "&amp;" | "=" | "+" |
169      *                 "$" | ","
170      * </pre></blockquote><p>
171      */
172     protected static final BitSet reserved = new BitSet(256);
173     // Static initializer for reserved
174     static {
175         reserved.set(';');
176         reserved.set('/');
177         reserved.set('?');
178         reserved.set(':');
179         reserved.set('@');
180         reserved.set('&');
181         reserved.set('=');
182         reserved.set('+');
183         reserved.set('$');
184         reserved.set(',');
185     }
186 
187 
188     /**
189      * BitSet for uric.
190      * <p><blockquote><pre>
191      * uric          = reserved | unreserved | escaped
192      * </pre></blockquote><p>
193      */
194     protected static final BitSet uric = new BitSet(256);
195     // Static initializer for uric
196     static {
197         uric.or(reserved);
198         uric.or(unreserved);
199         uric.or(escaped);
200     }
201 
202     /**
203      * BitSet for pchar.
204      * <p><blockquote><pre>
205      * pchar         = unreserved | escaped |
206      *                 ":" | "@" | "&amp;" | "=" | "+" | "$" | ","
207      * </pre></blockquote><p>
208      */
209     protected static final BitSet pchar = new BitSet(256);
210     // Static initializer for pchar
211     static {
212         pchar.or(unreserved);
213         pchar.or(escaped);
214         pchar.set(':');
215         pchar.set('@');
216         pchar.set('&');
217         pchar.set('=');
218         pchar.set('+');
219         pchar.set('$');
220         pchar.set(',');
221     }
222 
223 
224     /**
225      * BitSet for param (alias for pchar).
226      * <p><blockquote><pre>
227      * param         = *pchar
228      * </pre></blockquote><p>
229      */
230     protected static final BitSet param = pchar;
231 
232 
233     /**
234      * BitSet for segment.
235      * <p><blockquote><pre>
236      * segment       = *pchar *( ";" param )
237      * </pre></blockquote><p>
238      */
239     protected static final BitSet segment = new BitSet(256);
240     // Static initializer for segment
241     static {
242         segment.or(pchar);
243         segment.set(';');
244         segment.or(param);
245     }
246 
247 
248     /**
249      * BitSet for path segments.
250      * <p><blockquote><pre>
251      * path_segments = segment *( "/" segment )
252      * </pre></blockquote><p>
253      */
254     protected static final BitSet path_segments = new BitSet(256);
255     // Static initializer for path_segments
256     static {
257         path_segments.set('/');
258         path_segments.or(segment);
259     }
260 
261     /**
262      * URI absolute path.
263      * <p><blockquote><pre>
264      * abs_path      = "/"  path_segments
265      * </pre></blockquote><p>
266      */
267     protected static final BitSet abs_path = new BitSet(256);
268     // Static initializer for abs_path
269     static {
270         abs_path.set('/');
271         abs_path.or(path_segments);
272     }
273 
274     /**
275      * Those characters that are allowed for the abs_path.
276      */
277     public static final BitSet allowed_abs_path = new BitSet(256);
278     static {
279         allowed_abs_path.or(abs_path);
280         // allowed_abs_path.set('/');  // aleady included
281         allowed_abs_path.andNot(percent);
282         allowed_abs_path.clear('+');
283     }
284 
285     /**
286      * Those characters that are allowed for the query component.
287      */
288     public static final BitSet allowed_query = new BitSet(256);
289     // Static initializer for allowed_query
290     static {
291         allowed_query.or(uric);
292         allowed_query.clear('%');
293     }
294 
295     /**
296      * Those characters that are allowed within the query component.
297      */
298     public static final BitSet allowed_within_query = new BitSet(256);
299     // Static initializer for allowed_within_query
300     static {
301         allowed_within_query.or(allowed_query);
302         allowed_within_query.andNot(reserved); // excluded 'reserved'
303     }
304 
305     /**
306      * Decode url encoded string.
307      * @param escaped encoded string
308      * @return decoded string
309      * @throws IOException on error
310      */
311     public static String decode(String escaped) throws IOException {
312         try {
313             return getString(URLCodec.decodeUrl(getAsciiBytes(escaped)));
314         } catch (DecoderException e) {
315             throw new IOException(e.getMessage());
316         }
317     }
318 
319     /**
320      * Encode url path.
321      * @param unescaped unencoded path
322      * @return escaped path
323      */
324     public static String encodePath(String unescaped) {
325         return encode(unescaped, allowed_abs_path);
326     }
327 
328     /**
329      * URL encode string.
330      * @param unescaped unencoded string
331      * @param allowed allowed characters bitset
332      * @return encoded string
333      */
334     public static String encode(String unescaped, BitSet allowed) {
335         return getAsciiString(URLCodec.encodeUrl(allowed, getBytes(unescaped)));
336     }
337 
338     /**
339      * URL encode query string.
340      * @param unescaped unencoded query string
341      * @return encoded string query string
342      */
343     public static String encodeWithinQuery(String unescaped) {
344         return encode(unescaped, allowed_within_query);
345     }
346 
347     /**
348      * URL encode path and query string.
349      * @param unescaped unencoded path and query string
350      * @return encoded string path and query string
351      */
352     public static String encodePathQuery(String unescaped){
353         int at = unescaped.indexOf('?');
354         if (at < 0) {
355             return encode(unescaped, allowed_abs_path);
356         } else {
357             return encode(unescaped.substring(0, at), allowed_abs_path)
358                     + '?' + encode(unescaped.substring(at + 1), allowed_query);
359         }
360     }
361 
362     public static byte[] getBytes(final String value) {
363         if (value == null) {
364             throw new IllegalArgumentException("Parameter may not be null");
365         }
366 
367         return value.getBytes(Consts.UTF_8);
368     }
369 
370     public static byte[] getAsciiBytes(final String value) {
371         if (value == null) {
372             throw new IllegalArgumentException("Parameter may not be null");
373         }
374 
375         return value.getBytes(Consts.ASCII);
376     }
377 
378     /**
379      * Convert byte array to an ASCII string value.
380      * @param bytes byte array
381      * @return ASCII string
382      */
383     public static String getAsciiString(final byte[] bytes) {
384         if (bytes == null) {
385             throw new IllegalArgumentException("Parameter may not be null");
386         }
387 
388         return new String(bytes, Consts.ASCII);
389     }
390 
391     /**
392      * Convert byte array to a UTF-8 string value.
393      * @param bytes byte array
394      * @return ASCII string
395      */
396     public static String getString(final byte[] bytes) {
397         if (bytes == null) {
398             throw new IllegalArgumentException("Parameter may not be null");
399         }
400 
401         return new String(bytes, Consts.UTF_8);
402     }
403 }