View Javadoc

1   /* ====================================================================
2    * Copyright (c) 2006 J.T. Beetstra
3    *
4    * Permission is hereby granted, free of charge, to any person obtaining 
5    * a copy of this software and associated documentation files (the 
6    * "Software"), to deal in the Software without restriction, including 
7    * without limitation the rights to use, copy, modify, merge, publish, 
8    * distribute, sublicense, and/or sell copies of the Software, and to 
9    * permit persons to whom the Software is furnished to do so, subject to 
10   * the following conditions:
11   *
12   * The above copyright notice and this permission notice shall be 
13   * included in all copies or substantial portions of the Software.
14   *
15   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
16   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
17   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
18   * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
19   * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
20   * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
21   * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22   * ====================================================================
23   */
24  package com.beetstra.jutf7;
25  
26  import java.nio.charset.Charset;
27  import java.nio.charset.CharsetDecoder;
28  import java.nio.charset.CharsetEncoder;
29  import java.util.Arrays;
30  import java.util.List;
31  
32  /**
33   * <p>Abstract base class for UTF-7 style encoding and decoding.</p>
34   * 
35   * @author Jaap Beetstra
36   */
37  abstract class UTF7StyleCharset extends Charset {
38  	private static final List CONTAINED = Arrays.asList(new String[] { "US-ASCII", "ISO-8859-1",
39  			"UTF-8", "UTF-16", "UTF-16LE", "UTF-16BE" });
40  	final boolean strict;
41  	Base64Util base64;
42  
43  	/**
44  	 * <p>Besides the name and aliases, two additional parameters are required. First the 
45  	 * base 64 alphabet used; in modified UTF-7 a slightly different alphabet is used. 
46  	 * Additionally, it should be specified if encoders and decoders should be strict 
47  	 * about the interpretation of malformed encoded sequences. This is used since 
48  	 * modified UTF-7 specifically disallows some constructs which are allowed (or not 
49  	 * specifically disallowed) in UTF-7 (RFC 2152).</p>
50  	 * 
51  	 * @param canonicalName The name as defined in java.nio.charset.Charset
52  	 * @param aliases The aliases as defined in java.nio.charset.Charset
53  	 * @param alphabet The base 64 alphabet used
54  	 * @param strict True if strict handling of sequences is requested
55  	 */
56  	protected UTF7StyleCharset(String canonicalName, String[] aliases, String alphabet,
57  			boolean strict) {
58  		super(canonicalName, aliases);
59  		this.base64 = new Base64Util(alphabet);
60  		this.strict = strict;
61  	}
62  
63  	/* (non-Javadoc)
64  	 * @see java.nio.charset.Charset#contains(java.nio.charset.Charset)
65  	 */
66  	public boolean contains(final Charset cs) {
67  		return CONTAINED.contains(cs.name());
68  	}
69  
70  	/* (non-Javadoc)
71  	 * @see java.nio.charset.Charset#newDecoder()
72  	 */
73  	public CharsetDecoder newDecoder() {
74  		return new UTF7StyleCharsetDecoder(this, base64, strict);
75  	}
76  
77  	/* (non-Javadoc)
78  	 * @see java.nio.charset.Charset#newEncoder()
79  	 */
80  	public CharsetEncoder newEncoder() {
81  		return new UTF7StyleCharsetEncoder(this, base64, strict);
82  	}
83  
84  	/**
85  	 * Tells if a character can be encoded using simple (US-ASCII) encoding or 
86  	 * requires base 64 encoding.
87  	 * 
88  	 * @param ch The character
89  	 * @return True if the character can be encoded directly, false otherwise
90  	 */
91  	abstract boolean canEncodeDirectly(char ch);
92  
93  	/**
94  	 * Returns character used to switch to base 64 encoding.
95  	 * @return The shift character
96  	 */
97  	abstract byte shift();
98  
99  	/**
100 	 * Returns character used to switch from base 64 encoding to simple encoding.
101 	 * @return The unshift character
102 	 */
103 	abstract byte unshift();
104 }