View Javadoc

1   /* ====================================================================
2    * Copyright (c) 2006 J.T. Beetstra
3    *
4    * Permission is hereby granted, free of charge, to any person obtaining 
5    * a copy of this software and associated documentation files (the 
6    * "Software"), to deal in the Software without restriction, including 
7    * without limitation the rights to use, copy, modify, merge, publish, 
8    * distribute, sublicense, and/or sell copies of the Software, and to 
9    * permit persons to whom the Software is furnished to do so, subject to 
10   * the following conditions:
11   *
12   * The above copyright notice and this permission notice shall be 
13   * included in all copies or substantial portions of the Software.
14   *
15   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
16   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
17   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
18   * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
19   * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
20   * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
21   * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22   * ====================================================================
23   */
24  package com.beetstra.jutf7;
25  
26  import java.util.Arrays;
27  
28  /**
29   * <p>Represent a base 64 mapping. The 64 characters used in the encoding can be specified, 
30   * since modified-UTF-7 uses other characters than UTF-7 (',' instead of '/').</p>
31   * 
32   * <p>The exact type of the arguments and result values is adapted to the needs of the 
33   * encoder and decoder, as opposed to following a strict interpretation of base 64.</p> 
34   * <p>Base 64, as specified in RFC 2045, is an encoding used to encode bytes as characters. 
35   * In (modified-)UTF-7 however, it is used to encode characters as bytes, using some 
36   * intermediate steps:</p>
37   * <ol>
38   * <li>Encode all characters as a 16-bit (UTF-16) integer value</li>
39   * <li>Write this as stream of bytes (most-significant first)</li> 
40   * <li>Encode these bytes using (modified) base 64 encoding</li>
41   * <li>Write the thus formed stream of characters as a stream of bytes, using ASCII encoding</li>
42   * </ol>
43   * 
44   * @author Jaap Beetstra
45   */
class Base64Util {
	/** Number of characters in a base 64 alphabet. */
	private static final int ALPHABET_LENGTH = 64;
	/** Size of the reverse lookup table: one slot per 7-bit ASCII code point. */
	private static final int ASCII_RANGE = 128;

	/** Maps a sextet value (0..63) to its character. */
	private final char[] alphabet;
	/** Maps a 7-bit ASCII code to its sextet value, or -1 if the code is not in the alphabet. */
	private final int[] inverseAlphabet;

	/**
	 * Initializes the class with the specified encoding/decoding alphabet.
	 *
	 * @param alphabet the 64 characters used for sextet values 0 through 63, in that order
	 * @throws IllegalArgumentException if alphabet is not 64 characters long or
	 *  contains characters which are not 7-bit ASCII
	 */
	Base64Util(final String alphabet) {
		if (alphabet.length() != ALPHABET_LENGTH) {
			throw new IllegalArgumentException("alphabet has incorrect length (should be 64, not "
					+ alphabet.length() + ")");
		}
		this.alphabet = alphabet.toCharArray();
		inverseAlphabet = new int[ASCII_RANGE];
		Arrays.fill(inverseAlphabet, -1);
		for (int i = 0; i < this.alphabet.length; i++) {
			final char ch = this.alphabet[i];
			if (ch >= ASCII_RANGE) {
				throw new IllegalArgumentException("invalid character in alphabet: " + ch);
			}
			inverseAlphabet[ch] = i;
		}
	}

	/**
	 * Returns the integer value of the six bits represented by the specified character.
	 *
	 * @param ch The character, as an ASCII encoded byte
	 * @return The six bits, as an integer value, or -1 if the byte is not in the alphabet
	 */
	int getSextet(final byte ch) {
		// A Java byte is signed (-128..127): values with the high bit set arrive
		// as negative numbers and can never be 7-bit ASCII. Testing for >= 128
		// would never match and a negative value would index outside the table.
		if (ch < 0) {
			return -1;
		}
		return inverseAlphabet[ch];
	}

	/**
	 * Tells whether the alphabet contains the specified character.
	 *
	 * @param ch The character
	 * @return true if the alphabet contains <code>ch</code>, false otherwise
	 */
	boolean contains(final char ch) {
		// char is unsigned, so only the upper bound needs checking.
		if (ch >= ASCII_RANGE) {
			return false;
		}
		return inverseAlphabet[ch] >= 0;
	}

	/**
	 * Encodes the six bit group as a character.
	 *
	 * @param sextet The six bit group to be encoded (0..63)
	 * @return The ASCII value of the character
	 * @throws ArrayIndexOutOfBoundsException if <code>sextet</code> is outside 0..63
	 */
	byte getChar(final int sextet) {
		// Safe narrowing: the constructor guarantees every alphabet character is < 128.
		return (byte) alphabet[sextet];
	}
}