1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.fileupload.util.mime;
18
19 import java.io.ByteArrayOutputStream;
20 import java.io.IOException;
21 import java.io.UnsupportedEncodingException;
22 import java.util.HashMap;
23 import java.util.Locale;
24 import java.util.Map;
25
26
27
28
29
30
31 public final class MimeUtility {
32
33
34
35
36 private static final String US_ASCII_CHARSET = "US-ASCII";
37
38
39
40
41 private static final String BASE64_ENCODING_MARKER = "B";
42
43
44
45
46 private static final String QUOTEDPRINTABLE_ENCODING_MARKER = "Q";
47
48
49
50
51 private static final String ENCODED_TOKEN_MARKER = "=?";
52
53
54
55
56 private static final String ENCODED_TOKEN_FINISHER = "?=";
57
58
59
60
61 private static final String LINEAR_WHITESPACE = " \t\r\n";
62
63
64
65
66 private static final Map<String, String> MIME2JAVA = new HashMap<String, String>();
67
68 static {
69 MIME2JAVA.put("iso-2022-cn", "ISO2022CN");
70 MIME2JAVA.put("iso-2022-kr", "ISO2022KR");
71 MIME2JAVA.put("utf-8", "UTF8");
72 MIME2JAVA.put("utf8", "UTF8");
73 MIME2JAVA.put("ja_jp.iso2022-7", "ISO2022JP");
74 MIME2JAVA.put("ja_jp.eucjp", "EUCJIS");
75 MIME2JAVA.put("euc-kr", "KSC5601");
76 MIME2JAVA.put("euckr", "KSC5601");
77 MIME2JAVA.put("us-ascii", "ISO-8859-1");
78 MIME2JAVA.put("x-us-ascii", "ISO-8859-1");
79 }
80
81
82
83
84 private MimeUtility() {
85
86 }
87
88
89
90
91
92
93
94
95
96
97
98
99 public static String decodeText(String text) throws UnsupportedEncodingException {
100
101
102 if (text.indexOf(ENCODED_TOKEN_MARKER) < 0) {
103 return text;
104 }
105
106 int offset = 0;
107 int endOffset = text.length();
108
109 int startWhiteSpace = -1;
110 int endWhiteSpace = -1;
111
112 StringBuilder decodedText = new StringBuilder(text.length());
113
114 boolean previousTokenEncoded = false;
115
116 while (offset < endOffset) {
117 char ch = text.charAt(offset);
118
119
120 if (LINEAR_WHITESPACE.indexOf(ch) != -1) {
121 startWhiteSpace = offset;
122 while (offset < endOffset) {
123
124 ch = text.charAt(offset);
125 if (LINEAR_WHITESPACE.indexOf(ch) != -1) {
126 offset++;
127 } else {
128
129
130 endWhiteSpace = offset;
131 break;
132 }
133 }
134 } else {
135
136 int wordStart = offset;
137
138 while (offset < endOffset) {
139
140 ch = text.charAt(offset);
141 if (LINEAR_WHITESPACE.indexOf(ch) == -1) {
142 offset++;
143 } else {
144 break;
145 }
146
147
148 }
149
150 String word = text.substring(wordStart, offset);
151
152 if (word.startsWith(ENCODED_TOKEN_MARKER)) {
153 try {
154
155 String decodedWord = decodeWord(word);
156
157
158 if (!previousTokenEncoded && startWhiteSpace != -1) {
159 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
160 startWhiteSpace = -1;
161 }
162
163 previousTokenEncoded = true;
164
165 decodedText.append(decodedWord);
166
167
168 continue;
169
170 } catch (ParseException e) {
171
172 }
173 }
174
175
176 if (startWhiteSpace != -1) {
177 decodedText.append(text.substring(startWhiteSpace, endWhiteSpace));
178 startWhiteSpace = -1;
179 }
180
181 previousTokenEncoded = false;
182 decodedText.append(word);
183 }
184 }
185
186 return decodedText.toString();
187 }
188
189
190
191
192
193
194
195
196
197
198
199
200
201 private static String decodeWord(String word) throws ParseException, UnsupportedEncodingException {
202
203
204
205 if (!word.startsWith(ENCODED_TOKEN_MARKER)) {
206 throw new ParseException("Invalid RFC 2047 encoded-word: " + word);
207 }
208
209 int charsetPos = word.indexOf('?', 2);
210 if (charsetPos == -1) {
211 throw new ParseException("Missing charset in RFC 2047 encoded-word: " + word);
212 }
213
214
215 String charset = word.substring(2, charsetPos).toLowerCase(Locale.ENGLISH);
216
217
218 int encodingPos = word.indexOf('?', charsetPos + 1);
219 if (encodingPos == -1) {
220 throw new ParseException("Missing encoding in RFC 2047 encoded-word: " + word);
221 }
222
223 String encoding = word.substring(charsetPos + 1, encodingPos);
224
225
226 int encodedTextPos = word.indexOf(ENCODED_TOKEN_FINISHER, encodingPos + 1);
227 if (encodedTextPos == -1) {
228 throw new ParseException("Missing encoded text in RFC 2047 encoded-word: " + word);
229 }
230
231 String encodedText = word.substring(encodingPos + 1, encodedTextPos);
232
233
234 if (encodedText.length() == 0) {
235 return "";
236 }
237
238 try {
239
240 ByteArrayOutputStream out = new ByteArrayOutputStream(encodedText.length());
241
242 byte[] encodedData = encodedText.getBytes(US_ASCII_CHARSET);
243
244
245 if (encoding.equals(BASE64_ENCODING_MARKER)) {
246 Base64Decoder.decode(encodedData, out);
247 } else if (encoding.equals(QUOTEDPRINTABLE_ENCODING_MARKER)) {
248 QuotedPrintableDecoder.decode(encodedData, out);
249 } else {
250 throw new UnsupportedEncodingException("Unknown RFC 2047 encoding: " + encoding);
251 }
252
253 byte[] decodedData = out.toByteArray();
254 return new String(decodedData, javaCharset(charset));
255 } catch (IOException e) {
256 throw new UnsupportedEncodingException("Invalid RFC 2047 encoding");
257 }
258 }
259
260
261
262
263
264
265
266
267
268 private static String javaCharset(String charset) {
269
270 if (charset == null) {
271 return null;
272 }
273
274 String mappedCharset = MIME2JAVA.get(charset.toLowerCase(Locale.ENGLISH));
275
276
277 if (mappedCharset == null) {
278 return charset;
279 }
280 return mappedCharset;
281 }
282
283 }