/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.imaging.common;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.util.Map;

import org.apache.commons.imaging.ImagingException;

/**
 * A rudimentary preprocessor and parser for the C programming language.
 *
 * FIXME replace this by a parser generated via ANTLR (if we really need it?!)
 */
public class BasicCParser {
    /**
     * Parses the hexadecimal-base escape-sequence found at index {@code i} of {@code string}.
     *
     * <p>
     * Helper-function for {@code unescapeString()}. Exactly two hexadecimal digits are consumed after the {@code x}.
     * </p>
     *
     * @param i             the index of the escape-sequence in the string
     * @param stringBuilder the stringBuilder to append the escape-char to
     * @param string        the string whose chars are parsed
     * @return the new index i
     * @throws ImagingException if fewer than two characters follow the {@code x}, or if either of them is not a hexadecimal digit
     * @since 1.0-alpha3
     */
    private static int appendHex(int i, final StringBuilder stringBuilder, final String string) throws ImagingException {
        if (i + 2 >= string.length()) {
            throw new ImagingException("Parsing XPM file failed, " + "hex constant in string too short");
        }
        // Validate each digit on its own: Integer.parseInt() would also accept a leading '+' or '-',
        // silently turning e.g. "\x-1" into a bogus character instead of reporting a malformed escape.
        final int highNibble = Character.digit(string.charAt(i + 1), 16);
        final int lowNibble = Character.digit(string.charAt(i + 2), 16);
        if (highNibble < 0 || lowNibble < 0) {
            throw new ImagingException("Parsing XPM file failed, " + "hex constant invalid");
        }
        stringBuilder.append((char) (highNibble * 16 + lowNibble));
        return i + 2;
    }

    /**
     * Parses the octal-base escape-sequence found at index {@code i} of {@code string}.
     *
     * <p>
     * Helper-function for {@code unescapeString()}. Between one and three <em>consecutive</em> octal digits are consumed, starting with the digit at
     * {@code i}.
     * </p>
     *
     * @param i             the index of the escape-sequence in the string
     * @param stringBuilder the stringBuilder to append the escape-char to
     * @param string        the string whose chars are parsed
     * @return the new index i
     * @since 1.0-alpha3
     */
    private static int appendOct(int i, final StringBuilder stringBuilder, final String string) {
        int length = 1;
        // The third digit only belongs to the escape when the second one does as well;
        // otherwise an input such as "\1a2" would wrongly swallow the 'a'.
        if (isOctalDigitAt(string, i + 1)) {
            ++length;
            if (isOctalDigitAt(string, i + 2)) {
                ++length;
            }
        }
        int constant = 0;
        for (int j = 0; j < length; j++) {
            constant *= 8;
            constant += string.charAt(i + j) - '0';
        }
        i += length - 1;
        stringBuilder.append((char) constant);
        return i;
    }

    /**
     * Tests whether {@code string} has an octal digit ({@code '0'} to {@code '7'}) at {@code index}.
     *
     * @param string the string to inspect
     * @param index  the index to test; may be at or past the end of the string
     * @return {@code true} if the index is before the end of the string and holds an octal digit
     */
    private static boolean isOctalDigitAt(final String string, final int index) {
        if (index >= string.length()) {
            return false;
        }
        final char c = string.charAt(index);
        return '0' <= c && c <= '7';
    }

    /**
     * Parses the {@code i:th} escape-char in the input {@code string} and appends it to {@code stringBuilder}.
     *
     * <p>
     * Helper-function for {@code unescapeString()}.
     * </p>
     *
     * @param i             the index of the escape-char in the string
     * @param stringBuilder the stringBuilder to append the escape-char to
     * @param string        the string whose chars are parsed
     * @return the new index i
     * @throws ImagingException if the character at {@code i} does not start a supported escape sequence, or a hexadecimal escape is malformed
     * @since 1.0-alpha3
     */
    private static int parseEscape(int i, final StringBuilder stringBuilder, final String string) throws ImagingException {
        final char c = string.charAt(i);
        switch (c) {
        case '\\':
            stringBuilder.append('\\');
            break;
        case '"':
            stringBuilder.append('"');
            break;
        case '\'':
            stringBuilder.append('\'');
            break;
        case 'x':
            i = appendHex(i, stringBuilder, string);
            break;
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
            i = appendOct(i, stringBuilder, string);
            break;
        case 'a':
            stringBuilder.append((char) 0x07);
            break;
        case 'b':
            stringBuilder.append((char) 0x08);
            break;
        case 'f':
            stringBuilder.append((char) 0x0c);
            break;
        case 'n':
            stringBuilder.append((char) 0x0a);
            break;
        case 'r':
            stringBuilder.append((char) 0x0d);
            break;
        case 't':
            stringBuilder.append((char) 0x09);
            break;
        case 'v':
            stringBuilder.append((char) 0x0b);
            break;
        default:
            throw new ImagingException("Parsing XPM file failed, " + "invalid escape sequence");
        }
        return i;

    }

    /**
     * Runs a minimal C preprocessing pass over the bytes of {@code is}.
     *
     * <p>
     * Block comments ({@code /* ... *}{@code /}) are removed from the output, string and character literals are copied through unchanged (including their
     * backslash escapes), and {@code #define} directives are removed from the output and recorded in {@code defines}. Everything else is copied to the
     * returned stream byte by byte.
     * </p>
     *
     * @param is           the stream to read the C source from; it is read until end of stream and not closed here
     * @param firstComment if not {@code null}, receives the text of the first block comment, provided nothing but whitespace was copied to the output
     *                     before it
     * @param defines      receives one entry per {@code #define}: the name mapped to its value, or to {@code null} when the directive has no value; may
     *                     be {@code null} if the input contains no directives
     * @return the preprocessed source
     * @throws IOException      if reading from {@code is} fails
     * @throws ImagingException if a string or character literal contains a line break, a string or comment is still open at the end of the input, a
     *                          directive is malformed or is not {@code define}, or a directive is found while {@code defines} is {@code null}
     */
    public static ByteArrayOutputStream preprocess(final InputStream is, final StringBuilder firstComment, final Map<String, String> defines)
            throws IOException, ImagingException {
        boolean inSingleQuotes = false;
        boolean inString = false;
        boolean inComment = false;
        boolean inDirective = false;
        boolean hadSlash = false;
        boolean hadStar = false;
        boolean hadBackSlash = false;
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        // With no buffer to fill, behave as if the first comment had already been seen.
        boolean seenFirstComment = firstComment == null;
        final StringBuilder directiveBuffer = new StringBuilder();
        for (int c = is.read(); c != -1; c = is.read()) {
            if (inComment) {
                // A '*' is held back (hadStar) until the next char shows whether it starts the closing "*/".
                if (c == '*') {
                    if (hadStar && !seenFirstComment) {
                        firstComment.append('*');
                    }
                    hadStar = true;
                } else if (c == '/') {
                    if (hadStar) {
                        hadStar = false;
                        inComment = false;
                        seenFirstComment = true;
                    } else if (!seenFirstComment) {
                        firstComment.append((char) c);
                    }
                } else {
                    if (hadStar && !seenFirstComment) {
                        firstComment.append('*');
                    }
                    hadStar = false;
                    if (!seenFirstComment) {
                        firstComment.append((char) c);
                    }
                }
            } else if (inSingleQuotes) {
                // A backslash is held back (hadBackSlash) and written together with the char it escapes.
                switch (c) {
                case '\\':
                    if (hadBackSlash) {
                        out.write('\\');
                        out.write('\\');
                        hadBackSlash = false;
                    } else {
                        hadBackSlash = true;
                    }
                    break;
                case '\'':
                    if (hadBackSlash) {
                        out.write('\\');
                        hadBackSlash = false;
                    } else {
                        inSingleQuotes = false;
                    }
                    out.write('\'');
                    break;
                case '\r':
                case '\n':
                    throw new ImagingException("Unterminated single quote in file");
                default:
                    if (hadBackSlash) {
                        out.write('\\');
                        hadBackSlash = false;
                    }
                    out.write(c);
                    break;
                }
            } else if (inString) {
                // Same escape handling as for character literals above.
                switch (c) {
                case '\\':
                    if (hadBackSlash) {
                        out.write('\\');
                        out.write('\\');
                        hadBackSlash = false;
                    } else {
                        hadBackSlash = true;
                    }
                    break;
                case '"':
                    if (hadBackSlash) {
                        out.write('\\');
                        hadBackSlash = false;
                    } else {
                        inString = false;
                    }
                    out.write('"');
                    break;
                case '\r':
                case '\n':
                    throw new ImagingException("Unterminated string in file");
                default:
                    if (hadBackSlash) {
                        out.write('\\');
                        hadBackSlash = false;
                    }
                    out.write(c);
                    break;
                }
            } else if (inDirective) {
                // A directive runs up to the end of its line; the line break itself is not written to the output.
                if (c == '\r' || c == '\n') {
                    inDirective = false;
                    final String[] tokens = tokenizeRow(directiveBuffer.toString());
                    if (tokens.length < 2 || tokens.length > 3) {
                        throw new ImagingException("Bad preprocessor directive");
                    }
                    if (!tokens[0].equals("define")) {
                        throw new ImagingException("Invalid/unsupported " + "preprocessor directive '" + tokens[0] + "'");
                    }
                    defines.put(tokens[1], tokens.length == 3 ? tokens[2] : null);
                    directiveBuffer.setLength(0);
                } else {
                    directiveBuffer.append((char) c);
                }
            } else {
                // A '/' is held back (hadSlash) until the next char shows whether it opens a comment.
                switch (c) {
                case '/':
                    if (hadSlash) {
                        out.write('/');
                    }
                    hadSlash = true;
                    break;
                case '*':
                    if (hadSlash) {
                        inComment = true;
                        hadSlash = false;
                    } else {
                        out.write(c);
                    }
                    break;
                case '\'':
                    if (hadSlash) {
                        out.write('/');
                    }
                    hadSlash = false;
                    out.write(c);
                    inSingleQuotes = true;
                    break;
                case '"':
                    if (hadSlash) {
                        out.write('/');
                    }
                    hadSlash = false;
                    out.write(c);
                    inString = true;
                    break;
                case '#':
                    if (defines == null) {
                        throw new ImagingException("Unexpected preprocessor directive");
                    }
                    inDirective = true;
                    break;
                default:
                    if (hadSlash) {
                        out.write('/');
                    }
                    hadSlash = false;
                    out.write(c);
                    // Only whitespace allowed before first comment:
                    if (c != ' ' && c != '\t' && c != '\r' && c != '\n') {
                        seenFirstComment = true;
                    }
                    break;
                }
            }
        }
        // Flush a '/' that was held back when the input ended.
        if (hadSlash) {
            out.write('/');
        }
        if (hadStar) {
            out.write('*');
        }
        // NOTE(review): a directive that ends at EOF without a trailing line break is dropped without being
        // recorded in 'defines', and an unterminated character literal at EOF is not reported. Confirm whether
        // callers rely on this before changing it.
        if (inString) {
            throw new ImagingException("Unterminated string at the end of file");
        }
        if (inComment) {
            throw new ImagingException("Unterminated comment at the end of file");
        }
        return out;
    }

    /**
     * Splits {@code row} at every space and tab character and returns the non-empty pieces in their original order.
     *
     * @param row the text to split
     * @return the non-empty tokens; empty if {@code row} holds nothing but spaces and tabs
     */
    public static String[] tokenizeRow(final String row) {
        final String[] tokens = row.split("[ \t]");
        // First pass: count the non-empty tokens so the result array can be sized exactly.
        int numLiveTokens = 0;
        for (final String token : tokens) {
            if (token != null && !token.isEmpty()) {
                ++numLiveTokens;
            }
        }
        // NOTE(review): the last argument is presumably a per-element size hint for Allocator - confirm against Allocator.array().
        final String[] liveTokens = Allocator.array(numLiveTokens, String[]::new, 24);
        int next = 0;
        for (final String token : tokens) {
            if (token != null && !token.isEmpty()) {
                liveTokens[next++] = token;
            }
        }
        return liveTokens;
    }

    /**
     * Decodes a double-quoted C string literal and appends the decoded characters to {@code stringBuilder}.
     *
     * <p>
     * The surrounding quotes are dropped and backslash escape sequences are replaced by the characters they denote.
     * </p>
     *
     * @param stringBuilder the builder the decoded characters are appended to
     * @param string        the literal to decode, including its opening and closing {@code "}
     * @throws ImagingException if the literal is shorter than two characters, is not enclosed in {@code "}, contains an unescaped {@code "}, or contains an
     *                          invalid or unterminated escape sequence
     */
    public static void unescapeString(final StringBuilder stringBuilder, final String string) throws ImagingException {
        if (string.length() < 2) {
            throw new ImagingException("Parsing XPM file failed, " + "string is too short");
        }
        if (string.charAt(0) != '"' || string.charAt(string.length() - 1) != '"') {
            throw new ImagingException("Parsing XPM file failed, " + "string not surrounded by '\"'");
        }
        boolean hadBackSlash = false;
        // Walk the characters between the two enclosing quotes.
        for (int i = 1; i < string.length() - 1; i++) {
            final char c = string.charAt(i);
            if (hadBackSlash) {
                // parseEscape() may consume more than one char and returns the index of the last one it used.
                i = parseEscape(i, stringBuilder, string);
                hadBackSlash = false;
            } else if (c == '\\') {
                hadBackSlash = true;
            } else if (c == '"') {
                throw new ImagingException("Parsing XPM file failed, " + "extra '\"' found in string");
            } else {
                stringBuilder.append(c);
            }
        }
        if (hadBackSlash) {
            throw new ImagingException("Parsing XPM file failed, " + "unterminated escape sequence found in string");
        }
    }

    /** The input, wrapped so that the char that ends an identifier can be pushed back for the next token. */
    private final PushbackInputStream is;

    /**
     * Constructs a parser that reads its tokens from {@code is}.
     *
     * @param is the stream holding the C source to tokenize
     */
    public BasicCParser(final ByteArrayInputStream is) {
        this.is = new PushbackInputStream(is);
    }

    /**
     * Reads the next token from the input.
     *
     * <p>
     * A token is one of: a double-quoted string literal (returned with its quotes and with escape sequences left undecoded), an identifier made of letters,
     * digits and {@code _}, or one of the single characters <code>{ } [ ] * ; = ,</code>. Spaces, tabs and line breaks between tokens are skipped.
     * </p>
     *
     * @return the next token, or {@code null} if the input is exhausted
     * @throws IOException      if reading the input fails
     * @throws ImagingException if an unsupported character is found, or a string literal contains a line break or is still open at the end of the input
     */
    public String nextToken() throws IOException, ImagingException {
        // I don't know how complete the C parsing in an XPM file
        // is meant to be, this is just the very basics...

        boolean inString = false;
        boolean inIdentifier = false;
        boolean hadBackSlash = false;
        final StringBuilder token = new StringBuilder();
        for (int c = is.read(); c != -1; c = is.read()) {
            if (inString) {
                switch (c) {
                case '\\':
                    token.append('\\');
                    // Toggle, so that the second backslash of "\\" does not escape the char after it.
                    hadBackSlash = !hadBackSlash;
                    break;
                case '"':
                    token.append('"');
                    if (!hadBackSlash) {
                        return token.toString();
                    }
                    hadBackSlash = false;
                    break;
                case '\r':
                case '\n':
                    throw new ImagingException("Unterminated string in XPM file");
                default:
                    token.append((char) c);
                    hadBackSlash = false;
                    break;
                }
            } else if (inIdentifier) {
                if (!Character.isLetterOrDigit(c) && c != '_') {
                    // This char belongs to the next token: push it back.
                    is.unread(c);
                    return token.toString();
                }
                token.append((char) c);
            } else if (c == '"') {
                token.append('"');
                inString = true;
            } else if (Character.isLetterOrDigit(c) || c == '_') {
                token.append((char) c);
                inIdentifier = true;
            } else if (c == '{' || c == '}' || c == '[' || c == ']' || c == '*' || c == ';' || c == '=' || c == ',') {
                token.append((char) c);
                return token.toString();
            } else if (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
                // ignore
            } else {
                throw new ImagingException("Unhandled/invalid character '" + (char) c + "' found in XPM file");
            }
        }

        if (inIdentifier) {
            return token.toString();
        }
        if (inString) {
            throw new ImagingException("Unterminated string ends XPM file");
        }
        return null;
    }

}