001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.imaging.common;
018
019import java.io.ByteArrayInputStream;
020import java.io.ByteArrayOutputStream;
021import java.io.IOException;
022import java.io.InputStream;
023import java.io.PushbackInputStream;
024import java.util.Map;
025
026import org.apache.commons.imaging.ImagingException;
027
028/**
029 * A rudimentary preprocessor and parser for the C programming language.
030 *
031 * FIXME replace this by a parser generated via ANTLR (if we really need it?!)
032 */
033public class BasicCParser {
034    /**
035     * Parses the hexadecimal-base escape-sequence found at index {@code i} of {@code string}.
036     *
037     * <p>
038     * Helper-function for {@code unescapeString()}.
039     * </p>
040     *
041     * @param i             the index of the escape-sequence in the string
042     * @param stringBuilder the stringBuilder to append the escape-char to
043     * @param string        the string whose chars are parsed
044     * @return the new index i
045     * @since 1.0-alpha3
046     */
047    private static int appendHex(int i, final StringBuilder stringBuilder, final String string) throws ImagingException {
048        if (i + 2 >= string.length()) {
049            throw new ImagingException("Parsing XPM file failed, " + "hex constant in string too short");
050        }
051        final char hex1 = string.charAt(i + 1);
052        final char hex2 = string.charAt(i + 2);
053        i += 2;
054        int constant;
055        try {
056            constant = Integer.parseInt(hex1 + Character.toString(hex2), 16);
057        } catch (final NumberFormatException nfe) {
058            throw new ImagingException("Parsing XPM file failed, " + "hex constant invalid", nfe);
059        }
060        stringBuilder.append((char) constant);
061        return i;
062    }
063
064    /**
065     * Parses the octal-base escape-sequence found at index {@code i} of {@code string}.
066     *
067     * <p>
068     * Helper-function for {@code unescapeString()}.
069     * </p>
070     *
071     * @param i             the index of the escape-sequence in the string
072     * @param stringBuilder the stringBuilder to append the escape-char to
073     * @param string        the string whose chars are parsed
074     * @return the new index i
075     * @since 1.0-alpha3
076     */
077    private static int appendOct(int i, final StringBuilder stringBuilder, final String string) {
078        int length = 1;
079        if (i + 1 < string.length() && '0' <= string.charAt(i + 1) && string.charAt(i + 1) <= '7') {
080            ++length;
081        }
082        if (i + 2 < string.length() && '0' <= string.charAt(i + 2) && string.charAt(i + 2) <= '7') {
083            ++length;
084        }
085        int constant = 0;
086        for (int j = 0; j < length; j++) {
087            constant *= 8;
088            constant += string.charAt(i + j) - '0';
089        }
090        i += length - 1;
091        stringBuilder.append((char) constant);
092        return i;
093    }
094
095    /**
096     * Parses the {@code i:th} escape-char in the input {@code string} and appends it to {@code stringBuilder}.
097     *
098     * <p>
099     * Helper-function for {@code unescapeString()}.
100     * </p>
101     *
102     * @param i             the index of the escape-char in the string
103     * @param stringBuilder the stringBuilder to append the escape-char to
104     * @param string        the string whose chars are parsed
105     * @return the new index i
106     * @since 1.0-alpha3
107     */
108    private static int parseEscape(int i, final StringBuilder stringBuilder, final String string) throws ImagingException {
109        final char c = string.charAt(i);
110        switch (c) {
111        case '\\':
112            stringBuilder.append('\\');
113            break;
114        case '"':
115            stringBuilder.append('"');
116            break;
117        case '\'':
118            stringBuilder.append('\'');
119            break;
120        case 'x':
121            i = appendHex(i, stringBuilder, string);
122            break;
123        case '0':
124        case '1':
125        case '2':
126        case '3':
127        case '4':
128        case '5':
129        case '6':
130        case '7':
131            i = appendOct(i, stringBuilder, string);
132            break;
133        case 'a':
134            stringBuilder.append((char) 0x07);
135            break;
136        case 'b':
137            stringBuilder.append((char) 0x08);
138            break;
139        case 'f':
140            stringBuilder.append((char) 0x0c);
141            break;
142        case 'n':
143            stringBuilder.append((char) 0x0a);
144            break;
145        case 'r':
146            stringBuilder.append((char) 0x0d);
147            break;
148        case 't':
149            stringBuilder.append((char) 0x09);
150            break;
151        case 'v':
152            stringBuilder.append((char) 0x0b);
153            break;
154        default:
155            throw new ImagingException("Parsing XPM file failed, " + "invalid escape sequence");
156        }
157        return i;
158
159    }
160
161    public static ByteArrayOutputStream preprocess(final InputStream is, final StringBuilder firstComment, final Map<String, String> defines)
162            throws IOException, ImagingException {
163        boolean inSingleQuotes = false;
164        boolean inString = false;
165        boolean inComment = false;
166        boolean inDirective = false;
167        boolean hadSlash = false;
168        boolean hadStar = false;
169        boolean hadBackSlash = false;
170        final ByteArrayOutputStream out = new ByteArrayOutputStream();
171        boolean seenFirstComment = firstComment == null;
172        final StringBuilder directiveBuffer = new StringBuilder();
173        for (int c = is.read(); c != -1; c = is.read()) {
174            if (inComment) {
175                if (c == '*') {
176                    if (hadStar && !seenFirstComment) {
177                        firstComment.append('*');
178                    }
179                    hadStar = true;
180                } else if (c == '/') {
181                    if (hadStar) {
182                        hadStar = false;
183                        inComment = false;
184                        seenFirstComment = true;
185                    } else if (!seenFirstComment) {
186                        firstComment.append((char) c);
187                    }
188                } else {
189                    if (hadStar && !seenFirstComment) {
190                        firstComment.append('*');
191                    }
192                    hadStar = false;
193                    if (!seenFirstComment) {
194                        firstComment.append((char) c);
195                    }
196                }
197            } else if (inSingleQuotes) {
198                switch (c) {
199                case '\\':
200                    if (hadBackSlash) {
201                        out.write('\\');
202                        out.write('\\');
203                        hadBackSlash = false;
204                    } else {
205                        hadBackSlash = true;
206                    }
207                    break;
208                case '\'':
209                    if (hadBackSlash) {
210                        out.write('\\');
211                        hadBackSlash = false;
212                    } else {
213                        inSingleQuotes = false;
214                    }
215                    out.write('\'');
216                    break;
217                case '\r':
218                case '\n':
219                    throw new ImagingException("Unterminated single quote in file");
220                default:
221                    if (hadBackSlash) {
222                        out.write('\\');
223                        hadBackSlash = false;
224                    }
225                    out.write(c);
226                    break;
227                }
228            } else if (inString) {
229                switch (c) {
230                case '\\':
231                    if (hadBackSlash) {
232                        out.write('\\');
233                        out.write('\\');
234                        hadBackSlash = false;
235                    } else {
236                        hadBackSlash = true;
237                    }
238                    break;
239                case '"':
240                    if (hadBackSlash) {
241                        out.write('\\');
242                        hadBackSlash = false;
243                    } else {
244                        inString = false;
245                    }
246                    out.write('"');
247                    break;
248                case '\r':
249                case '\n':
250                    throw new ImagingException("Unterminated string in file");
251                default:
252                    if (hadBackSlash) {
253                        out.write('\\');
254                        hadBackSlash = false;
255                    }
256                    out.write(c);
257                    break;
258                }
259            } else if (inDirective) {
260                if (c == '\r' || c == '\n') {
261                    inDirective = false;
262                    final String[] tokens = tokenizeRow(directiveBuffer.toString());
263                    if (tokens.length < 2 || tokens.length > 3) {
264                        throw new ImagingException("Bad preprocessor directive");
265                    }
266                    if (!tokens[0].equals("define")) {
267                        throw new ImagingException("Invalid/unsupported " + "preprocessor directive '" + tokens[0] + "'");
268                    }
269                    defines.put(tokens[1], tokens.length == 3 ? tokens[2] : null);
270                    directiveBuffer.setLength(0);
271                } else {
272                    directiveBuffer.append((char) c);
273                }
274            } else {
275                switch (c) {
276                case '/':
277                    if (hadSlash) {
278                        out.write('/');
279                    }
280                    hadSlash = true;
281                    break;
282                case '*':
283                    if (hadSlash) {
284                        inComment = true;
285                        hadSlash = false;
286                    } else {
287                        out.write(c);
288                    }
289                    break;
290                case '\'':
291                    if (hadSlash) {
292                        out.write('/');
293                    }
294                    hadSlash = false;
295                    out.write(c);
296                    inSingleQuotes = true;
297                    break;
298                case '"':
299                    if (hadSlash) {
300                        out.write('/');
301                    }
302                    hadSlash = false;
303                    out.write(c);
304                    inString = true;
305                    break;
306                case '#':
307                    if (defines == null) {
308                        throw new ImagingException("Unexpected preprocessor directive");
309                    }
310                    inDirective = true;
311                    break;
312                default:
313                    if (hadSlash) {
314                        out.write('/');
315                    }
316                    hadSlash = false;
317                    out.write(c);
318                    // Only whitespace allowed before first comment:
319                    if (c != ' ' && c != '\t' && c != '\r' && c != '\n') {
320                        seenFirstComment = true;
321                    }
322                    break;
323                }
324            }
325        }
326        if (hadSlash) {
327            out.write('/');
328        }
329        if (hadStar) {
330            out.write('*');
331        }
332        if (inString) {
333            throw new ImagingException("Unterminated string at the end of file");
334        }
335        if (inComment) {
336            throw new ImagingException("Unterminated comment at the end of file");
337        }
338        return out;
339    }
340
341    public static String[] tokenizeRow(final String row) {
342        final String[] tokens = row.split("[ \t]");
343        int numLiveTokens = 0;
344        for (final String token : tokens) {
345            if (token != null && !token.isEmpty()) {
346                ++numLiveTokens;
347            }
348        }
349        final String[] liveTokens = Allocator.array(numLiveTokens, String[]::new, 24);
350        int next = 0;
351        for (final String token : tokens) {
352            if (token != null && !token.isEmpty()) {
353                liveTokens[next++] = token;
354            }
355        }
356        return liveTokens;
357    }
358
359    public static void unescapeString(final StringBuilder stringBuilder, final String string) throws ImagingException {
360        if (string.length() < 2) {
361            throw new ImagingException("Parsing XPM file failed, " + "string is too short");
362        }
363        if (string.charAt(0) != '"' || string.charAt(string.length() - 1) != '"') {
364            throw new ImagingException("Parsing XPM file failed, " + "string not surrounded by '\"'");
365        }
366        boolean hadBackSlash = false;
367        for (int i = 1; i < string.length() - 1; i++) {
368            final char c = string.charAt(i);
369            if (hadBackSlash) {
370                i = parseEscape(i, stringBuilder, string);
371                hadBackSlash = false;
372            } else if (c == '\\') {
373                hadBackSlash = true;
374            } else if (c == '"') {
375                throw new ImagingException("Parsing XPM file failed, " + "extra '\"' found in string");
376            } else {
377                stringBuilder.append(c);
378            }
379        }
380        if (hadBackSlash) {
381            throw new ImagingException("Parsing XPM file failed, " + "unterminated escape sequence found in string");
382        }
383    }
384
385    private final PushbackInputStream is;
386
387    public BasicCParser(final ByteArrayInputStream is) {
388        this.is = new PushbackInputStream(is);
389    }
390
391    public String nextToken() throws IOException, ImagingException {
392        // I don't know how complete the C parsing in an XPM file
393        // is meant to be, this is just the very basics...
394
395        boolean inString = false;
396        boolean inIdentifier = false;
397        boolean hadBackSlash = false;
398        final StringBuilder token = new StringBuilder();
399        for (int c = is.read(); c != -1; c = is.read()) {
400            if (inString) {
401                switch (c) {
402                case '\\':
403                    token.append('\\');
404                    hadBackSlash = !hadBackSlash;
405                    break;
406                case '"':
407                    token.append('"');
408                    if (!hadBackSlash) {
409                        return token.toString();
410                    }
411                    hadBackSlash = false;
412                    break;
413                case '\r':
414                case '\n':
415                    throw new ImagingException("Unterminated string in XPM file");
416                default:
417                    token.append((char) c);
418                    hadBackSlash = false;
419                    break;
420                }
421            } else if (inIdentifier) {
422                if (!Character.isLetterOrDigit(c) && c != '_') {
423                    is.unread(c);
424                    return token.toString();
425                }
426                token.append((char) c);
427            } else if (c == '"') {
428                token.append('"');
429                inString = true;
430            } else if (Character.isLetterOrDigit(c) || c == '_') {
431                token.append((char) c);
432                inIdentifier = true;
433            } else if (c == '{' || c == '}' || c == '[' || c == ']' || c == '*' || c == ';' || c == '=' || c == ',') {
434                token.append((char) c);
435                return token.toString();
436            } else if (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
437                // ignore
438            } else {
439                throw new ImagingException("Unhandled/invalid character '" + (char) c + "' found in XPM file");
440            }
441        }
442
443        if (inIdentifier) {
444            return token.toString();
445        }
446        if (inString) {
447            throw new ImagingException("Unterminated string ends XMP file");
448        }
449        return null;
450    }
451
452}