001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.imaging.formats.tiff;
018
019import static org.apache.commons.imaging.common.BinaryFunctions.read2Bytes;
020import static org.apache.commons.imaging.common.BinaryFunctions.read4Bytes;
021import static org.apache.commons.imaging.common.BinaryFunctions.read8Bytes;
022import static org.apache.commons.imaging.common.BinaryFunctions.readByte;
023import static org.apache.commons.imaging.common.BinaryFunctions.readBytes;
024import static org.apache.commons.imaging.common.BinaryFunctions.skipBytes;
025import static org.apache.commons.imaging.formats.tiff.constants.TiffConstants.TIFF_ENTRY_MAX_VALUE_LENGTH;
026import static org.apache.commons.imaging.formats.tiff.constants.TiffConstants.TIFF_ENTRY_MAX_VALUE_LENGTH_BIG;
027import static org.apache.commons.imaging.formats.tiff.constants.TiffConstants.TIFF_VERSION_BIG;
028import static org.apache.commons.imaging.formats.tiff.constants.TiffConstants.TIFF_VERSION_STANDARD;
029
030import java.io.IOException;
031import java.io.InputStream;
032import java.nio.ByteOrder;
033import java.util.ArrayList;
034import java.util.List;
035
036import org.apache.commons.imaging.FormatCompliance;
037import org.apache.commons.imaging.ImagingException;
038import org.apache.commons.imaging.bytesource.ByteSource;
039import org.apache.commons.imaging.common.BinaryFileParser;
040import org.apache.commons.imaging.common.ByteConversions;
041import org.apache.commons.imaging.formats.jpeg.JpegConstants;
042import org.apache.commons.imaging.formats.tiff.TiffDirectory.ImageDataElement;
043import org.apache.commons.imaging.formats.tiff.constants.ExifTagConstants;
044import org.apache.commons.imaging.formats.tiff.constants.TiffDirectoryConstants;
045import org.apache.commons.imaging.formats.tiff.constants.TiffTagConstants;
046import org.apache.commons.imaging.formats.tiff.fieldtypes.AbstractFieldType;
047import org.apache.commons.imaging.formats.tiff.taginfos.TagInfoDirectory;
048
049public class TiffReader extends BinaryFileParser {
050
051    private static class Collector implements Listener {
052
053        private TiffHeader tiffHeader;
054        private final List<TiffDirectory> directories = new ArrayList<>();
055        private final List<TiffField> fields = new ArrayList<>();
056        private final boolean readThumbnails;
057
058        Collector() {
059            this(new TiffImagingParameters());
060        }
061
062        Collector(final TiffImagingParameters params) {
063            this.readThumbnails = params.isReadThumbnails();
064        }
065
066        @Override
067        public boolean addDirectory(final TiffDirectory directory) {
068            directories.add(directory);
069            return true;
070        }
071
072        @Override
073        public boolean addField(final TiffField field) {
074            fields.add(field);
075            return true;
076        }
077
078        public TiffContents getContents() {
079            return new TiffContents(tiffHeader, directories, fields);
080        }
081
082        @Override
083        public boolean readImageData() {
084            return readThumbnails;
085        }
086
087        @Override
088        public boolean readOffsetDirectories() {
089            return true;
090        }
091
092        @Override
093        public boolean setTiffHeader(final TiffHeader tiffHeader) {
094            this.tiffHeader = tiffHeader;
095            return true;
096        }
097    }
098
099    private static final class FirstDirectoryCollector extends Collector {
100        private final boolean readImageData;
101
102        FirstDirectoryCollector(final boolean readImageData) {
103            this.readImageData = readImageData;
104        }
105
106        @Override
107        public boolean addDirectory(final TiffDirectory directory) {
108            super.addDirectory(directory);
109            return false;
110        }
111
112        @Override
113        public boolean readImageData() {
114            return readImageData;
115        }
116    }
117
118    public interface Listener {
119        boolean addDirectory(TiffDirectory directory);
120
121        boolean addField(TiffField field);
122
123        boolean readImageData();
124
125        boolean readOffsetDirectories();
126
127        boolean setTiffHeader(TiffHeader tiffHeader);
128    }
129
130    private final boolean strict;
131    private boolean bigTiff;
132    private boolean standardTiff;
133    private int entryMaxValueLength;
134
135    public TiffReader(final boolean strict) {
136        this.strict = strict;
137    }
138
139    private JpegImageData getJpegRawImageData(final ByteSource byteSource, final TiffDirectory directory) throws ImagingException, IOException {
140        final ImageDataElement element = directory.getJpegRawImageDataElement();
141        final long offset = element.offset;
142        int length = element.length;
143        // In case the length is not correct, adjust it and check if the last read byte actually is the end of the image
144        if (offset + length > byteSource.size()) {
145            length = (int) (byteSource.size() - offset);
146        }
147        final byte[] data = byteSource.getByteArray(offset, length);
148        // check if the last read byte is actually the end of the image data
149        if (strict && (length < 2 || ((data[data.length - 2] & 0xff) << 8 | data[data.length - 1] & 0xff) != JpegConstants.EOI_MARKER)) {
150            throw new ImagingException("JPEG EOI marker could not be found at expected location");
151        }
152        return new JpegImageData(offset, length, data);
153    }
154
155    private ByteOrder getTiffByteOrder(final int byteOrderByte) throws ImagingException {
156        if (byteOrderByte == 'I') {
157            return ByteOrder.LITTLE_ENDIAN; // Intel
158        }
159        if (byteOrderByte == 'M') {
160            return ByteOrder.BIG_ENDIAN; // Motorola
161        }
162        throw new ImagingException("Invalid TIFF byte order " + (0xff & byteOrderByte));
163    }
164
165    private AbstractTiffImageData getTiffRawImageData(final ByteSource byteSource, final TiffDirectory directory) throws ImagingException, IOException {
166
167        final List<ImageDataElement> elements = directory.getTiffRawImageDataElements();
168        final AbstractTiffImageData.Data[] data = new AbstractTiffImageData.Data[elements.size()];
169
170        for (int i = 0; i < elements.size(); i++) {
171            final TiffDirectory.ImageDataElement element = elements.get(i);
172            final byte[] bytes = byteSource.getByteArray(element.offset, element.length);
173            data[i] = new AbstractTiffImageData.Data(element.offset, element.length, bytes);
174        }
175
176        if (directory.imageDataInStrips()) {
177            final TiffField rowsPerStripField = directory.findField(TiffTagConstants.TIFF_TAG_ROWS_PER_STRIP);
178            //
179            // Default value of rowsPerStripField is assumed to be infinity
180            // https://www.awaresystems.be/imaging/tiff/tifftags/rowsperstrip.html
181            //
182            int rowsPerStrip = Integer.MAX_VALUE;
183
184            if (null != rowsPerStripField) {
185                rowsPerStrip = rowsPerStripField.getIntValue();
186            } else {
187                final TiffField imageHeight = directory.findField(TiffTagConstants.TIFF_TAG_IMAGE_LENGTH);
188                //
189                // if rows per strip not present then rowsPerStrip is equal to
190                // imageLength or an infinity value;
191                //
192                if (imageHeight != null) {
193                    rowsPerStrip = imageHeight.getIntValue();
194                }
195
196            }
197
198            return new AbstractTiffImageData.Strips(data, rowsPerStrip);
199        }
200        final TiffField tileWidthField = directory.findField(TiffTagConstants.TIFF_TAG_TILE_WIDTH);
201        if (null == tileWidthField) {
202            throw new ImagingException("Can't find tile width field.");
203        }
204        final int tileWidth = tileWidthField.getIntValue();
205
206        final TiffField tileLengthField = directory.findField(TiffTagConstants.TIFF_TAG_TILE_LENGTH);
207        if (null == tileLengthField) {
208            throw new ImagingException("Can't find tile length field.");
209        }
210        final int tileLength = tileLengthField.getIntValue();
211
212        return new AbstractTiffImageData.Tiles(data, tileWidth, tileLength);
213    }
214
215    public void read(final ByteSource byteSource, final FormatCompliance formatCompliance, final Listener listener) throws ImagingException, IOException {
216        readDirectories(byteSource, formatCompliance, listener);
217    }
218
219    public TiffContents readContents(final ByteSource byteSource, final TiffImagingParameters params, final FormatCompliance formatCompliance)
220            throws ImagingException, IOException {
221
222        final Collector collector = new Collector(params);
223        read(byteSource, formatCompliance, collector);
224        return collector.getContents();
225    }
226
227    public TiffContents readDirectories(final ByteSource byteSource, final boolean readImageData, final FormatCompliance formatCompliance)
228            throws ImagingException, IOException {
229        final TiffImagingParameters params = new TiffImagingParameters();
230        params.setReadThumbnails(readImageData);
231        final Collector collector = new Collector(params);
232        readDirectories(byteSource, formatCompliance, collector);
233        final TiffContents contents = collector.getContents();
234        if (contents.directories.isEmpty()) {
235            throw new ImagingException("Image did not contain any directories.");
236        }
237        return contents;
238    }
239
240//    NOT USED
241//    private static final class DirectoryCollector extends Collector {
242//        private final boolean readImageData;
243//
244//        public DirectoryCollector(final boolean readImageData) {
245//            this.readImageData = readImageData;
246//        }
247//
248//        @Override
249//        public boolean addDirectory(final TiffDirectory directory) {
250//            super.addDirectory(directory);
251//            return false;
252//        }
253//
254//        @Override
255//        public boolean readImageData() {
256//            return readImageData;
257//        }
258//    }
259
260    private void readDirectories(final ByteSource byteSource, final FormatCompliance formatCompliance, final Listener listener)
261            throws ImagingException, IOException {
262        final TiffHeader tiffHeader = readTiffHeader(byteSource);
263        if (!listener.setTiffHeader(tiffHeader)) {
264            return;
265        }
266
267        final long offset = tiffHeader.offsetToFirstIFD;
268        final int dirType = TiffDirectoryConstants.DIRECTORY_TYPE_ROOT;
269
270        final List<Number> visited = new ArrayList<>();
271        readDirectory(byteSource, offset, dirType, formatCompliance, listener, visited);
272    }
273
274    private boolean readDirectory(final ByteSource byteSource, final long directoryOffset, final int dirType, final FormatCompliance formatCompliance,
275            final Listener listener, final boolean ignoreNextDirectory, final List<Number> visited) throws ImagingException, IOException {
276
277        if (visited.contains(directoryOffset)) {
278            return false;
279        }
280        visited.add(directoryOffset);
281
282        try (InputStream is = byteSource.getInputStream()) {
283            if (directoryOffset >= byteSource.size()) {
284                return true;
285            }
286
287            skipBytes(is, directoryOffset);
288
289            final List<TiffField> fields = new ArrayList<>();
290
291            long entryCount;
292            try {
293                if (standardTiff) {
294                    entryCount = read2Bytes("DirectoryEntryCount", is, "Not a Valid TIFF File", getByteOrder());
295                } else {
296                    entryCount = read8Bytes("DirectoryEntryCount", is, "Not a Valid TIFF File", getByteOrder());
297                }
298            } catch (final IOException e) {
299                if (strict) {
300                    throw e;
301                }
302                return true;
303            }
304
305            for (int i = 0; i < entryCount; i++) {
306                final int tag = read2Bytes("Tag", is, "Not a Valid TIFF File", getByteOrder());
307                final int type = read2Bytes("Type", is, "Not a Valid TIFF File", getByteOrder());
308                final long count;
309                final byte[] offsetBytes;
310                final long offset;
311                if (standardTiff) {
312                    count = 0xFFFFffffL & read4Bytes("Count", is, "Not a Valid TIFF File", getByteOrder());
313                    offsetBytes = readBytes("Offset", is, 4, "Not a Valid TIFF File");
314                    offset = 0xFFFFffffL & ByteConversions.toInt(offsetBytes, getByteOrder());
315                } else {
316                    count = read8Bytes("Count", is, "Not a Valid TIFF File", getByteOrder());
317                    offsetBytes = readBytes("Offset", is, 8, "Not a Valid TIFF File");
318                    offset = ByteConversions.toLong(offsetBytes, getByteOrder());
319                }
320
321                if (tag == 0) {
322                    // skip invalid fields.
323                    // These are seen very rarely, but can have invalid value
324                    // lengths,
325                    // which can cause OOM problems.
326                    continue;
327                }
328
329                final AbstractFieldType abstractFieldType;
330                try {
331                    abstractFieldType = AbstractFieldType.getFieldType(type);
332                } catch (final ImagingException imageReadEx) {
333                    // skip over unknown fields types, since we
334                    // can't calculate their size without
335                    // knowing their type
336                    continue;
337                }
338                final long valueLength = count * abstractFieldType.getSize();
339                final byte[] value;
340                if (valueLength > entryMaxValueLength) {
341                    if (offset < 0 || offset + valueLength > byteSource.size()) {
342                        if (strict) {
343                            throw new IOException("Attempt to read byte range starting from " + offset + " " + "of length " + valueLength + " "
344                                    + "which is outside the file's size of " + byteSource.size());
345                        }
346                        // corrupt field, ignore it
347                        continue;
348                    }
349                    value = byteSource.getByteArray(offset, (int) valueLength);
350                } else {
351                    value = offsetBytes;
352                }
353
354                final TiffField field = new TiffField(tag, dirType, abstractFieldType, count, offset, value, getByteOrder(), i);
355
356                fields.add(field);
357
358                if (!listener.addField(field)) {
359                    return true;
360                }
361            }
362
363            final long nextDirectoryOffset = 0xFFFFffffL & read4Bytes("nextDirectoryOffset", is, "Not a Valid TIFF File", getByteOrder());
364
365            final TiffDirectory directory = new TiffDirectory(dirType, fields, directoryOffset, nextDirectoryOffset, getByteOrder());
366
367            if (listener.readImageData()) {
368                if (directory.hasTiffImageData()) {
369                    final AbstractTiffImageData rawImageData = getTiffRawImageData(byteSource, directory);
370                    directory.setTiffImageData(rawImageData);
371                }
372                if (directory.hasJpegImageData()) {
373                    final JpegImageData rawJpegImageData = getJpegRawImageData(byteSource, directory);
374                    directory.setJpegImageData(rawJpegImageData);
375                }
376            }
377
378            if (!listener.addDirectory(directory)) {
379                return true;
380            }
381
382            if (listener.readOffsetDirectories()) {
383                final TagInfoDirectory[] offsetFields = { ExifTagConstants.EXIF_TAG_EXIF_OFFSET, ExifTagConstants.EXIF_TAG_GPSINFO,
384                        ExifTagConstants.EXIF_TAG_INTEROP_OFFSET };
385                final int[] directoryTypes = { TiffDirectoryConstants.DIRECTORY_TYPE_EXIF, TiffDirectoryConstants.DIRECTORY_TYPE_GPS,
386                        TiffDirectoryConstants.DIRECTORY_TYPE_INTEROPERABILITY };
387                for (int i = 0; i < offsetFields.length; i++) {
388                    final TagInfoDirectory offsetField = offsetFields[i];
389                    final TiffField field = directory.findField(offsetField);
390                    if (field != null) {
391                        long subDirectoryOffset;
392                        int subDirectoryType;
393                        boolean subDirectoryRead = false;
394                        try {
395                            subDirectoryOffset = directory.getFieldValue(offsetField);
396                            subDirectoryType = directoryTypes[i];
397                            subDirectoryRead = readDirectory(byteSource, subDirectoryOffset, subDirectoryType, formatCompliance, listener, true, visited);
398
399                        } catch (final ImagingException imageReadException) {
400                            if (strict) {
401                                throw imageReadException;
402                            }
403                        }
404                        if (!subDirectoryRead) {
405                            fields.remove(field);
406                        }
407                    }
408                }
409            }
410
411            if (!ignoreNextDirectory && directory.getNextDirectoryOffset() > 0) {
412                // Debug.debug("next dir", directory.nextDirectoryOffset );
413                readDirectory(byteSource, directory.getNextDirectoryOffset(), dirType + 1, formatCompliance, listener, visited);
414            }
415
416            return true;
417        }
418    }
419
420    private boolean readDirectory(final ByteSource byteSource, final long offset, final int dirType, final FormatCompliance formatCompliance,
421            final Listener listener, final List<Number> visited) throws ImagingException, IOException {
422        final boolean ignoreNextDirectory = false;
423        return readDirectory(byteSource, offset, dirType, formatCompliance, listener, ignoreNextDirectory, visited);
424    }
425
426    public TiffContents readFirstDirectory(final ByteSource byteSource, final boolean readImageData, final FormatCompliance formatCompliance)
427            throws ImagingException, IOException {
428        final Collector collector = new FirstDirectoryCollector(readImageData);
429        read(byteSource, formatCompliance, collector);
430        final TiffContents contents = collector.getContents();
431        if (contents.directories.isEmpty()) {
432            throw new ImagingException("Image did not contain any directories.");
433        }
434        return contents;
435    }
436
437    private TiffHeader readTiffHeader(final ByteSource byteSource) throws ImagingException, IOException {
438        try (InputStream is = byteSource.getInputStream()) {
439            return readTiffHeader(is);
440        }
441    }
442
443    private TiffHeader readTiffHeader(final InputStream is) throws ImagingException, IOException {
444        final int byteOrder1 = readByte("BYTE_ORDER_1", is, "Not a Valid TIFF File");
445        final int byteOrder2 = readByte("BYTE_ORDER_2", is, "Not a Valid TIFF File");
446        if (byteOrder1 != byteOrder2) {
447            throw new ImagingException("Byte Order bytes don't match (" + byteOrder1 + ", " + byteOrder2 + ").");
448        }
449
450        final ByteOrder byteOrder = getTiffByteOrder(byteOrder1);
451        setByteOrder(byteOrder);
452
453        // verify that the file is a supported TIFF format using
454        // the numeric indentifier
455        // Classic TIFF (32 bit): 42
456        // Big TIFF (64 bit): 43
457        //
458        final long offsetToFirstIFD;
459        final int tiffVersion = read2Bytes("tiffVersion", is, "Not a Valid TIFF File", getByteOrder());
460        if (tiffVersion == TIFF_VERSION_STANDARD) {
461            bigTiff = false;
462            standardTiff = true;
463            entryMaxValueLength = TIFF_ENTRY_MAX_VALUE_LENGTH;
464            offsetToFirstIFD = 0xFFFFffffL & read4Bytes("offsetToFirstIFD", is, "Not a Valid TIFF File", getByteOrder());
465        } else if (tiffVersion == TIFF_VERSION_BIG) {
466            bigTiff = true;
467            standardTiff = false;
468            entryMaxValueLength = TIFF_ENTRY_MAX_VALUE_LENGTH_BIG;
469            final int byteSize = read2Bytes("bytesizeOfOffset", is, "Not a Valid TIFF File", getByteOrder());
470            final int expectedZero = read2Bytes("expectedZero", is, "Not a Valid TIFF File", getByteOrder());
471            if (byteSize != 8 || expectedZero != 0) {
472                throw new ImagingException("Misformed Big-TIFF header: " + tiffVersion);
473            }
474            offsetToFirstIFD = read8Bytes("offsetToFirstIFD", is, "Not a Valid TIFF File", getByteOrder());
475        } else {
476            throw new ImagingException("Unknown TIFF Version: " + tiffVersion);
477        }
478
479        skipBytes(is, offsetToFirstIFD - 8, "Not a Valid TIFF File: couldn't find IFDs");
480
481        return new TiffHeader(byteOrder, tiffVersion, offsetToFirstIFD, bigTiff);
482    }
483}