001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *     http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.mail;
018
019import java.io.IOException;
020import java.util.HashMap;
021import java.util.Map;
022import java.util.regex.Matcher;
023import java.util.regex.Pattern;
024
025import javax.activation.DataSource;
026
027/**
028 * <p>
029 * Small wrapper class on top of HtmlEmail which encapsulates the required logic to retrieve images that are contained in "&lt;img src=../&gt;" elements in the
030 * HTML code. This is done by replacing all img-src-elements with "cid:"-entries and embedding images in the email.
031 * </p>
032 * <p>
033 * For local files the class tries to either load them via an absolute path or - if available - use a relative path starting from a base directory. For files
034 * that are not found locally, the implementation tries to download the element and link it in.
035 * </p>
036 * <p>
037 * The image loading is done by an instance of {@code DataSourceResolver} which has to be provided by the caller.
038 * </p>
039 *
040 * @since 1.3
041 */
042public class ImageHtmlEmail extends HtmlEmail {
043    // Regular Expression to find all <IMG SRC="..."> entries in an HTML
044    // document.It needs to cater for various things, like more whitespaces
045    // including newlines on any place, HTML is not case sensitive and there
046    // can be arbitrary text between "IMG" and "SRC" like IDs and other things.
047
048    /** Regexp for extracting {@code <img>} tags */
049    public static final String REGEX_IMG_SRC = "(<[Ii][Mm][Gg]\\s*[^>]*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])";
050
051    /** Regexp for extracting {@code <script>} tags */
052    public static final String REGEX_SCRIPT_SRC = "(<[Ss][Cc][Rr][Ii][Pp][Tt]\\s*.*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])";
053
054    // this pattern looks for the HTML image tag which indicates embedded images,
055    // the grouping is necessary to allow to replace the element with the CID
056
057    /** Pattern for extracting {@code <img>} tags */
058    private static final Pattern IMG_PATTERN = Pattern.compile(REGEX_IMG_SRC);
059
060    /** Pattern for extracting {@code <script>} tags */
061    private static final Pattern SCRIPT_PATTERN = Pattern.compile(REGEX_SCRIPT_SRC);
062
063    /** Resolve the images and script resources to a DataSource */
064    private DataSourceResolver dataSourceResolver;
065
066    /**
067     * Constructs a new instance.
068     */
069    public ImageHtmlEmail() {
070        // empty
071    }
072
073    /**
074     * Does the work of actually building the MimeMessage.
075     *
076     * @see org.apache.commons.mail.HtmlEmail#buildMimeMessage()
077     * @throws EmailException building the MimeMessage failed
078     */
079    @Override
080    public void buildMimeMessage() throws EmailException {
081        try {
082            // embed all the matching image and script resources within the email
083            String temp = replacePattern(getHtml(), IMG_PATTERN);
084            temp = replacePattern(temp, SCRIPT_PATTERN);
085            setHtmlMsg(temp);
086            super.buildMimeMessage();
087        } catch (final IOException e) {
088            throw new EmailException("Building the MimeMessage failed", e);
089        }
090    }
091
092    /**
093     * Gets the data source resolver.
094     *
095     * @return the resolver
096     */
097    public DataSourceResolver getDataSourceResolver() {
098        return dataSourceResolver;
099    }
100
101    /**
102     * Replace the regexp matching resource locations with "cid:..." references.
103     *
104     * @param htmlMessage the HTML message to analyze
105     * @param pattern     the regular expression to find resources
106     * @return the HTML message containing "cid" references
107     * @throws EmailException creating the email failed
108     * @throws IOException    resolving the resources failed
109     */
110    private String replacePattern(final String htmlMessage, final Pattern pattern) throws EmailException, IOException {
111        DataSource dataSource;
112        final StringBuffer stringBuffer = new StringBuffer();
113
114        // maps "cid" --> name
115        final Map<String, String> cidCache = new HashMap<>();
116
117        // maps "name" --> dataSource
118        final Map<String, DataSource> dataSourceCache = new HashMap<>();
119
120        // in the String, replace all "img src" with a CID and embed the related
121        // image file if we find it.
122        final Matcher matcher = pattern.matcher(htmlMessage);
123
124        // the matcher returns all instances one by one
125        while (matcher.find()) {
126            // in the RegEx we have the <src> element as second "group"
127            final String resourceLocation = matcher.group(2);
128
129            // avoid loading the same data source more than once
130            if (dataSourceCache.get(resourceLocation) == null) {
131                // in lenient mode we might get a 'null' data source if the resource was not found
132                dataSource = getDataSourceResolver().resolve(resourceLocation);
133
134                if (dataSource != null) {
135                    dataSourceCache.put(resourceLocation, dataSource);
136                }
137            } else {
138                dataSource = dataSourceCache.get(resourceLocation);
139            }
140
141            if (dataSource != null) {
142                String name = dataSource.getName();
143                if (EmailUtils.isEmpty(name)) {
144                    name = resourceLocation;
145                }
146
147                String cid = cidCache.get(name);
148
149                if (cid == null) {
150                    cid = embed(dataSource, name);
151                    cidCache.put(name, cid);
152                }
153
154                // if we embedded something, then we need to replace the URL with
155                // the CID, otherwise the Matcher takes care of adding the
156                // non-replaced text afterwards, so no else is necessary here!
157                matcher.appendReplacement(stringBuffer, Matcher.quoteReplacement(matcher.group(1) + "cid:" + cid + matcher.group(3)));
158            }
159        }
160
161        // append the remaining items...
162        matcher.appendTail(stringBuffer);
163
164        cidCache.clear();
165        dataSourceCache.clear();
166
167        return stringBuffer.toString();
168    }
169
170    /**
171     * Sets the data source resolver.
172     *
173     * @param dataSourceResolver the resolver
174     */
175    public void setDataSourceResolver(final DataSourceResolver dataSourceResolver) {
176        this.dataSourceResolver = dataSourceResolver;
177    }
178}