/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.mail;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.activation.DataSource;

/**
 * <p>
 * Small wrapper class on top of HtmlEmail which encapsulates the required logic to retrieve resources that are referenced by {@code <img src=../>} and
 * {@code <script src=../>} elements in the HTML code. This is done by replacing the matching {@code src} values with "cid:" entries and embedding the
 * resolved resources in the email.
 * </p>
 * <p>
 * For local files the class tries to either load them via an absolute path or - if available - use a relative path starting from a base directory. For files
 * that are not found locally, the implementation tries to download the element and link it in.
 * </p>
 * <p>
 * The resource loading is done by an instance of {@code DataSourceResolver} which has to be provided by the caller.
 * </p>
 *
 * @since 1.3
 */
public class ImageHtmlEmail extends HtmlEmail {
    // Regular expressions to find all <IMG SRC="..."> and <SCRIPT SRC="..."> entries in an HTML
    // document. They need to cater for various things, like additional whitespace
    // including newlines in any place, HTML not being case sensitive, and
    // arbitrary text between the tag name and "SRC" such as IDs and other attributes.
    //
    // Group 1 is everything up to and including the opening quote, group 2 is the
    // resource location and group 3 is the closing quote.

    /** Regexp for extracting {@code <img>} tags */
    public static final String REGEX_IMG_SRC = "(<[Ii][Mm][Gg]\\s*[^>]*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])";

    /**
     * Regexp for extracting {@code <script>} tags.
     * <p>
     * The attribute filler is {@code [^>]*?} (as in {@link #REGEX_IMG_SRC}) so that a match can never run past the end of the {@code <script ...>} tag and
     * pick up the {@code src} attribute of an unrelated element that follows an inline script on the same line.
     * </p>
     */
    public static final String REGEX_SCRIPT_SRC = "(<[Ss][Cc][Rr][Ii][Pp][Tt]\\s*[^>]*?\\s+[Ss][Rr][Cc]\\s*=\\s*[\"'])([^\"']+?)([\"'])";

    // these patterns look for the HTML tags which indicate embedded resources,
    // the grouping is necessary to allow replacing the location with the CID

    /** Pattern for extracting {@code <img>} tags */
    private static final Pattern IMG_PATTERN = Pattern.compile(REGEX_IMG_SRC);

    /** Pattern for extracting {@code <script>} tags */
    private static final Pattern SCRIPT_PATTERN = Pattern.compile(REGEX_SCRIPT_SRC);

    /** Resolve the images and script resources to a DataSource */
    private DataSourceResolver dataSourceResolver;

    /**
     * Constructs a new instance.
     */
    public ImageHtmlEmail() {
        // empty
    }

    /**
     * Does the work of actually building the MimeMessage.
     * <p>
     * The HTML part is first scanned for {@code <img src>} and then for {@code <script src>} references; every reference the resolver can supply is embedded
     * and rewritten to a "cid:" URL before the superclass builds the message.
     * </p>
     *
     * @see org.apache.commons.mail.HtmlEmail#buildMimeMessage()
     * @throws EmailException building the MimeMessage failed
     */
    @Override
    public void buildMimeMessage() throws EmailException {
        try {
            final String html = getHtml();

            // Pattern.matcher(null) would fail with a bare NullPointerException; without an HTML
            // part there is nothing to embed, so leave the decision to the superclass.
            // NOTE(review): assumes getHtml() returns null when no HTML message was set - confirm against HtmlEmail
            if (html != null) {
                // embed all the matching image and script resources within the email
                final String withImages = replacePattern(html, IMG_PATTERN);
                final String withScripts = replacePattern(withImages, SCRIPT_PATTERN);
                setHtmlMsg(withScripts);
            }

            super.buildMimeMessage();
        } catch (final IOException e) {
            throw new EmailException("Building the MimeMessage failed", e);
        }
    }

    /**
     * Gets the data source resolver.
     *
     * @return the resolver
     */
    public DataSourceResolver getDataSourceResolver() {
        return dataSourceResolver;
    }

    /**
     * Replace the regexp matching resource locations with "cid:..." references.
     *
     * @param htmlMessage the HTML message to analyze
     * @param pattern     the regular expression to find resources; group 1 must be the text before the location, group 2 the location and group 3 the text
     *                    after it
     * @return the HTML message containing "cid" references
     * @throws EmailException       creating the email failed
     * @throws IOException          resolving the resources failed
     * @throws NullPointerException if a resource reference is found but no data source resolver has been set
     */
    private String replacePattern(final String htmlMessage, final Pattern pattern) throws EmailException, IOException {
        // Matcher.appendReplacement(StringBuffer, ...) is the overload available on all supported Java versions
        final StringBuffer stringBuffer = new StringBuffer();

        // maps "name" --> "cid"
        final Map<String, String> cidCache = new HashMap<>();

        // maps resource location --> dataSource
        final Map<String, DataSource> dataSourceCache = new HashMap<>();

        // in the String, replace all matching "src" locations with a CID and embed
        // the related resource if we find it.
        final Matcher matcher = pattern.matcher(htmlMessage);

        // the matcher returns all instances one by one
        while (matcher.find()) {
            // in the RegEx we have the resource location as second "group"
            final String resourceLocation = matcher.group(2);

            // avoid loading the same data source more than once
            DataSource dataSource = dataSourceCache.get(resourceLocation);
            if (dataSource == null) {
                final DataSourceResolver resolver = Objects.requireNonNull(getDataSourceResolver(),
                        "dataSourceResolver must be set before resources can be embedded");

                // in lenient mode we might get a 'null' data source if the resource was not found
                dataSource = resolver.resolve(resourceLocation);
                if (dataSource != null) {
                    dataSourceCache.put(resourceLocation, dataSource);
                }
            }

            if (dataSource != null) {
                // fall back to the location when the data source does not carry a name
                String name = dataSource.getName();
                if (EmailUtils.isEmpty(name)) {
                    name = resourceLocation;
                }

                // embed each name only once and reuse its content id afterwards
                String cid = cidCache.get(name);
                if (cid == null) {
                    cid = embed(dataSource, name);
                    cidCache.put(name, cid);
                }

                // if we embedded something, then we need to replace the URL with
                // the CID, otherwise the Matcher takes care of adding the
                // non-replaced text afterwards, so no else is necessary here!
                // quoteReplacement keeps '$' and '\' in the HTML from being read as group references.
                matcher.appendReplacement(stringBuffer, Matcher.quoteReplacement(matcher.group(1) + "cid:" + cid + matcher.group(3)));
            }
        }

        // append the remaining items...
        matcher.appendTail(stringBuffer);

        return stringBuffer.toString();
    }

    /**
     * Sets the data source resolver.
     *
     * @param dataSourceResolver the resolver
     */
    public void setDataSourceResolver(final DataSourceResolver dataSourceResolver) {
        this.dataSourceResolver = dataSourceResolver;
    }
}