001// Copyright 2009-2013 The Apache Software Foundation 002// 003// Licensed under the Apache License, Version 2.0 (the "License"); 004// you may not use this file except in compliance with the License. 005// You may obtain a copy of the License at 006// 007// http://www.apache.org/licenses/LICENSE-2.0 008// 009// Unless required by applicable law or agreed to in writing, software 010// distributed under the License is distributed on an "AS IS" BASIS, 011// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 012// See the License for the specific language governing permissions and 013// limitations under the License. 014 015package org.apache.tapestry5.internal.services; 016 017import org.apache.tapestry5.ioc.Location; 018import org.apache.tapestry5.ioc.Resource; 019import org.apache.tapestry5.ioc.internal.util.CollectionFactory; 020import org.apache.tapestry5.ioc.internal.util.InternalUtils; 021import org.apache.tapestry5.ioc.internal.util.LocationImpl; 022import org.apache.tapestry5.ioc.util.ExceptionUtils; 023import org.xml.sax.*; 024import org.xml.sax.ext.Attributes2; 025import org.xml.sax.ext.LexicalHandler; 026import org.xml.sax.helpers.XMLReaderFactory; 027 028import javax.xml.namespace.QName; 029import java.io.*; 030import java.net.URL; 031import java.util.Collections; 032import java.util.List; 033import java.util.Map; 034 035/** 036 * Parses a document as a stream of XML tokens. It includes a special hack (as of Tapestry 5.3) to support the HTML5 doctype ({@code <!DOCTYPE html>}) 037 * as if it were the XHTML transitional doctype 038 * ({@code <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">}). 039 */ 040public class XMLTokenStream 041{ 042 043 public static final String TRANSITIONAL_DOCTYPE = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">"; 044 045 private static final DTDData HTML5_DTD_DATA = new DTDData("html", null, null); 046 047 private final class SaxHandler implements LexicalHandler, EntityResolver, ContentHandler 048 { 049 private Locator locator; 050 051 private int currentLine = -1; 052 053 private Location cachedLocation; 054 055 private Location textLocation; 056 057 private final StringBuilder builder = new StringBuilder(); 058 059 private boolean inCDATA, insideDTD; 060 061 private List<NamespaceMapping> namespaceMappings = CollectionFactory.newList(); 062 063 private Location getLocation() 064 { 065 int line = locator.getLineNumber(); 066 067 if (currentLine != line) 068 cachedLocation = null; 069 070 if (cachedLocation == null) 071 { 072 // lineOffset accounts for the extra line when a doctype is injected. The line number reported 073 // from the XML parser inlcudes the phantom doctype line, the lineOffset is used to subtract one 074 // to get the real line number. 075 cachedLocation = new LocationImpl(resource, line + lineOffset); 076 } 077 078 return cachedLocation; 079 } 080 081 private XMLToken add(XMLTokenType type) 082 { 083 XMLToken token = new XMLToken(type, getLocation()); 084 085 tokens.add(token); 086 087 return token; 088 } 089 090 public InputSource resolveEntity(String publicId, String systemId) throws SAXException, 091 IOException 092 { 093 URL url = publicIdToURL.get(publicId); 094 095 try 096 { 097 if (url != null) 098 return new InputSource(url.openStream()); 099 } catch (IOException ex) 100 { 101 throw new SAXException(String.format("Unable to open stream for resource %s: %s", 102 url, ExceptionUtils.toMessage(ex)), ex); 103 } 104 105 return null; 106 } 107 108 public void comment(char[] ch, int start, int length) throws SAXException 109 { 110 if (insideDTD) 111 return; 112 113 // TODO: Coalesce? 114 add(XMLTokenType.COMMENT).text = new String(ch, start, length); 115 } 116 117 public void startCDATA() throws SAXException 118 { 119 // TODO: Flush characters? 120 121 inCDATA = true; 122 } 123 124 public void endCDATA() throws SAXException 125 { 126 if (builder.length() != 0) 127 { 128 add(XMLTokenType.CDATA).text = builder.toString(); 129 } 130 131 builder.setLength(0); 132 inCDATA = false; 133 } 134 135 public void characters(char[] ch, int start, int length) throws SAXException 136 { 137 if (inCDATA) 138 { 139 builder.append(ch, start, length); 140 return; 141 } 142 143 XMLToken token = new XMLToken(XMLTokenType.CHARACTERS, textLocation); 144 token.text = new String(ch, start, length); 145 146 tokens.add(token); 147 } 148 149 public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException 150 { 151 characters(ch, start, length); 152 } 153 154 public void startDTD(final String name, final String publicId, final String systemId) 155 throws SAXException 156 { 157 insideDTD = true; 158 159 if (!ignoreDTD) 160 { 161 DTDData data = html5DTD ? HTML5_DTD_DATA : new DTDData(name, publicId, systemId); 162 163 add(XMLTokenType.DTD).dtdData = data; 164 } 165 } 166 167 public void endDocument() throws SAXException 168 { 169 add(XMLTokenType.END_DOCUMENT); 170 } 171 172 public void endElement(String uri, String localName, String qName) throws SAXException 173 { 174 add(XMLTokenType.END_ELEMENT); 175 } 176 177 public void setDocumentLocator(Locator locator) 178 { 179 this.locator = locator; 180 } 181 182 /** 183 * Checks for the extra namespace injected when the transitional doctype is injected (which 184 * occurs when the template contains no doctype). 185 */ 186 private boolean ignoreURI(String uri) 187 { 188 return ignoreDTD && uri.equals("http://www.w3.org/1999/xhtml"); 189 } 190 191 public void startElement(String uri, String localName, String qName, Attributes attributes) 192 throws SAXException 193 { 194 XMLToken token = add(XMLTokenType.START_ELEMENT); 195 196 token.uri = ignoreURI(uri) ? "" : uri; 197 token.localName = localName; 198 token.qName = qName; 199 200 // The XML parser tends to reuse the same Attributes object, so 201 // capture the data out of it. 202 203 Attributes2 a2 = (attributes instanceof Attributes2) ? (Attributes2) attributes : null; 204 205 if (attributes.getLength() == 0) 206 { 207 token.attributes = Collections.emptyList(); 208 } else 209 { 210 token.attributes = CollectionFactory.newList(); 211 212 for (int i = 0; i < attributes.getLength(); i++) 213 { 214 // Filter out attributes that are not present in the XML input stream, but were 215 // instead provided by DTD defaulting. 216 217 if (a2 != null && !a2.isSpecified(i)) 218 { 219 continue; 220 } 221 222 String prefixedName = attributes.getQName(i); 223 224 int lastColon = prefixedName.lastIndexOf(':'); 225 226 String prefix = lastColon > 0 ? prefixedName.substring(0, lastColon) : ""; 227 228 QName qname = new QName(attributes.getURI(i), attributes.getLocalName(i), 229 prefix); 230 231 token.attributes.add(new AttributeInfo(qname, attributes.getValue(i))); 232 } 233 } 234 235 token.namespaceMappings = CollectionFactory.newList(namespaceMappings); 236 237 namespaceMappings.clear(); 238 239 // Any text collected starts here as well: 240 241 textLocation = getLocation(); 242 } 243 244 public void startPrefixMapping(String prefix, String uri) throws SAXException 245 { 246 if (ignoreDTD && prefix.equals("") && uri.equals("http://www.w3.org/1999/xhtml")) 247 { 248 return; 249 } 250 251 namespaceMappings.add(new NamespaceMapping(prefix, uri)); 252 } 253 254 public void endDTD() throws SAXException 255 { 256 insideDTD = false; 257 } 258 259 public void endEntity(String name) throws SAXException 260 { 261 } 262 263 public void startEntity(String name) throws SAXException 264 { 265 } 266 267 public void endPrefixMapping(String prefix) throws SAXException 268 { 269 } 270 271 public void processingInstruction(String target, String data) throws SAXException 272 { 273 } 274 275 public void skippedEntity(String name) throws SAXException 276 { 277 } 278 279 public void startDocument() throws SAXException 280 { 281 } 282 } 283 284 private int cursor = -1; 285 286 private final List<XMLToken> tokens = CollectionFactory.newList(); 287 288 private final Resource resource; 289 290 private final Map<String, URL> publicIdToURL; 291 292 private Location exceptionLocation; 293 294 private boolean html5DTD, ignoreDTD; 295 296 private int lineOffset; 297 298 public XMLTokenStream(Resource resource, Map<String, URL> publicIdToURL) 299 { 300 this.resource = resource; 301 this.publicIdToURL = publicIdToURL; 302 } 303 304 public void parse() throws SAXException, IOException 305 { 306 SaxHandler handler = new SaxHandler(); 307 308 XMLReader reader = XMLReaderFactory.createXMLReader(); 309 310 reader.setContentHandler(handler); 311 reader.setEntityResolver(handler); 312 reader.setProperty("http://xml.org/sax/properties/lexical-handler", handler); 313 314 InputStream stream = openStream(); 315 316 try 317 { 318 reader.parse(new InputSource(stream)); 319 } catch (IOException ex) 320 { 321 this.exceptionLocation = handler.getLocation(); 322 323 throw ex; 324 } catch (SAXException ex) 325 { 326 this.exceptionLocation = handler.getLocation(); 327 328 throw ex; 329 } catch (RuntimeException ex) 330 { 331 this.exceptionLocation = handler.getLocation(); 332 333 throw ex; 334 } finally 335 { 336 InternalUtils.close(stream); 337 } 338 } 339 340 enum State 341 { 342 MAYBE_XML, MAYBE_DOCTYPE, JUST_COPY 343 } 344 345 private InputStream openStream() throws IOException 346 { 347 InputStream rawStream = resource.openStream(); 348 349 String transformationEncoding = "UTF8"; 350 351 InputStreamReader rawReader = new InputStreamReader(rawStream, transformationEncoding); 352 LineNumberReader reader = new LineNumberReader(rawReader); 353 354 ByteArrayOutputStream bos = new ByteArrayOutputStream(5000); 355 PrintWriter writer = new PrintWriter(new OutputStreamWriter(bos, transformationEncoding)); 356 357 State state = State.MAYBE_XML; 358 359 try 360 { 361 while (true) 362 { 363 String line = reader.readLine(); 364 365 if (line == null) 366 { 367 break; 368 } 369 370 switch (state) 371 { 372 373 case MAYBE_XML: 374 375 if (line.toLowerCase().startsWith("<?xml")) 376 { 377 writer.println(line); 378 state = State.MAYBE_DOCTYPE; 379 continue; 380 } 381 382 case MAYBE_DOCTYPE: 383 384 if (line.trim().length() == 0) 385 { 386 writer.println(line); 387 continue; 388 } 389 390 String lineLower = line.toLowerCase(); 391 392 if (lineLower.equals("<!doctype html>")) 393 { 394 html5DTD = true; 395 writer.println(TRANSITIONAL_DOCTYPE); 396 state = State.JUST_COPY; 397 continue; 398 } 399 400 401 if (lineLower.startsWith("<!doctype")) 402 { 403 writer.println(line); 404 state = State.JUST_COPY; 405 continue; 406 } 407 408 // No doctype, let's provide one. 409 410 ignoreDTD = true; 411 lineOffset = -1; 412 writer.println(TRANSITIONAL_DOCTYPE); 413 414 state = State.JUST_COPY; 415 416 // And drop down to writing out the actual line, and all following lines. 417 418 case JUST_COPY: 419 writer.println(line); 420 } 421 } 422 } finally 423 { 424 writer.close(); 425 reader.close(); 426 } 427 428 return new ByteArrayInputStream(bos.toByteArray()); 429 } 430 431 private XMLToken token() 432 { 433 return tokens.get(cursor); 434 } 435 436 /** 437 * Returns the type of the next token. 438 */ 439 public XMLTokenType next() 440 { 441 cursor++; 442 443 // TODO: Check for overflow? 444 445 return getEventType(); 446 } 447 448 public int getAttributeCount() 449 { 450 return token().attributes.size(); 451 } 452 453 public QName getAttributeName(int i) 454 { 455 return token().attributes.get(i).attributeName; 456 } 457 458 public DTDData getDTDInfo() 459 { 460 return token().dtdData; 461 } 462 463 public XMLTokenType getEventType() 464 { 465 return token().type; 466 } 467 468 public String getLocalName() 469 { 470 return token().localName; 471 } 472 473 public Location getLocation() 474 { 475 if (exceptionLocation != null) 476 return exceptionLocation; 477 478 return token().getLocation(); 479 } 480 481 public int getNamespaceCount() 482 { 483 return token().namespaceMappings.size(); 484 } 485 486 public String getNamespacePrefix(int i) 487 { 488 return token().namespaceMappings.get(i).prefix; 489 } 490 491 public String getNamespaceURI() 492 { 493 return token().uri; 494 } 495 496 public String getNamespaceURI(int i) 497 { 498 return token().namespaceMappings.get(i).uri; 499 } 500 501 public String getText() 502 { 503 return token().text; 504 } 505 506 public boolean hasNext() 507 { 508 return cursor < tokens.size() - 1; 509 } 510 511 public String getAttributeValue(int i) 512 { 513 return token().attributes.get(i).value; 514 } 515 516}