List of usage examples for org.dom4j.io SAXReader setIgnoreComments
public void setIgnoreComments(boolean ignoreComments)
From source file:com.panet.imeta.trans.steps.getxmldata.GetXMLData.java
License:Open Source License
protected boolean setDocument(String StringXML, FileObject file, boolean IsInXMLField, boolean readurl) throws KettleException { try {/*from ww w.j ava2s.c om*/ SAXReader reader = new SAXReader(); data.stopPruning = false; // Validate XML against specified schema? if (meta.isValidating()) { reader.setValidation(true); reader.setFeature("http://apache.org/xml/features/validation/schema", true); } // Ignore comments? if (meta.isIgnoreComments()) reader.setIgnoreComments(true); if (data.prunePath != null) { // when pruning is on: reader.read() below will wait until all // is processed in the handler if (log.isDetailed()) logDetailed(Messages.getString("GetXMLData.Log.StreamingMode.Activated")); reader.addHandler(data.prunePath, new ElementHandler() { public void onStart(ElementPath path) { // do nothing here... } public void onEnd(ElementPath path) { if (isStopped()) { // when a large file is processed and it should be // stopped it is still reading the hole thing // the only solution I see is to prune / detach the // document and this will lead into a // NPE or other errors depending on the parsing // location - this will be treated in the catch part // below // any better idea is welcome if (log.isBasic()) logBasic(Messages.getString("GetXMLData.Log.StreamingMode.Stopped")); data.stopPruning = true; path.getCurrent().getDocument().detach(); // trick // to // stop // reader return; } // process a ROW element if (log.isDebug()) logDebug(Messages.getString("GetXMLData.Log.StreamingMode.StartProcessing")); Element row = path.getCurrent(); try { processStreaming(row.getDocument()); } catch (Exception e) { // catch the KettleException or others and forward // to caller, e.g. 
when applyXPath() has a problem throw new RuntimeException(e); } // prune the tree row.detach(); if (log.isDebug()) logDebug(Messages.getString("GetXMLData.Log.StreamingMode.EndProcessing")); } }); } if (IsInXMLField) { // read string to parse data.document = reader.read(new StringReader(StringXML)); } else if (readurl) { // read url as source data.document = reader.read(new URL(StringXML)); } else { // get encoding. By default UTF-8 String encoding = "UTF-8"; if (!Const.isEmpty(meta.getEncoding())) encoding = meta.getEncoding(); data.document = reader.read(KettleVFS.getInputStream(file), encoding); } if (meta.isNamespaceAware()) prepareNSMap(data.document.getRootElement()); } catch (Exception e) { if (data.stopPruning) { // ignore error when pruning return false; } else { throw new KettleException(e); } } return true; }
From source file:com.webslingerz.jpt.PageTemplateImpl.java
License:Open Source License
/**
 * Builds the SAX reader used for parsing: comments are kept and the
 * load-external-dtd feature is switched off.
 *
 * @return the configured reader
 * @throws SAXException if the parser rejects the feature setting
 */
private static SAXReader createXMLReader() throws SAXException {
    final SAXReader saxReader = new SAXReader();
    saxReader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
    saxReader.setIgnoreComments(false);
    return saxReader;
}
From source file:de.fct.companian.analyze.mvn.helper.PomHelper.java
License:Apache License
/**
 * Parses the given POM file and prepares a namespace context that maps the
 * prefix {@code mvn} to the namespace URI of the root element.
 *
 * @param pomFile the POM file to parse
 * @throws DocumentException if the file cannot be read or parsed; the underlying
 *         failure is attached as the cause
 */
public PomHelper(File pomFile) throws DocumentException {
    this.pomFile = pomFile;
    this.document = null;

    SAXReader reader = new SAXReader();
    reader.setEncoding("ISO-8859-1");
    reader.setIgnoreComments(true);
    reader.setValidation(false);

    try {
        this.document = reader.read(this.pomFile);
    } catch (Exception e) {
        // Keep the original message but preserve the real reason instead of
        // printing the stack trace and losing it.
        throw new DocumentException("Could not create document.", e);
    }
    if (this.document == null) {
        throw new DocumentException("Could not create document.");
    }

    Element projectElement = this.document.getRootElement();
    Namespace defaultNS = projectElement.getNamespace();
    if (logger.isDebugEnabled()) {
        logger.debug("extractPomInfo() using default namespace " + defaultNS.getURI());
    }

    Map<String, String> nsMap = new HashMap<String, String>();
    nsMap.put("mvn", defaultNS.getURI());
    this.nsContext = new SimpleNamespaceContext(nsMap);
}
From source file:edu.ku.brc.specify.tools.AppendHelp.java
License:Open Source License
/** * Reads a DOM from a stream//from w w w.j ava 2s .c o m * @param fileinputStream the stream to be read * @return the root element of the DOM */ public Element readFileToDOM4J(final File file) throws IOException, DocumentException { SAXReader saxReader = new SAXReader(); try { saxReader.setValidation(false); saxReader.setStripWhitespaceText(true); //saxReader.setIncludeExternalDTDDeclarations(false); //saxReader.setIncludeInternalDTDDeclarations(false); saxReader.setIgnoreComments(true); //saxReader.setXMLFilter(new TransparentFilter(saxReader.getXMLReader())); EntityResolver entityResolver = new EntityResolver() { public InputSource resolveEntity(String publicId, String systemId) { return new InputSource(""); } }; saxReader.setEntityResolver(entityResolver); //saxReader.getXMLFilter().setDTDHandler(null); } catch (Exception ex) { ex.printStackTrace(); } org.dom4j.Document document = saxReader.read(new FileInputStream(file)); return document.getRootElement(); }
From source file:org.craftercms.core.store.impl.AbstractFileBasedContentStoreAdapter.java
License:Open Source License
/** * Creates and configures an XML SAX reader. */// w w w. jav a 2 s . c o m protected SAXReader createXmlReader() { SAXReader xmlReader = new SAXReader(); xmlReader.setMergeAdjacentText(true); xmlReader.setStripWhitespaceText(true); xmlReader.setIgnoreComments(true); try { xmlReader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); xmlReader.setFeature("http://xml.org/sax/features/external-general-entities", false); xmlReader.setFeature("http://xml.org/sax/features/external-parameter-entities", false); } catch (SAXException ex) { LOGGER.error("Unable to turn off external entity loading, This could be a security risk.", ex); } return xmlReader; }
From source file:org.metaeffekt.dita.maven.glossary.GlossaryMapCreator.java
License:Apache License
protected Document readDocument(File file) { SAXReader reader = new SAXReader(); reader.setValidation(false);//w ww . j a va 2 s . c om reader.setIncludeInternalDTDDeclarations(false); reader.setIncludeExternalDTDDeclarations(false); reader.setIgnoreComments(true); reader.setEntityResolver(new EntityResolver() { @Override public InputSource resolveEntity(String arg0, String arg1) throws SAXException, IOException { return new InputSource(new InputStream() { @Override public int read() throws IOException { return -1; } }); } }); try { return reader.read(file); } catch (DocumentException e) { return null; } }
From source file:org.pentaho.di.trans.steps.getxmldata.GetXMLData.java
License:Apache License
protected boolean setDocument(String StringXML, FileObject file, boolean IsInXMLField, boolean readurl) throws KettleException { this.prevRow = buildEmptyRow(); // pre-allocate previous row try {/*from w ww. j ava 2 s . c o m*/ SAXReader reader = XMLParserFactoryProducer.getSAXReader(null); data.stopPruning = false; // Validate XML against specified schema? if (meta.isValidating()) { reader.setValidation(true); reader.setFeature("http://apache.org/xml/features/validation/schema", true); } else { // Ignore DTD declarations reader.setEntityResolver(new IgnoreDTDEntityResolver()); } // Ignore comments? if (meta.isIgnoreComments()) { reader.setIgnoreComments(true); } if (data.prunePath != null) { // when pruning is on: reader.read() below will wait until all is processed in the handler if (log.isDetailed()) { logDetailed(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.Activated")); } if (data.PathValue.equals(data.prunePath)) { // Edge case, but if true, there will only ever be one item in the list data.an = new ArrayList<>(1); // pre-allocate array and sizes data.an.add(null); } reader.addHandler(data.prunePath, new ElementHandler() { public void onStart(ElementPath path) { // do nothing here... 
} public void onEnd(ElementPath path) { if (isStopped()) { // when a large file is processed and it should be stopped it is still reading the hole thing // the only solution I see is to prune / detach the document and this will lead into a // NPE or other errors depending on the parsing location - this will be treated in the catch part below // any better idea is welcome if (log.isBasic()) { logBasic(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.Stopped")); } data.stopPruning = true; path.getCurrent().getDocument().detach(); // trick to stop reader return; } // process a ROW element if (log.isDebug()) { logDebug(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.StartProcessing")); } Element row = path.getCurrent(); try { // Pass over the row instead of just the document. If // if there's only one row, there's no need to // go back to the whole document. processStreaming(row); } catch (Exception e) { // catch the KettleException or others and forward to caller, e.g. when applyXPath() has a problem throw new RuntimeException(e); } // prune the tree row.detach(); if (log.isDebug()) { logDebug(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.EndProcessing")); } } }); } if (IsInXMLField) { // read string to parse data.document = reader.read(new StringReader(StringXML)); } else if (readurl && KettleVFS.startsWithScheme(StringXML)) { data.document = reader.read(KettleVFS.getInputStream(StringXML)); } else if (readurl) { // read url as source HttpClient client = HttpClientManager.getInstance().createDefaultClient(); HttpGet method = new HttpGet(StringXML); method.addHeader("Accept-Encoding", "gzip"); HttpResponse response = client.execute(method); Header contentEncoding = response.getFirstHeader("Content-Encoding"); HttpEntity responseEntity = response.getEntity(); if (responseEntity != null) { if (contentEncoding != null) { String acceptEncodingValue = contentEncoding.getValue(); if (acceptEncodingValue.contains("gzip")) { GZIPInputStream in = 
new GZIPInputStream(responseEntity.getContent()); data.document = reader.read(in); } } else { data.document = reader.read(responseEntity.getContent()); } } } else { // get encoding. By default UTF-8 String encoding = "UTF-8"; if (!Utils.isEmpty(meta.getEncoding())) { encoding = meta.getEncoding(); } InputStream is = KettleVFS.getInputStream(file); try { data.document = reader.read(is, encoding); } finally { BaseStep.closeQuietly(is); } } if (meta.isNamespaceAware()) { prepareNSMap(data.document.getRootElement()); } } catch (Exception e) { if (data.stopPruning) { // ignore error when pruning return false; } else { throw new KettleException(e); } } return true; }