Example usage for org.dom4j.io SAXReader setIgnoreComments

Introduction

On this page you can find example usages of org.dom4j.io SAXReader setIgnoreComments.

Prototype

public void setIgnoreComments(boolean ignoreComments)

Source Link

Document

Sets whether we should ignore comments or not.

Usage

From source file:com.panet.imeta.trans.steps.getxmldata.GetXMLData.java

License:Open Source License

/**
 * Parses an XML source into {@code data.document}.
 *
 * <p>The source is chosen by the flags: the XML text itself when
 * {@code IsInXMLField} is set, otherwise a URL when {@code readurl} is set,
 * otherwise the given file read with the configured encoding (UTF-8 when
 * none is configured). When {@code data.prunePath} is set, matching elements
 * are handed to {@code processStreaming} while parsing and detached
 * afterwards.
 *
 * @param StringXML    the XML text, or the URL string when {@code readurl} is set
 * @param file         the file to read when neither flag is set
 * @param IsInXMLField {@code true} if {@code StringXML} holds the XML itself
 * @param readurl      {@code true} if {@code StringXML} holds a URL
 * @return {@code true} on success, {@code false} if parsing failed after the
 *         step was stopped in pruning mode
 * @throws KettleException wrapping any other failure
 */
protected boolean setDocument(String StringXML, FileObject file, boolean IsInXMLField, boolean readurl)
        throws KettleException {

    try {
        SAXReader reader = new SAXReader();
        data.stopPruning = false;

        // Validate XML against specified schema?
        if (meta.isValidating()) {
            reader.setValidation(true);
            reader.setFeature("http://apache.org/xml/features/validation/schema", true);
        }

        // Ignore comments?
        if (meta.isIgnoreComments())
            reader.setIgnoreComments(true);

        if (data.prunePath != null) {
            // when pruning is on: reader.read() below will wait until all
            // is processed in the handler
            if (log.isDetailed())
                logDetailed(Messages.getString("GetXMLData.Log.StreamingMode.Activated"));
            reader.addHandler(data.prunePath, new ElementHandler() {
                public void onStart(ElementPath path) {
                    // do nothing here...
                }

                public void onEnd(ElementPath path) {
                    if (isStopped()) {
                        // When a large file is processed and the step is
                        // stopped, the reader would still read the whole
                        // thing. Detaching the document makes the parser
                        // fail (NPE or other errors depending on the parsing
                        // location); that failure is recognised through
                        // data.stopPruning in the catch part below.
                        if (log.isBasic())
                            logBasic(Messages.getString("GetXMLData.Log.StreamingMode.Stopped"));
                        data.stopPruning = true;
                        path.getCurrent().getDocument().detach(); // trick to stop reader
                        return;
                    }

                    // process a ROW element
                    if (log.isDebug())
                        logDebug(Messages.getString("GetXMLData.Log.StreamingMode.StartProcessing"));
                    Element row = path.getCurrent();
                    try {
                        processStreaming(row.getDocument());
                    } catch (Exception e) {
                        // catch the KettleException or others and forward
                        // to caller, e.g. when applyXPath() has a problem
                        throw new RuntimeException(e);
                    }
                    // prune the tree
                    row.detach();
                    if (log.isDebug())
                        logDebug(Messages.getString("GetXMLData.Log.StreamingMode.EndProcessing"));
                }
            });
        }

        if (IsInXMLField) {
            // read string to parse
            data.document = reader.read(new StringReader(StringXML));
        } else if (readurl) {
            // read url as source
            data.document = reader.read(new URL(StringXML));
        } else {
            // get encoding. By default UTF-8
            String encoding = "UTF-8";
            if (!Const.isEmpty(meta.getEncoding()))
                encoding = meta.getEncoding();
            // The stream is opened here, so it is closed here as well; this
            // also covers a parse that is aborted by the pruning handler.
            java.io.InputStream in = KettleVFS.getInputStream(file);
            try {
                data.document = reader.read(in, encoding);
            } finally {
                if (in != null) {
                    try {
                        in.close();
                    } catch (java.io.IOException ignored) {
                        // a failed close of a read-only stream must not
                        // turn a successful parse into an error
                    }
                }
            }
        }

        if (meta.isNamespaceAware())
            prepareNSMap(data.document.getRootElement());
    } catch (Exception e) {
        if (data.stopPruning) {
            // ignore error when pruning
            return false;
        } else {
            throw new KettleException(e);
        }
    }
    return true;
}

From source file:com.webslingerz.jpt.PageTemplateImpl.java

License:Open Source License

/**
 * Creates the SAX reader used for parsing: comments are kept and the
 * parser's {@code load-external-dtd} feature is switched off.
 *
 * @return a freshly configured reader
 * @throws SAXException if the parser rejects the feature setting
 */
private static SAXReader createXMLReader() throws SAXException {
    final String loadExternalDtd = "http://apache.org/xml/features/nonvalidating/load-external-dtd";

    final SAXReader saxReader = new SAXReader();
    saxReader.setFeature(loadExternalDtd, false);
    saxReader.setIgnoreComments(false);
    return saxReader;
}

From source file:de.fct.companian.analyze.mvn.helper.PomHelper.java

License:Apache License

/**
 * Parses the given POM file and prepares a namespace context that binds the
 * prefix {@code mvn} to the namespace URI of the document's root element.
 *
 * <p>The file is read as ISO-8859-1, without validation and with comments
 * ignored.
 *
 * @param pomFile the POM file to parse
 * @throws DocumentException if the file cannot be parsed; the underlying
 *         failure is attached as the cause
 */
public PomHelper(File pomFile) throws DocumentException {
    this.pomFile = pomFile;
    this.document = null;

    SAXReader reader = new SAXReader();
    reader.setEncoding("ISO-8859-1");
    reader.setIgnoreComments(true);
    reader.setValidation(false);

    try {
        this.document = reader.read(this.pomFile);
    } catch (Exception e) {
        // Keep the original message for callers, but carry the real cause
        // instead of printing it and losing it. Errors (e.g. OutOfMemoryError)
        // are deliberately not caught.
        throw new DocumentException("Could not create document.", e);
    }

    if (this.document == null) {
        throw new DocumentException("Could not create document.");
    }

    Element projectElement = this.document.getRootElement();
    Namespace defaultNS = projectElement.getNamespace();
    if (logger.isDebugEnabled()) {
        logger.debug("extractPomInfo() using default namespace " + defaultNS.getURI());
    }

    Map<String, String> nsMap = new HashMap<String, String>();
    nsMap.put("mvn", defaultNS.getURI());

    this.nsContext = new SimpleNamespaceContext(nsMap);
}

From source file:edu.ku.brc.specify.tools.AppendHelp.java

License:Open Source License

/**
 * Reads an XML file into a dom4j document and returns its root element.
 *
 * <p>The reader does not validate, strips whitespace-only text and ignores
 * comments. Every entity lookup is answered with empty content.
 *
 * @param file the XML file to be read
 * @return the root element of the parsed document
 * @throws IOException if the file cannot be opened
 * @throws DocumentException if the file cannot be parsed
 */
public Element readFileToDOM4J(final File file) throws IOException, DocumentException {
    SAXReader saxReader = new SAXReader();
    saxReader.setValidation(false);
    saxReader.setStripWhitespaceText(true);
    saxReader.setIgnoreComments(true);

    saxReader.setEntityResolver(new EntityResolver() {
        public InputSource resolveEntity(String publicId, String systemId) {
            // InputSource(String) takes a system identifier, not content, so
            // an empty string there still leaves the parser to open a
            // location. Supplying an empty character stream is what yields
            // an empty entity.
            return new InputSource(new java.io.StringReader(""));
        }
    });

    FileInputStream input = new FileInputStream(file);
    try {
        org.dom4j.Document document = saxReader.read(input);
        return document.getRootElement();
    } finally {
        try {
            input.close();
        } catch (IOException ignored) {
            // a failed close of a read-only stream must not mask the parse
            // result or the parse exception
        }
    }
}

From source file:org.craftercms.core.store.impl.AbstractFileBasedContentStoreAdapter.java

License:Open Source License

/**
 * Builds a SAX reader that merges adjacent text, strips whitespace-only text
 * and ignores comments. It also tries to forbid DOCTYPE declarations and to
 * disable external general and parameter entities; if the parser rejects one
 * of these settings, the failure is logged and the reader is returned anyway.
 *
 * @return the configured reader
 */
protected SAXReader createXmlReader() {
    final SAXReader saxReader = new SAXReader();
    saxReader.setIgnoreComments(true);
    saxReader.setStripWhitespaceText(true);
    saxReader.setMergeAdjacentText(true);

    final String[] externalEntityFeatures = {
        "http://xml.org/sax/features/external-general-entities",
        "http://xml.org/sax/features/external-parameter-entities"
    };

    try {
        saxReader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
        for (String feature : externalEntityFeatures) {
            saxReader.setFeature(feature, false);
        }
    } catch (SAXException ex) {
        LOGGER.error("Unable to turn off external entity loading, This could be a security risk.", ex);
    }

    return saxReader;
}

From source file:org.metaeffekt.dita.maven.glossary.GlossaryMapCreator.java

License:Apache License

/**
 * Parses the given file without validation, without DTD declarations and
 * without comments. Every entity lookup is answered with an empty stream.
 *
 * @param file the file to parse
 * @return the parsed document, or {@code null} if parsing failed
 */
protected Document readDocument(File file) {
    // Resolver that hands the parser an immediately exhausted stream.
    final EntityResolver emptyEntityResolver = new EntityResolver() {
        @Override
        public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
            final InputStream exhausted = new InputStream() {
                @Override
                public int read() throws IOException {
                    return -1;
                }
            };
            return new InputSource(exhausted);
        }
    };

    final SAXReader saxReader = new SAXReader();
    saxReader.setEntityResolver(emptyEntityResolver);
    saxReader.setIgnoreComments(true);
    saxReader.setIncludeExternalDTDDeclarations(false);
    saxReader.setIncludeInternalDTDDeclarations(false);
    saxReader.setValidation(false);

    Document parsed;
    try {
        parsed = saxReader.read(file);
    } catch (DocumentException e) {
        parsed = null;
    }
    return parsed;
}

From source file:org.pentaho.di.trans.steps.getxmldata.GetXMLData.java

License:Apache License

/**
 * Parses an XML source into {@code data.document}.
 *
 * <p>The source is chosen as follows: the XML text itself when
 * {@code IsInXMLField} is set; otherwise, when {@code readurl} is set, a VFS
 * location if {@code StringXML} starts with a VFS scheme, else an HTTP GET
 * (gzip-encoded responses are decompressed); otherwise the given file read
 * with the configured encoding (UTF-8 when none is configured). When
 * {@code data.prunePath} is set, matching elements are handed to
 * {@code processStreaming} while parsing and detached afterwards.
 *
 * @param StringXML    the XML text, or the location when {@code readurl} is set
 * @param file         the file to read when neither flag is set
 * @param IsInXMLField {@code true} if {@code StringXML} holds the XML itself
 * @param readurl      {@code true} if {@code StringXML} holds a URL
 * @return {@code true} on success, {@code false} if parsing failed after the
 *         step was stopped in pruning mode
 * @throws KettleException wrapping any other failure
 */
protected boolean setDocument(String StringXML, FileObject file, boolean IsInXMLField, boolean readurl)
        throws KettleException {

    this.prevRow = buildEmptyRow(); // pre-allocate previous row

    try {
        SAXReader reader = XMLParserFactoryProducer.getSAXReader(null);
        data.stopPruning = false;
        // Validate XML against specified schema?
        if (meta.isValidating()) {
            reader.setValidation(true);
            reader.setFeature("http://apache.org/xml/features/validation/schema", true);
        } else {
            // Ignore DTD declarations
            reader.setEntityResolver(new IgnoreDTDEntityResolver());
        }

        // Ignore comments?
        if (meta.isIgnoreComments()) {
            reader.setIgnoreComments(true);
        }

        if (data.prunePath != null) {
            // when pruning is on: reader.read() below will wait until all is processed in the handler
            if (log.isDetailed()) {
                logDetailed(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.Activated"));
            }
            if (data.PathValue.equals(data.prunePath)) {
                // Edge case, but if true, there will only ever be one item in the list
                data.an = new ArrayList<>(1); // pre-allocate array and sizes
                data.an.add(null);
            }
            reader.addHandler(data.prunePath, new ElementHandler() {
                public void onStart(ElementPath path) {
                    // do nothing here...
                }

                public void onEnd(ElementPath path) {
                    if (isStopped()) {
                        // When a large file is processed and the step is stopped, the reader would still read
                        // the whole thing. Detaching the document makes the parser fail (NPE or other errors
                        // depending on the parsing location); that failure is recognised through
                        // data.stopPruning in the catch part below.
                        if (log.isBasic()) {
                            logBasic(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.Stopped"));
                        }
                        data.stopPruning = true;
                        path.getCurrent().getDocument().detach(); // trick to stop reader
                        return;
                    }

                    // process a ROW element
                    if (log.isDebug()) {
                        logDebug(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.StartProcessing"));
                    }
                    Element row = path.getCurrent();
                    try {
                        // Pass over the row instead of just the document. If
                        // there's only one row, there's no need to
                        // go back to the whole document.
                        processStreaming(row);
                    } catch (Exception e) {
                        // catch the KettleException or others and forward to caller, e.g. when applyXPath() has a problem
                        throw new RuntimeException(e);
                    }
                    // prune the tree
                    row.detach();
                    if (log.isDebug()) {
                        logDebug(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.EndProcessing"));
                    }
                }
            });
        }

        if (IsInXMLField) {
            // read string to parse
            data.document = reader.read(new StringReader(StringXML));
        } else if (readurl && KettleVFS.startsWithScheme(StringXML)) {
            // read a VFS location; the stream is opened here, so close it here
            InputStream is = KettleVFS.getInputStream(StringXML);
            try {
                data.document = reader.read(is);
            } finally {
                BaseStep.closeQuietly(is);
            }
        } else if (readurl) {
            // read url as source
            HttpClient client = HttpClientManager.getInstance().createDefaultClient();
            HttpGet method = new HttpGet(StringXML);
            method.addHeader("Accept-Encoding", "gzip");
            HttpResponse response = client.execute(method);
            Header contentEncoding = response.getFirstHeader("Content-Encoding");
            HttpEntity responseEntity = response.getEntity();
            // NOTE(review): when the response has no entity, data.document is left untouched,
            // exactly as before - confirm whether that case should be reported as an error.
            if (responseEntity != null) {
                String contentEncodingValue = contentEncoding == null ? null : contentEncoding.getValue();
                InputStream in = responseEntity.getContent();
                try {
                    // Only gzip needs unwrapping. Any other (or no) Content-Encoding is parsed as
                    // delivered, instead of silently skipping the body when the header is present
                    // but does not mention gzip.
                    if (contentEncodingValue != null && contentEncodingValue.contains("gzip")) {
                        in = new GZIPInputStream(in);
                    }
                    data.document = reader.read(in);
                } finally {
                    BaseStep.closeQuietly(in);
                }
            }
        } else {
            // get encoding. By default UTF-8
            String encoding = "UTF-8";
            if (!Utils.isEmpty(meta.getEncoding())) {
                encoding = meta.getEncoding();
            }
            InputStream is = KettleVFS.getInputStream(file);
            try {
                data.document = reader.read(is, encoding);
            } finally {
                BaseStep.closeQuietly(is);
            }
        }

        if (meta.isNamespaceAware()) {
            prepareNSMap(data.document.getRootElement());
        }
    } catch (Exception e) {
        if (data.stopPruning) {
            // ignore error when pruning
            return false;
        } else {
            throw new KettleException(e);
        }
    }
    return true;
}