Example usage for org.dom4j.io SAXReader setFeature

List of usage examples for org.dom4j.io SAXReader setFeature

Introduction

On this page you can find example usages of org.dom4j.io SAXReader setFeature.

Prototype

public void setFeature(String name, boolean value) throws SAXException 

Source Link

Document

Sets a SAX feature on the underlying SAX parser.

Usage

From source file:org.pentaho.di.trans.steps.getxmldata.GetXMLData.java

License:Apache License

/**
 * Parses one XML source with a dom4j {@link SAXReader} and stores the result in {@code data.document}.
 *
 * <p>The source is chosen in this order: the XML text itself ({@code IsInXMLField}), a VFS location
 * ({@code readurl} and {@code StringXML} starts with a VFS scheme), an HTTP URL ({@code readurl}),
 * or otherwise the given {@code file} read with the configured encoding (UTF-8 when none is set).
 *
 * <p>When {@code data.prunePath} is set, rows are processed by an {@link ElementHandler} while
 * {@code reader.read(...)} is still running, and every processed element is detached from the tree.
 *
 * @param StringXML    the XML content when {@code IsInXMLField} is true, otherwise the URL to read
 *                     when {@code readurl} is true
 * @param file         the file to read when neither {@code IsInXMLField} nor {@code readurl} is set
 * @param IsInXMLField whether {@code StringXML} holds the XML document itself
 * @param readurl      whether {@code StringXML} holds a URL to fetch the document from
 * @return {@code true} when the document was read; {@code false} when an error occurred after the
 *         streaming handler flagged {@code data.stopPruning}
 * @throws KettleException wrapping any other failure while configuring the reader or parsing
 */
protected boolean setDocument(String StringXML, FileObject file, boolean IsInXMLField, boolean readurl)
        throws KettleException {

    this.prevRow = buildEmptyRow(); // pre-allocate previous row

    try {
        SAXReader reader = XMLParserFactoryProducer.getSAXReader(null);
        data.stopPruning = false;
        // Validate XML against specified schema?
        if (meta.isValidating()) {
            reader.setValidation(true);
            reader.setFeature("http://apache.org/xml/features/validation/schema", true);
        } else {
            // Ignore DTD declarations
            reader.setEntityResolver(new IgnoreDTDEntityResolver());
        }

        // Ignore comments?
        if (meta.isIgnoreComments()) {
            reader.setIgnoreComments(true);
        }

        if (data.prunePath != null) {
            // when pruning is on: reader.read() below will wait until all is processed in the handler
            if (log.isDetailed()) {
                logDetailed(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.Activated"));
            }
            if (data.PathValue.equals(data.prunePath)) {
                // Edge case, but if true, there will only ever be one item in the list
                data.an = new ArrayList<>(1); // pre-allocate array and sizes
                data.an.add(null);
            }
            reader.addHandler(data.prunePath, new ElementHandler() {
                @Override
                public void onStart(ElementPath path) {
                    // do nothing here...
                }

                @Override
                public void onEnd(ElementPath path) {
                    if (isStopped()) {
                        // When a large file is processed and the step is stopped, the reader would still
                        // read the whole thing. Detaching the document makes the parser fail (NPE or other
                        // errors depending on the parsing location); the outer catch block recognises this
                        // through data.stopPruning and returns false instead of raising an error.
                        if (log.isBasic()) {
                            logBasic(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.Stopped"));
                        }
                        data.stopPruning = true;
                        path.getCurrent().getDocument().detach(); // trick to stop reader
                        return;
                    }

                    // process a ROW element
                    if (log.isDebug()) {
                        logDebug(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.StartProcessing"));
                    }
                    Element row = path.getCurrent();
                    try {
                        // Pass over the row instead of just the document. If
                        // there's only one row, there's no need to
                        // go back to the whole document.
                        processStreaming(row);
                    } catch (Exception e) {
                        // catch the KettleException or others and forward to caller, e.g. when applyXPath() has a problem
                        throw new RuntimeException(e);
                    }
                    // prune the tree
                    row.detach();
                    if (log.isDebug()) {
                        logDebug(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.EndProcessing"));
                    }
                }
            });
        }

        if (IsInXMLField) {
            // read string to parse
            data.document = reader.read(new StringReader(StringXML));
        } else if (readurl && KettleVFS.startsWithScheme(StringXML)) {
            InputStream vfsStream = KettleVFS.getInputStream(StringXML);
            try {
                data.document = reader.read(vfsStream);
            } finally {
                // the stream is fully consumed by the reader; release the VFS handle
                BaseStep.closeQuietly(vfsStream);
            }
        } else if (readurl) {
            // read url as source
            HttpClient client = HttpClientManager.getInstance().createDefaultClient();
            HttpGet method = new HttpGet(StringXML);
            method.addHeader("Accept-Encoding", "gzip");
            HttpResponse response = client.execute(method);
            Header contentEncoding = response.getFirstHeader("Content-Encoding");
            HttpEntity responseEntity = response.getEntity();
            if (responseEntity != null) {
                String encodingValue = contentEncoding == null ? null : contentEncoding.getValue();
                boolean gzipped = encodingValue != null && encodingValue.contains("gzip");
                InputStream body = responseEntity.getContent();
                try {
                    if (gzipped) {
                        body = new GZIPInputStream(body);
                    }
                    // Any non-gzip response (no header, "identity", ...) is parsed as-is; previously a
                    // Content-Encoding header without "gzip" left data.document untouched.
                    data.document = reader.read(body);
                } finally {
                    // closing the outermost stream also closes the entity content it wraps
                    BaseStep.closeQuietly(body);
                }
            }
        } else {
            // get encoding. By default UTF-8
            String encoding = "UTF-8";
            if (!Utils.isEmpty(meta.getEncoding())) {
                encoding = meta.getEncoding();
            }
            InputStream is = KettleVFS.getInputStream(file);
            try {
                data.document = reader.read(is, encoding);
            } finally {
                BaseStep.closeQuietly(is);
            }
        }

        if (meta.isNamespaceAware()) {
            prepareNSMap(data.document.getRootElement());
        }
    } catch (Exception e) {
        if (data.stopPruning) {
            // ignore error when pruning
            return false;
        } else {
            throw new KettleException(e);
        }
    }
    return true;
}

From source file:org.pentaho.di.ui.trans.steps.getxmldata.LoopNodesImportProgressDialog.java

License:Apache License

/**
 * Reads the configured XML source (file name, inline XML, or URL, in that order of preference),
 * selects the nodes matching the root element's name and registers every not-yet-seen node path
 * in {@code listpath}, delegating to {@code addLoopXPath} for each new one.
 *
 * @param monitor progress monitor that is updated along the way and polled for cancellation
 * @return the collected paths as an array, or {@code null} when the monitor was cancelled
 * @throws Exception when the reader cannot be configured or the document cannot be read
 */
@SuppressWarnings("unchecked")
private String[] doScan(IProgressMonitor monitor) throws Exception {
    monitor.beginTask(
            BaseMessages.getString(PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.ScanningFile", filename),
            1);

    SAXReader saxReader = XMLParserFactoryProducer.getSAXReader(null);
    monitor.worked(1);
    if (monitor.isCanceled()) {
        return null;
    }

    // Either validate against the schema or suppress DTD resolution entirely.
    if (!meta.isValidating()) {
        saxReader.setEntityResolver(new IgnoreDTDEntityResolver());
    } else {
        saxReader.setValidation(true);
        saxReader.setFeature("http://apache.org/xml/features/validation/schema", true);
    }
    monitor.worked(1);
    monitor.beginTask(
            BaseMessages.getString(PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.ReadingDocument"), 1);
    if (monitor.isCanceled()) {
        return null;
    }

    InputStream input = null;
    try {
        Document parsed;
        if (!Utils.isEmpty(filename)) {
            input = KettleVFS.getInputStream(filename);
            parsed = saxReader.read(input, encoding);
        } else if (!Utils.isEmpty(xml)) {
            parsed = saxReader.read(new StringReader(xml));
        } else {
            parsed = saxReader.read(new URL(url));
        }
        monitor.worked(1);
        monitor.beginTask(
                BaseMessages.getString(PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.DocumentOpened"), 1);
        monitor.worked(1);
        monitor.beginTask(
                BaseMessages.getString(PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.ReadingNode"), 1);

        if (monitor.isCanceled()) {
            return null;
        }
        String rootName = parsed.getRootElement().getName();
        List<Node> matches = parsed.selectNodes(rootName);
        monitor.worked(1);
        monitor.subTask(BaseMessages.getString(PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.FetchNodes"));

        if (monitor.isCanceled()) {
            return null;
        }
        for (Node candidate : matches) {
            if (monitor.isCanceled()) {
                return null;
            }
            String path = candidate.getPath();
            if (listpath.contains(path)) {
                continue; // path already registered
            }
            nr++;
            monitor.subTask(BaseMessages.getString(PKG,
                    "GetXMLDateLoopNodesImportProgressDialog.Task.FetchNodes", String.valueOf(nr)));
            monitor.subTask(BaseMessages.getString(PKG,
                    "GetXMLDateLoopNodesImportProgressDialog.Task.AddingNode", path));
            listpath.add(path);
            addLoopXPath(candidate, monitor);
        }
        monitor.worked(1);
    } finally {
        if (input != null) {
            try {
                input.close();
            } catch (Exception ignored) {
                // best-effort close; nothing useful can be done here
            }
        }
    }

    String[] xpaths = listpath.toArray(new String[listpath.size()]);

    monitor.setTaskName(
            BaseMessages.getString(PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.NodesReturned"));
    monitor.done();

    return xpaths;
}

From source file:org.pentaho.di.ui.trans.steps.getxmldata.XMLInputFieldsImportProgressDialog.java

License:Apache License

/**
 * Reads the configured XML source (file name, inline XML, or URL, in that order of preference),
 * selects the nodes matching {@code loopXPath} and hands each of them to {@code setNodeField}
 * and {@code childNode}; the result is whatever those calls accumulated in {@code fieldsList}.
 *
 * @param monitor progress monitor that is updated along the way and polled for cancellation
 * @return the content of {@code fieldsList} as an array, or {@code null} when the monitor was cancelled
 * @throws Exception when the reader cannot be configured or the document cannot be read
 */
@SuppressWarnings("unchecked")
private RowMetaAndData[] doScan(IProgressMonitor monitor) throws Exception {
    monitor.beginTask(
            BaseMessages.getString(PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.ScanningFile", filename),
            1);

    SAXReader saxReader = XMLParserFactoryProducer.getSAXReader(null);
    monitor.worked(1);
    if (monitor.isCanceled()) {
        return null;
    }

    // Either validate against the schema or suppress DTD resolution entirely.
    if (!meta.isValidating()) {
        saxReader.setEntityResolver(new IgnoreDTDEntityResolver());
    } else {
        saxReader.setValidation(true);
        saxReader.setFeature("http://apache.org/xml/features/validation/schema", true);
    }
    monitor.worked(1);
    monitor.beginTask(
            BaseMessages.getString(PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.ReadingDocument"), 1);
    if (monitor.isCanceled()) {
        return null;
    }

    InputStream input = null;
    try {
        Document parsed;
        if (!Utils.isEmpty(filename)) {
            input = KettleVFS.getInputStream(filename);
            parsed = saxReader.read(input, encoding);
        } else if (!Utils.isEmpty(xml)) {
            parsed = saxReader.read(new StringReader(xml));
        } else {
            parsed = saxReader.read(new URL(url));
        }

        monitor.worked(1);
        monitor.beginTask(
                BaseMessages.getString(PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.DocumentOpened"), 1);
        monitor.worked(1);
        monitor.beginTask(
                BaseMessages.getString(PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.ReadingNode"), 1);

        if (monitor.isCanceled()) {
            return null;
        }
        List<Node> matches = parsed.selectNodes(this.loopXPath);
        monitor.worked(1);
        monitor.subTask(BaseMessages.getString(PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.FetchNodes"));

        if (monitor.isCanceled()) {
            return null;
        }
        for (Node current : matches) {
            if (monitor.isCanceled()) {
                return null;
            }

            nr++;
            String counter = String.valueOf(nr);
            monitor.subTask(BaseMessages.getString(PKG,
                    "GetXMLDateLoopNodesImportProgressDialog.Task.FetchNodes", counter));
            monitor.subTask(BaseMessages.getString(PKG,
                    "GetXMLDateLoopNodesImportProgressDialog.Task.FetchNodes", current.getPath()));
            setNodeField(current, monitor);
            childNode(current, monitor);
        }
        monitor.worked(1);
    } finally {
        if (input != null) {
            try {
                input.close();
            } catch (Exception ignored) {
                // best-effort close; nothing useful can be done here
            }
        }
    }

    RowMetaAndData[] fields = fieldsList.toArray(new RowMetaAndData[fieldsList.size()]);

    monitor.setTaskName(
            BaseMessages.getString(PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.NodesReturned"));
    monitor.done();

    return fields;
}

From source file:org.sysmodb.SpreadsheetTestHelper.java

License:BSD License

/**
 * Parses the given XML with a validating {@link SAXReader} configured to check it against the
 * {@code /schema-v1.xsd} classpath resource using W3C XML Schema.
 *
 * @param xml the XML document text to validate
 * @throws IllegalStateException when the schema resource cannot be found on the classpath
 * @throws Exception any failure raised while configuring the reader or reading the document
 *                   (a {@code DocumentException} from {@code reader.read} propagates unchanged)
 */
public static void validateAgainstSchema(String xml) throws Exception {
    URL resource = WorkbookParserXMLTest.class.getResource("/schema-v1.xsd");
    if (resource == null) {
        // fail with a clear message instead of a NullPointerException further down
        throw new IllegalStateException("Schema resource /schema-v1.xsd not found on the classpath");
    }
    SAXReader reader = new SAXReader(true);
    reader.setFeature("http://apache.org/xml/features/validation/schema", true);
    reader.setProperty("http://java.sun.com/xml/jaxp/properties/schemaLanguage",
            "http://www.w3.org/2001/XMLSchema");
    // URL.getFile() keeps percent-escapes (e.g. %20 for a space), so go through the URI to get a
    // correctly decoded file path.
    reader.setProperty("http://java.sun.com/xml/jaxp/properties/schemaSource", new File(resource.toURI()));
    InputSource source = new InputSource(new StringReader(xml));
    source.setEncoding("UTF-8");
    reader.read(source);
}

From source file:org.talend.metadata.managment.ui.wizard.metadata.xml.utils.CopyDeleteFileUtilForWizard.java

License:Open Source License

/**
 * Collects the {@code name} attribute of every {@code complexType} element that is a direct child
 * of the root element of the given XSD file.
 *
 * <p>The file is parsed with validation switched off. External entities are resolved to the local
 * file named by their system id when such a file exists, and to empty content otherwise. Parsing
 * problems are passed to {@code ExceptionHandler.process} and the names gathered so far are returned.
 *
 * @param xsdFile path of the XSD file to inspect
 * @return the names found; empty when the file does not exist or nothing matched
 */
public static List<String> getComplexNodes(String xsdFile) {
    List<String> names = new ArrayList<String>();
    File xsd = new File(xsdFile);
    if (!xsd.exists()) {
        return names;
    }

    SAXReader parser = new SAXReader();
    try {
        URL location = xsd.toURI().toURL();
        parser.setFeature("http://xml.org/sax/features/validation", false);

        parser.setEntityResolver(new EntityResolver() {

            // single empty stream handed out for every entity that has no local file
            ByteArrayInputStream emptyContent = new ByteArrayInputStream("".getBytes());

            @Override
            public InputSource resolveEntity(String publicId, String systemId)
                    throws SAXException, IOException {
                File local = new File(systemId);
                if (!local.exists()) {
                    // if no file, just set empty content for dtd
                    return new InputSource(emptyContent);
                }
                return new InputSource(new FileInputStream(local));
            }
        });

        Document parsed = parser.read(location.getFile());
        List<Element> complexTypes = parsed.getRootElement().elements("complexType");
        if (complexTypes == null) {
            return names;
        }
        for (Element complexType : complexTypes) {
            Attribute nameAttribute = complexType.attribute("name");
            if (nameAttribute == null) {
                continue;
            }
            names.add(nameAttribute.getValue());
        }
    } catch (DocumentException e) {
        ExceptionHandler.process(e);
    } catch (MalformedURLException e) {
        ExceptionHandler.process(e);
    } catch (SAXException e) {
        ExceptionHandler.process(e);
    }
    return names;
}

From source file:org.unitime.banner.ant.MergeXml.java

License:Apache License

/**
 * Merges the XML document {@code iSource} into {@code iTarget} and writes the pretty-printed
 * result back to {@code iTarget}.
 *
 * <p>The two Hibernate 3.0 DTDs are resolved from the class path and loading of other external
 * DTDs is switched off. When the target file is named {@code hibernate.cfg.xml}, the target takes
 * over the source document's DOCTYPE and all {@code mapping} elements are moved to the end of the
 * {@code session-factory} element.
 *
 * @throws BuildException wrapping any failure while reading, merging or writing
 */
public void execute() throws BuildException {
    try {
        log("Merging " + iTarget + " with " + iSource);
        SAXReader sax = new SAXReader();
        sax.setEntityResolver(new EntityResolver() {
            @Override
            public InputSource resolveEntity(String publicId, String systemId)
                    throws SAXException, IOException {
                // publicId is null for entities declared with a system id only, so compare
                // constant-first to avoid a NullPointerException.
                if ("-//Hibernate/Hibernate Mapping DTD 3.0//EN".equals(publicId)) {
                    return new InputSource(getClass().getClassLoader()
                            .getResourceAsStream("org/hibernate/hibernate-mapping-3.0.dtd"));
                } else if ("-//Hibernate/Hibernate Configuration DTD 3.0//EN".equals(publicId)) {
                    return new InputSource(getClass().getClassLoader()
                            .getResourceAsStream("org/hibernate/hibernate-configuration-3.0.dtd"));
                }
                return null;
            }
        });
        sax.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        Document targetDoc = sax.read(new File(iTarget));
        Document sourceDoc = sax.read(new File(iSource));

        merge(targetDoc.getRootElement(), sourceDoc.getRootElement());

        if (new File(iTarget).getName().equals("hibernate.cfg.xml")) {
            targetDoc.setDocType(sourceDoc.getDocType()); // take over the source document's DOCTYPE
            Element sessionFactoryElement = targetDoc.getRootElement().element("session-factory");
            // Pull all mapping elements out and re-append them, so they end up after every
            // other child of session-factory.
            Vector<Element> mappings = new Vector<Element>();
            for (Iterator i = sessionFactoryElement.elementIterator("mapping"); i.hasNext();) {
                Element mappingElement = (Element) i.next();
                mappings.add(mappingElement);
                sessionFactoryElement.remove(mappingElement);
            }
            for (Element mappingElement : mappings) {
                sessionFactoryElement.add(mappingElement);
            }
        }

        FileOutputStream fos = new FileOutputStream(iTarget);
        try {
            (new XMLWriter(fos, OutputFormat.createPrettyPrint())).write(targetDoc);
            fos.flush();
        } finally {
            // release the file handle even when writing fails
            fos.close();
        }
    } catch (Exception e) {
        e.printStackTrace();
        throw new BuildException(e);
    }
}

From source file:pt.webdetails.cpf.utils.XmlParserFactoryProducer.java

License:Open Source License

/**
 * Creates an instance of {@link SAXReader} class
 * with features that prevent from some XXE attacks (e.g. XML bomb)
 * See PPP-3506 for more details./*from w w w .  java2  s.  com*/
 * See also https://www.owasp.org/index.php/XML_External_Entity_(XXE)_Prevention_Cheat_Sheet
 *
 * @param resolver Is {@link EntityResolver} or null
 * @return {@link SAXReader}
 */
public static SAXReader getSAXReader(final EntityResolver resolver) {
    SAXReader reader = new SAXReader();
    if (resolver != null) {
        reader.setEntityResolver(resolver);
    }
    try {
        reader.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
        reader.setFeature("http://xml.org/sax/features/external-general-entities", false);
        reader.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
    } catch (SAXException e) {
        logger.error("Some parser properties are not supported.");
    }
    reader.setIncludeExternalDTDDeclarations(false);
    reader.setIncludeInternalDTDDeclarations(false);
    return reader;
}