List of usage examples for org.dom4j.io SAXReader setFeature
public void setFeature(String name, boolean value) throws SAXException
From source file:org.pentaho.di.trans.steps.getxmldata.GetXMLData.java
License:Apache License
protected boolean setDocument(String StringXML, FileObject file, boolean IsInXMLField, boolean readurl) throws KettleException { this.prevRow = buildEmptyRow(); // pre-allocate previous row try {/*from w ww. j a v a2s. c o m*/ SAXReader reader = XMLParserFactoryProducer.getSAXReader(null); data.stopPruning = false; // Validate XML against specified schema? if (meta.isValidating()) { reader.setValidation(true); reader.setFeature("http://apache.org/xml/features/validation/schema", true); } else { // Ignore DTD declarations reader.setEntityResolver(new IgnoreDTDEntityResolver()); } // Ignore comments? if (meta.isIgnoreComments()) { reader.setIgnoreComments(true); } if (data.prunePath != null) { // when pruning is on: reader.read() below will wait until all is processed in the handler if (log.isDetailed()) { logDetailed(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.Activated")); } if (data.PathValue.equals(data.prunePath)) { // Edge case, but if true, there will only ever be one item in the list data.an = new ArrayList<>(1); // pre-allocate array and sizes data.an.add(null); } reader.addHandler(data.prunePath, new ElementHandler() { public void onStart(ElementPath path) { // do nothing here... } public void onEnd(ElementPath path) { if (isStopped()) { // when a large file is processed and it should be stopped it is still reading the hole thing // the only solution I see is to prune / detach the document and this will lead into a // NPE or other errors depending on the parsing location - this will be treated in the catch part below // any better idea is welcome if (log.isBasic()) { logBasic(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.Stopped")); } data.stopPruning = true; path.getCurrent().getDocument().detach(); // trick to stop reader return; } // process a ROW element if (log.isDebug()) { logDebug(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.StartProcessing")); } Element row = path.getCurrent(); try { // Pass over the row instead of just the document. If // if there's only one row, there's no need to // go back to the whole document. processStreaming(row); } catch (Exception e) { // catch the KettleException or others and forward to caller, e.g. when applyXPath() has a problem throw new RuntimeException(e); } // prune the tree row.detach(); if (log.isDebug()) { logDebug(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.EndProcessing")); } } }); } if (IsInXMLField) { // read string to parse data.document = reader.read(new StringReader(StringXML)); } else if (readurl && KettleVFS.startsWithScheme(StringXML)) { data.document = reader.read(KettleVFS.getInputStream(StringXML)); } else if (readurl) { // read url as source HttpClient client = HttpClientManager.getInstance().createDefaultClient(); HttpGet method = new HttpGet(StringXML); method.addHeader("Accept-Encoding", "gzip"); HttpResponse response = client.execute(method); Header contentEncoding = response.getFirstHeader("Content-Encoding"); HttpEntity responseEntity = response.getEntity(); if (responseEntity != null) { if (contentEncoding != null) { String acceptEncodingValue = contentEncoding.getValue(); if (acceptEncodingValue.contains("gzip")) { GZIPInputStream in = new GZIPInputStream(responseEntity.getContent()); data.document = reader.read(in); } } else { data.document = reader.read(responseEntity.getContent()); } } } else { // get encoding. By default UTF-8 String encoding = "UTF-8"; if (!Utils.isEmpty(meta.getEncoding())) { encoding = meta.getEncoding(); } InputStream is = KettleVFS.getInputStream(file); try { data.document = reader.read(is, encoding); } finally { BaseStep.closeQuietly(is); } } if (meta.isNamespaceAware()) { prepareNSMap(data.document.getRootElement()); } } catch (Exception e) { if (data.stopPruning) { // ignore error when pruning return false; } else { throw new KettleException(e); } } return true; }
From source file:org.pentaho.di.ui.trans.steps.getxmldata.LoopNodesImportProgressDialog.java
License:Apache License
@SuppressWarnings("unchecked") private String[] doScan(IProgressMonitor monitor) throws Exception { monitor.beginTask(/* w w w. j a v a 2 s .c o m*/ BaseMessages.getString(PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.ScanningFile", filename), 1); SAXReader reader = XMLParserFactoryProducer.getSAXReader(null); monitor.worked(1); if (monitor.isCanceled()) { return null; } // Validate XML against specified schema? if (meta.isValidating()) { reader.setValidation(true); reader.setFeature("http://apache.org/xml/features/validation/schema", true); } else { // Ignore DTD reader.setEntityResolver(new IgnoreDTDEntityResolver()); } monitor.worked(1); monitor.beginTask( BaseMessages.getString(PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.ReadingDocument"), 1); if (monitor.isCanceled()) { return null; } InputStream is = null; try { Document document = null; if (!Utils.isEmpty(filename)) { is = KettleVFS.getInputStream(filename); document = reader.read(is, encoding); } else { if (!Utils.isEmpty(xml)) { document = reader.read(new StringReader(xml)); } else { document = reader.read(new URL(url)); } } monitor.worked(1); monitor.beginTask( BaseMessages.getString(PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.DocumentOpened"), 1); monitor.worked(1); monitor.beginTask( BaseMessages.getString(PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.ReadingNode"), 1); if (monitor.isCanceled()) { return null; } List<Node> nodes = document.selectNodes(document.getRootElement().getName()); monitor.worked(1); monitor.subTask(BaseMessages.getString(PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.FetchNodes")); if (monitor.isCanceled()) { return null; } for (Node node : nodes) { if (monitor.isCanceled()) { return null; } if (!listpath.contains(node.getPath())) { nr++; monitor.subTask(BaseMessages.getString(PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.FetchNodes", String.valueOf(nr))); monitor.subTask(BaseMessages.getString(PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.AddingNode", node.getPath())); listpath.add(node.getPath()); addLoopXPath(node, monitor); } } monitor.worked(1); } finally { try { if (is != null) { is.close(); } } catch (Exception e) { /* Ignore */ } } String[] list_xpath = listpath.toArray(new String[listpath.size()]); monitor.setTaskName( BaseMessages.getString(PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.NodesReturned")); monitor.done(); return list_xpath; }
From source file:org.pentaho.di.ui.trans.steps.getxmldata.XMLInputFieldsImportProgressDialog.java
License:Apache License
@SuppressWarnings("unchecked") private RowMetaAndData[] doScan(IProgressMonitor monitor) throws Exception { monitor.beginTask(// w ww . j a v a 2 s. co m BaseMessages.getString(PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.ScanningFile", filename), 1); SAXReader reader = XMLParserFactoryProducer.getSAXReader(null); monitor.worked(1); if (monitor.isCanceled()) { return null; } // Validate XML against specified schema? if (meta.isValidating()) { reader.setValidation(true); reader.setFeature("http://apache.org/xml/features/validation/schema", true); } else { // Ignore DTD reader.setEntityResolver(new IgnoreDTDEntityResolver()); } monitor.worked(1); monitor.beginTask( BaseMessages.getString(PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.ReadingDocument"), 1); if (monitor.isCanceled()) { return null; } InputStream is = null; try { Document document = null; if (!Utils.isEmpty(filename)) { is = KettleVFS.getInputStream(filename); document = reader.read(is, encoding); } else { if (!Utils.isEmpty(xml)) { document = reader.read(new StringReader(xml)); } else { document = reader.read(new URL(url)); } } monitor.worked(1); monitor.beginTask( BaseMessages.getString(PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.DocumentOpened"), 1); monitor.worked(1); monitor.beginTask( BaseMessages.getString(PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.ReadingNode"), 1); if (monitor.isCanceled()) { return null; } List<Node> nodes = document.selectNodes(this.loopXPath); monitor.worked(1); monitor.subTask(BaseMessages.getString(PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.FetchNodes")); if (monitor.isCanceled()) { return null; } for (Node node : nodes) { if (monitor.isCanceled()) { return null; } nr++; monitor.subTask(BaseMessages.getString(PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.FetchNodes", String.valueOf(nr))); monitor.subTask(BaseMessages.getString(PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.FetchNodes", node.getPath())); setNodeField(node, monitor); childNode(node, monitor); } monitor.worked(1); } finally { try { if (is != null) { is.close(); } } catch (Exception e) { /* Ignore */ } } RowMetaAndData[] listFields = fieldsList.toArray(new RowMetaAndData[fieldsList.size()]); monitor.setTaskName( BaseMessages.getString(PKG, "GetXMLDateLoopNodesImportProgressDialog.Task.NodesReturned")); monitor.done(); return listFields; }
From source file:org.sysmodb.SpreadsheetTestHelper.java
License:BSD License
public static void validateAgainstSchema(String xml) throws Exception { URL resource = WorkbookParserXMLTest.class.getResource("/schema-v1.xsd"); SAXReader reader = new SAXReader(true); reader.setFeature("http://apache.org/xml/features/validation/schema", true); reader.setProperty("http://java.sun.com/xml/jaxp/properties/schemaLanguage", "http://www.w3.org/2001/XMLSchema"); reader.setProperty("http://java.sun.com/xml/jaxp/properties/schemaSource", new File(resource.getFile())); InputSource source = new InputSource(new StringReader(xml)); source.setEncoding("UTF-8"); try {//ww w . j ava 2 s . c om reader.read(source); } catch (DocumentException e) { // System.out.println(xml); throw e; } }
From source file:org.talend.metadata.managment.ui.wizard.metadata.xml.utils.CopyDeleteFileUtilForWizard.java
License:Open Source License
public static List<String> getComplexNodes(String xsdFile) { List<String> attri = new ArrayList<String>(); File file = new File(xsdFile); if (!file.exists()) { return attri; }/*from www. ja v a2 s . c om*/ SAXReader saxReader = new SAXReader(); Document doc; try { URL url = file.toURI().toURL(); saxReader.setFeature("http://xml.org/sax/features/validation", false); saxReader.setEntityResolver(new EntityResolver() { String emptyDtd = ""; ByteArrayInputStream bytes = new ByteArrayInputStream(emptyDtd.getBytes()); @Override public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException { File file = new File(systemId); if (file.exists()) { return new InputSource(new FileInputStream(file)); } // if no file, just set empty content for dtd return new InputSource(bytes); } }); doc = saxReader.read(url.getFile()); Element root = doc.getRootElement(); List<Element> complexList = root.elements("complexType"); if (complexList == null) { return attri; } for (Element n : complexList) { Attribute attr = n.attribute("name"); if (attr != null) { attri.add(attr.getValue()); } } } catch (DocumentException e) { ExceptionHandler.process(e); } catch (MalformedURLException e) { ExceptionHandler.process(e); } catch (SAXException e) { ExceptionHandler.process(e); } return attri; }
From source file:org.unitime.banner.ant.MergeXml.java
License:Apache License
public void execute() throws BuildException { try {//from ww w .j a v a 2 s . com log("Merging " + iTarget + " with " + iSource); SAXReader sax = new SAXReader(); sax.setEntityResolver(new EntityResolver() { @Override public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException { if (publicId.equals("-//Hibernate/Hibernate Mapping DTD 3.0//EN")) { return new InputSource(getClass().getClassLoader() .getResourceAsStream("org/hibernate/hibernate-mapping-3.0.dtd")); } else if (publicId.equals("-//Hibernate/Hibernate Configuration DTD 3.0//EN")) { return new InputSource(getClass().getClassLoader() .getResourceAsStream("org/hibernate/hibernate-configuration-3.0.dtd")); } return null; } }); sax.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); Document targetDoc = sax.read(new File(iTarget)); Document sourceDoc = sax.read(new File(iSource)); merge(targetDoc.getRootElement(), sourceDoc.getRootElement()); if (new File(iTarget).getName().equals("hibernate.cfg.xml")) { targetDoc.setDocType(sourceDoc.getDocType()); // Remove DOCTYPE Element sessionFactoryElement = targetDoc.getRootElement().element("session-factory"); Vector<Element> mappings = new Vector<Element>(); for (Iterator i = sessionFactoryElement.elementIterator("mapping"); i.hasNext();) { Element mappingElement = (Element) i.next(); mappings.add(mappingElement); sessionFactoryElement.remove(mappingElement); } for (Iterator i = mappings.iterator(); i.hasNext();) { Element mappingElement = (Element) i.next(); sessionFactoryElement.add(mappingElement); } } FileOutputStream fos = new FileOutputStream(iTarget); (new XMLWriter(fos, OutputFormat.createPrettyPrint())).write(targetDoc); fos.flush(); fos.close(); } catch (Exception e) { e.printStackTrace(); throw new BuildException(e); } }
From source file:pt.webdetails.cpf.utils.XmlParserFactoryProducer.java
License:Open Source License
/** * Creates an instance of {@link SAXReader} class * with features that prevent from some XXE attacks (e.g. XML bomb) * See PPP-3506 for more details./*from w w w . java2 s. com*/ * See also https://www.owasp.org/index.php/XML_External_Entity_(XXE)_Prevention_Cheat_Sheet * * @param resolver Is {@link EntityResolver} or null * @return {@link SAXReader} */ public static SAXReader getSAXReader(final EntityResolver resolver) { SAXReader reader = new SAXReader(); if (resolver != null) { reader.setEntityResolver(resolver); } try { reader.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); reader.setFeature("http://xml.org/sax/features/external-general-entities", false); reader.setFeature("http://xml.org/sax/features/external-parameter-entities", false); reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); } catch (SAXException e) { logger.error("Some parser properties are not supported."); } reader.setIncludeExternalDTDDeclarations(false); reader.setIncludeInternalDTDDeclarations(false); return reader; }