List of usage examples for org.dom4j.io DOMReader read
public Document read(org.w3c.dom.Document domDocument)
From source file:W3cDocument.java
License:Open Source License
public static String getXml(org.w3c.dom.Document w3cDoc, String encoding) { try {/* ww w. j a v a 2 s . c o m*/ org.dom4j.io.DOMReader xmlReader = new org.dom4j.io.DOMReader(); org.dom4j.Document dom4jDoc = xmlReader.read(w3cDoc); //? OutputFormat format = new OutputFormat();//(" ", true); //? format.setEncoding(encoding); //format.setOmitEncoding(true); format.setSuppressDeclaration(true); //xml StringWriter out = new StringWriter(); XMLWriter xmlWriter = new XMLWriter(out, format); xmlWriter.setEscapeText(true); //?doc xmlWriter.write(dom4jDoc); xmlWriter.flush(); //??printWriter String xml = out.toString(); out.close(); return xml; } catch (IOException e) { e.printStackTrace(); return null; } }
From source file:com.alibaba.citrus.springext.util.DomUtil.java
License:Open Source License
/** W3C element??DOM4j element */ public static org.dom4j.Element convertElement(Element element) { Document doc;//from w w w .j a va2 s.co m try { doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument(); } catch (ParserConfigurationException e) { throw new IllegalArgumentException("Failed to create dom4j document", e); } Element clonedElement = (Element) doc.importNode(element, true); doc.appendChild(clonedElement); DOMReader reader = new DOMReader(); org.dom4j.Document dom4jDoc = reader.read(doc); return dom4jDoc.getRootElement(); }
From source file:com.amalto.webapp.core.util.XmlUtil.java
License:Open Source License
public static Document parseDocument(org.w3c.dom.Document doc) { if (doc == null) { return (null); }//from w w w .j a v a 2 s .c om org.dom4j.io.DOMReader xmlReader = new org.dom4j.io.DOMReader(); return (xmlReader.read(doc)); }
From source file:com.benasmussen.tools.testeditor.IdExtractor.java
License:Apache License
public Vector<Vector> parse() throws Exception { Tidy tidy = new Tidy(); tidy.setXmlOut(true);//from w w w.ja va 2s.co m tidy.setPrintBodyOnly(true); Vector<Vector> data = null; try { is = new FileInputStream(file); // jtidy parse to odm org.w3c.dom.Document parseDOM = tidy.parseDOM(is, null); // w3c to dom4j DOMReader domReader = new DOMReader(); Document document = domReader.read(parseDOM); if (logger.isDebugEnabled()) { logger.debug("XML: " + document.asXML()); } data = new Vector<Vector>(); // select all elements with attribute @id List<DefaultElement> elements = document.selectNodes("//*[@id]"); for (DefaultElement element : elements) { Vector<String> row = new Vector<String>(); row.addElement(element.attributeValue("id")); row.addElement(element.getTextTrim()); data.addElement(row); } } catch (Exception e) { throw e; } finally { IOUtils.closeQuietly(is); } return data; }
From source file:com.cladonia.xml.ExchangerDocument.java
License:Open Source License
/** * Sets the W3C Document.//w ww .j a va 2s .c om * * @param the w3c document. * @deprecated this does not work! */ public void setW3CDocument(Document document) throws DocumentException { DOMReader reader = new DOMReader(); this.document = (XDocument) reader.read(document); writeText(); // write the text out of the dom directly fireDocumentUpdated(getRoot(), ExchangerDocumentEvent.CONTENT_UPDATED); }
From source file:com.flaptor.hounder.util.HtmlParser.java
License:Apache License
/** * Parse the given html document.//from w w w . j a v a 2s .co m * @param content the html document to parse. * @return the parsed string. */ public Output parse(String url, String content) throws Exception { // <html xmlns=...> ==> <html> content = REGEXP_HTML.matcher(content).replaceFirst("<html>"); // Parser keeps state, synchronize in case its used in a multi-threaded setting. Output out = new Output(url); synchronized (this) { try { // use cyberneko to parse the html documents (even broken ones) org.xml.sax.InputSource inputSource = new org.xml.sax.InputSource( new java.io.ByteArrayInputStream(content.getBytes())); parser.parse(inputSource); } catch (Exception e) { logger.warn("Exception while trying to parse [" + content + "]"); throw e; } DOMReader reader = new DOMReader(); Document htmlDoc; try { // get the doc that resulted from parsing the text org.w3c.dom.Document document = parser.getDocument(); htmlDoc = reader.read(document); } catch (java.lang.StackOverflowError e) { logger.warn("Out of stack memory trying to parse [" + content + "]"); throw new Exception(); } // this 2 must be before the ignoreXPath, else an ignoreXPath that // includes the //TITLE will imply that the title is not indexed // extract the links extractLinks(htmlDoc, out); // extact the title extractTitle(htmlDoc, out); ignoreXpath(htmlDoc); replaceSeparatorTags(htmlDoc); // extract the text from the html tags extractText(htmlDoc.getRootElement(), out, HTMLPARSER_CONTENT); // extract special fields extractFields(htmlDoc, out); } out.close(); return out; }
From source file:com.flaptor.util.parser.HtmlParser.java
License:Apache License
/** * Parses and fixes an html byte array using Cybernecko. * //from w w w . ja v a2s. c o m * @param url The base URL for relative links * @param bytes The content * @return a dom4j Document * @throws InterruptedException * @throws Exception */ public Document getHtmlDocument(String url, byte[] bytes) throws InterruptedException, Exception { Document htmlDoc; DOMParser parser = parsers.take(); try { try { // use cyberneko to parse the html documents (even broken ones) org.xml.sax.InputSource inputSource = new org.xml.sax.InputSource( new java.io.ByteArrayInputStream(bytes)); parser.parse(inputSource); } catch (Exception e) { logger.warn("Exception while trying to parse " + url); throw e; } DOMReader reader = new DOMReader(); try { // get the doc that resulted from parsing the text org.w3c.dom.Document document = parser.getDocument(); htmlDoc = reader.read(document); } catch (java.lang.StackOverflowError e) { logger.warn("Out of stack memory trying to parse " + url); throw new Exception(e); } } finally { parsers.add(parser); } return htmlDoc; }
From source file:com.ostrichemulators.semtool.ui.components.playsheets.BrowserPlaySheet2.java
License:Open Source License
protected BufferedImage getExportImageFromSVGBlock() throws IOException { log.debug("Using SVG block to save image."); DOMReader rdr = new DOMReader(); Document doc = rdr.read(engine.getDocument()); Document svgdoc = null;/*from ww w. j a va2 s. c om*/ File svgfile = null; try { Map<String, String> namespaceUris = new HashMap<>(); namespaceUris.put("svg", "http://www.w3.org/2000/svg"); namespaceUris.put("xhtml", "http://www.w3.org/1999/xhtml"); XPath xp = DocumentHelper.createXPath("//svg:svg"); xp.setNamespaceURIs(namespaceUris); // don't forget about the styles XPath stylexp = DocumentHelper.createXPath("//xhtml:style"); stylexp.setNamespaceURIs(namespaceUris); svgdoc = DocumentHelper.createDocument(); Element svg = null; List<?> theSVGElements = xp.selectNodes(doc); if (theSVGElements.size() == 1) { svg = Element.class.cast(theSVGElements.get(0)).createCopy(); } else { int currentTop = 0; int biggestSize = 0; for (int i = 0; i < theSVGElements.size(); i++) { Element thisElement = Element.class.cast(theSVGElements.get(i)).createCopy(); int thisSize = thisElement.asXML().length(); if (thisSize > biggestSize) { currentTop = i; biggestSize = thisSize; } } svg = Element.class.cast(theSVGElements.get(currentTop)).createCopy(); } svgdoc.setRootElement(svg); Element oldstyle = Element.class.cast(stylexp.selectSingleNode(doc)); if (null != oldstyle) { Element defs = svg.addElement("defs"); Element style = defs.addElement("style"); style.addAttribute("type", "text/css"); String styledata = oldstyle.getTextTrim(); style.addCDATA(styledata); // put the stylesheet definitions first List l = svg.elements(); l.remove(defs); l.add(0, defs); } // clean up the SVG a little... 
// d3 comes up with coords like // M360,27475.063247863247C450,27475.063247863247 450,27269.907692307694 540,27269.907692307694 XPath cleanxp1 = DocumentHelper.createXPath("//svg:path"); Pattern pat = Pattern.compile(",([0-9]+)\\.([0-9]{1,2})[0-9]+"); cleanxp1.setNamespaceURIs(namespaceUris); List<?> cleanups = cleanxp1.selectNodes(svgdoc); for (Object n : cleanups) { Element e = Element.class.cast(n); String dstr = e.attributeValue("d"); Matcher m = pat.matcher(dstr); dstr = m.replaceAll(",$1.$2 "); e.addAttribute("d", dstr.replaceAll("([0-9])C([0-9])", "$1 C$2").trim()); } XPath cleanxp2 = DocumentHelper.createXPath("//svg:g[@class='node']"); cleanxp2.setNamespaceURIs(namespaceUris); cleanups = cleanxp2.selectNodes(svgdoc); for (Object n : cleanups) { Element e = Element.class.cast(n); String dstr = e.attributeValue("transform"); Matcher m = pat.matcher(dstr); dstr = m.replaceAll(",$1.$2"); e.addAttribute("transform", dstr.trim()); } svgfile = File.createTempFile("graphviz-", ".svg"); try (Writer svgw = new BufferedWriter(new FileWriter(svgfile))) { OutputFormat format = OutputFormat.createPrettyPrint(); XMLWriter xmlw = new XMLWriter(svgw, format); xmlw.write(svgdoc); xmlw.close(); if (log.isDebugEnabled()) { FileUtils.copyFile(svgfile, new File(FileUtils.getTempDirectory(), "graphvisualization.svg")); } } try (Reader svgr = new BufferedReader(new FileReader(svgfile))) { TranscoderInput inputSvg = new TranscoderInput(svgr); ByteArrayOutputStream baos = new ByteArrayOutputStream((int) svgfile.length()); TranscoderOutput outputPng = new TranscoderOutput(baos); try { PNGTranscoder transcoder = new PNGTranscoder(); transcoder.addTranscodingHint(PNGTranscoder.KEY_INDEXED, 256); transcoder.addTranscodingHint(ImageTranscoder.KEY_BACKGROUND_COLOR, Color.WHITE); transcoder.transcode(inputSvg, outputPng); } catch (Throwable t) { log.error(t, t); } baos.flush(); baos.close(); return ImageIO.read(new ByteArrayInputStream(baos.toByteArray())); } } catch 
(InvalidXPathException e) { log.error(e); String msg = "Problem creating image"; if (null != svgdoc) { try { File errsvg = new File(FileUtils.getTempDirectory(), "graphvisualization.svg"); FileUtils.write(errsvg, svgdoc.asXML(), Charset.defaultCharset()); msg = "Could not create the image. SVG data store here: " + errsvg.getAbsolutePath(); } catch (IOException ex) { // don't care } } throw new IOException(msg, e); } finally { if (null != svgfile) { FileUtils.deleteQuietly(svgfile); } } }
From source file:com.zimbra.common.soap.W3cDomUtil.java
License:Open Source License
/**
 * Parses XML from a stream via {@code W3cDomUtil.parseXMLToDoc} and converts the
 * resulting W3C document to a dom4j document.
 *
 * <p>Note: DOCTYPE is disallowed for reasons of security and protection against denial of service
 *
 * @param is the stream to read the XML from
 * @return the dom4j document read from the parsed W3C document
 * @throws XmlParseException presumably raised by {@code W3cDomUtil.parseXMLToDoc} when parsing fails
 */
public static org.dom4j.Document parseXMLToDom4jDocUsingSecureProcessing(InputStream is)
        throws XmlParseException {
    final org.w3c.dom.Document parsed = W3cDomUtil.parseXMLToDoc(is);
    return new DOMReader().read(parsed);
}
From source file:cz.fi.muni.xkremser.editor.server.newObject.CreateObjectUtils.java
License:Open Source License
/**
 * Appends {@code child} to {@code parrent} by adding a relationship to the parent's RELS-EXT.
 *
 * <p>The parent's RELS-EXT is fetched through {@code fedoraAccess}, converted to dom4j,
 * extended with a relation to the child, and written back through {@code FedoraUtils}.
 *
 * @param parrent the object that receives the new relationship
 * @param child   the object being appended
 * @throws CreateObjectException if the parent's RELS-EXT cannot be fetched
 */
private static void append(NewDigitalObject parrent, NewDigitalObject child) throws CreateObjectException {
    final org.w3c.dom.Document relsExtDom;
    try {
        relsExtDom = fedoraAccess.getRelsExt(parrent.getUuid());
    } catch (IOException e) {
        LOGGER.error(e.getMessage());
        e.printStackTrace();
        String failure = "Unable to append " + child.getName() + " (" + child.getUuid()
                + ") to parrent named " + parrent.getName() + " (" + parrent.getUuid() + ")!";
        throw new CreateObjectException(failure);
    }

    Document relsExt = new DOMReader().read(relsExtDom);

    RelsExtRelation relation = new RelsExtRelation(
            child.getUuid(),
            NamedGraphModel.getRelationship(parrent.getModel(), child.getModel()),
            child.getName());
    FoxmlUtils.addRelationshipToRelsExt(relsExt, relation);

    FedoraUtils.putRelsExt(parrent.getUuid(), relsExt.asXML(), false);
}