List of usage examples for org.w3c.dom DocumentFragment hasChildNodes
public boolean hasChildNodes();
From source file:org.apache.nutch.parse.html.HtmlParser.java
private DocumentFragment parseNeko(InputSource input) throws Exception { DOMFragmentParser parser = new DOMFragmentParser(); try {/*w ww .j a v a 2 s. com*/ parser.setFeature("http://cyberneko.org/html/features/augmentations", true); parser.setProperty("http://cyberneko.org/html/properties/default-encoding", defaultCharEncoding); parser.setFeature("http://cyberneko.org/html/features/scanner/ignore-specified-charset", true); parser.setFeature("http://cyberneko.org/html/features/balance-tags/ignore-outside-content", false); parser.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true); parser.setFeature("http://cyberneko.org/html/features/report-errors", LOG.isTraceEnabled()); } catch (SAXException e) { } // convert Document to DocumentFragment HTMLDocumentImpl doc = new HTMLDocumentImpl(); doc.setErrorChecking(false); DocumentFragment res = doc.createDocumentFragment(); DocumentFragment frag = doc.createDocumentFragment(); parser.parse(input, frag); res.appendChild(frag); try { while (true) { frag = doc.createDocumentFragment(); parser.parse(input, frag); if (!frag.hasChildNodes()) break; if (LOG.isInfoEnabled()) { LOG.info(" - new frag, " + frag.getChildNodes().getLength() + " nodes."); } res.appendChild(frag); } } catch (Exception x) { x.printStackTrace(LogUtil.getWarnStream(LOG)); } ; return res; }
From source file:org.apache.nutch.store.readable.StoreReadable.java
private DocumentFragment parseNeko(InputSource input) throws Exception { System.out.println("[STORE-READABLE]----------------------------------------------------parseNeko"); DOMFragmentParser parser = new DOMFragmentParser(); try {/*from w ww . ja v a 2 s . com*/ parser.setFeature("http://cyberneko.org/html/features/scanner/allow-selfclosing-iframe", true); parser.setFeature("http://cyberneko.org/html/features/augmentations", true); parser.setProperty("http://cyberneko.org/html/properties/default-encoding", defaultCharEncoding); parser.setFeature("http://cyberneko.org/html/features/scanner/ignore-specified-charset", true); parser.setFeature("http://cyberneko.org/html/features/balance-tags/ignore-outside-content", false); parser.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true); parser.setFeature("http://cyberneko.org/html/features/report-errors", LOG.isTraceEnabled()); } catch (SAXException e) { } // convert Document to DocumentFragment HTMLDocumentImpl doc = new HTMLDocumentImpl(); doc.setErrorChecking(false); DocumentFragment res = doc.createDocumentFragment(); DocumentFragment frag = doc.createDocumentFragment(); parser.parse(input, frag); res.appendChild(frag); try { while (true) { frag = doc.createDocumentFragment(); parser.parse(input, frag); if (!frag.hasChildNodes()) break; if (LOG.isInfoEnabled()) { LOG.info(" - new frag, " + frag.getChildNodes().getLength() + " nodes."); } res.appendChild(frag); } } catch (Exception x) { LOG.error("Failed with the following Exception: ", x); } ; return res; }
From source file:org.dita.dost.writer.ConrefPushParser.java
/** * The function is to judge if the pushed content type march the type of content being pushed/replaced * @param targetClassAttribute the class attribute of target element which is being pushed * @param content pushedContent/* w w w .j a va2s. c o m*/ * @return boolean: if type match, return true, else return false */ private boolean isPushedTypeMatch(final DitaClass targetClassAttribute, final DocumentFragment content) { DitaClass clazz = null; if (content.hasChildNodes()) { final NodeList nodeList = content.getChildNodes(); for (int i = 0; i < nodeList.getLength(); i++) { final Node node = nodeList.item(i); if (node.getNodeType() == Node.ELEMENT_NODE) { final Element elem = (Element) node; clazz = new DitaClass(elem.getAttribute(ATTRIBUTE_NAME_CLASS)); break; // get type of the target element } } } return targetClassAttribute.matches(clazz); }
From source file:org.dita.dost.writer.ConrefPushParser.java
/** * // w w w.j a v a 2s . co m * @param targetClassAttribute targetClassAttribute * @param content string * @return string */ private DocumentFragment replaceElementName(final DitaClass targetClassAttribute, final DocumentFragment content) { try { if (content.hasChildNodes()) { final NodeList nodeList = content.getChildNodes(); for (int i = 0; i < nodeList.getLength(); i++) { final Node node = nodeList.item(i); if (node.getNodeType() == Node.ELEMENT_NODE) { final Element elem = (Element) node; final DitaClass clazz = DitaClass.getInstance(elem); // get type of the target element final String type = targetClassAttribute.toString() .substring(1, targetClassAttribute.toString().indexOf("/")).trim(); if (!clazz.equals(targetClassAttribute) && targetClassAttribute.matches(clazz)) { // Specializing the pushing content is not handled here // but we can catch such a situation to emit a warning by comparing the class values. final String targetElementName = targetClassAttribute.toString() .substring(targetClassAttribute.toString().indexOf("/") + 1).trim(); if (elem.getAttributeNode(ATTRIBUTE_NAME_CONREF) != null) { hasConref = true; } if (elem.getAttributeNode(ATTRIBUTE_NAME_KEYREF) != null) { hasKeyref = true; } elem.getOwnerDocument().renameNode(elem, elem.getNamespaceURI(), targetElementName); // process the child nodes of the current node final NodeList nList = elem.getChildNodes(); for (int j = 0; j < nList.getLength(); j++) { final Node subNode = nList.item(j); if (subNode.getNodeType() == Node.ELEMENT_NODE) { //replace the subElement Name replaceSubElementName(type, (Element) subNode); } } } else { replaceSubElementName(STRING_BLANK, elem); } } } } } catch (final Exception e) { e.printStackTrace(); } return content; }