List of usage examples for org.w3c.dom.html.HTMLDocument#createDocumentFragment()
public DocumentFragment createDocumentFragment();
Creates an empty DocumentFragment object.
From source file: com.ewcms.common.io.HtmlStringUtil.java
public static String getPureText(String html) { try {// www. ja v a2s . c om DOMFragmentParser parser; org.w3c.dom.DocumentFragment fragment; parser = new DOMFragmentParser(); HTMLDocument document = new HTMLDocumentImpl(); fragment = document.createDocumentFragment(); String txt; parser.parse(new InputSource(new StringReader(html)), fragment); txt = getPureText(((Node) (fragment))); return htmlDecode(txt); } catch (Exception e) { logger.error("XML?"); return null; } }
From source file:de.innovationgate.wga.server.api.Html.java
/** * Parses HTML text and returns it as DOM document object. The returned document represents only the parsed fragment. * The HTML parser NekoHTML is responsible for parsing the HTML document as a DOM tree. It is tolerant against most "common errors" done on HTML documents and may parse a wide range of HTML structures. * Note: NekoHTML converts all HTML tags name to uppercase even if they were specified as lowercase in the source code. This is important when querying the DOM via XPath. * @param html/* w w w. java 2s .c o m*/ * @return The DOM document of the parsed HTML * @throws SAXException * @throws IOException */ public Document parseFragment(String html) throws WGException, SAXException, IOException { HTMLDocument document = new HTMLDocumentImpl(); DOMFragmentParser parser = new DOMFragmentParser(); DocumentFragment frag = document.createDocumentFragment(); parser.parse(new InputSource(new StringReader(html)), frag); document.appendChild(frag); DOMReader xmlReader = new DOMReader(); return xmlReader.read(document); }
From source file:jef.tools.XMLUtils.java
/**
 * Parses HTML supplied by a character stream into a DOM fragment.
 * <p>
 * The shared {@code parser} is used while holding its monitor, and every call
 * parses into a fragment created from a new {@code HTMLDocumentImpl}.
 *
 * @param in reader that supplies the HTML text
 * @return the DocumentFragment populated by the parser
 * @throws SAXException if raised by the parser
 * @throws IOException if raised while reading the input
 * @throws UnsupportedOperationException if the shared {@code parser} is {@code null}
 */
public static DocumentFragment parseHTML(Reader in) throws SAXException, IOException {
    if (parser == null) {
        throw new UnsupportedOperationException(
                "HTML parser module not loaded, to activate this feature, you must add JEF common-ioc.jar to classpath");
    }
    InputSource input = new InputSource(in);
    synchronized (parser) {
        DocumentFragment result = new HTMLDocumentImpl().createDocumentFragment();
        parser.parse(input, result);
        return result;
    }
}
From source file:jef.tools.XMLUtils.java
/**
 * Parses HTML supplied by a byte stream into a DOM fragment.
 * <p>
 * When {@code charSet} is given, the stream is decoded with it, wrapped in an
 * {@code XmlFixedReader}, and the same name is recorded as the input source's
 * encoding. When it is {@code null}, the raw byte stream is handed to the
 * parser with no encoding set here.
 * <p>
 * The shared {@code parser} is used while holding its monitor, and every call
 * parses into a fragment created from a new {@code HTMLDocumentImpl}.
 *
 * @param in stream that supplies the HTML bytes
 * @param charSet name of the character set used to decode {@code in}; may be {@code null}
 * @return the DocumentFragment populated by the parser
 * @throws SAXException if raised by the parser
 * @throws IOException if raised while reading or decoding the input
 * @throws UnsupportedOperationException if the shared {@code parser} is {@code null}
 */
public static DocumentFragment parseHTML(InputStream in, String charSet) throws SAXException, IOException {
    if (parser == null) {
        throw new UnsupportedOperationException(
                "HTML parser module not loaded, to activate this feature, you must add JEF common-ioc.jar to classpath");
    }

    InputSource input;
    if (charSet == null) {
        input = new InputSource(in);
    } else {
        XmlFixedReader decoded = new XmlFixedReader(new InputStreamReader(in, charSet));
        input = new InputSource(decoded);
        input.setEncoding(charSet);
    }

    synchronized (parser) {
        DocumentFragment result = new HTMLDocumentImpl().createDocumentFragment();
        parser.parse(input, result);
        return result;
    }
}
From source file:org.pentaho.di.www.CarteIT.java
/**
 * Parses HTML text into a DOM fragment.
 *
 * @param content the HTML text to parse
 * @return the fragment populated by the parser, typed as a {@code Node}
 * @throws SAXException if raised by the parser
 * @throws IOException if raised while reading the content
 */
public static Node parse(String content) throws SAXException, IOException {
    DOMFragmentParser fragmentParser = new DOMFragmentParser();
    HTMLDocument owner = new HTMLDocumentImpl();
    DocumentFragment root = owner.createDocumentFragment();
    fragmentParser.parse(new InputSource(new StringReader(content)), root);
    return root;
}