Example usage for org.w3c.dom.html HTMLDocument createDocumentFragment

List of usage examples for org.w3c.dom.html HTMLDocument createDocumentFragment

Introduction

This page collects usage examples for the createDocumentFragment method of org.w3c.dom.html.HTMLDocument.

Prototype

public DocumentFragment createDocumentFragment();

Source Link

Document

Creates an empty DocumentFragment object.

Usage

From source file:com.ewcms.common.io.HtmlStringUtil.java

/**
 * Extracts the plain text of an HTML fragment.
 *
 * <p>The markup is parsed into a {@code DocumentFragment} created from a fresh
 * {@code HTMLDocumentImpl}. The fragment is then handed to the {@code getPureText(Node)}
 * overload and the resulting text is passed through {@code htmlDecode}.
 *
 * @param html the HTML markup to process
 * @return the value produced by {@code htmlDecode}, or {@code null} if any step threw
 *         an exception
 */
public static String getPureText(String html) {
    try {
        DOMFragmentParser parser = new DOMFragmentParser();
        HTMLDocument document = new HTMLDocumentImpl();
        org.w3c.dom.DocumentFragment fragment = document.createDocumentFragment();
        parser.parse(new InputSource(new StringReader(html)), fragment);
        String txt = getPureText((Node) fragment);
        return htmlDecode(txt);
    } catch (Exception e) {
        // Pass the exception to the logger so the cause and stack trace are not lost;
        // callers still receive null on failure, as before.
        logger.error("XML?", e);
        return null;
    }
}

From source file:de.innovationgate.wga.server.api.Html.java

/**
 * Parses HTML text and returns it as DOM document object. The returned document represents only the parsed fragment.
 * The HTML parser NekoHTML is responsible for parsing the HTML document as a DOM tree. It is tolerant against most "common errors" done on HTML documents and may parse a wide range of HTML structures. 
 * Note: NekoHTML converts all HTML tags name to uppercase even if they were specified as lowercase in the source code. This is important when querying the DOM via XPath.
 * @param html/*  w  w  w. java  2s .c o  m*/
 * @return The DOM document of the parsed HTML
 * @throws SAXException
 * @throws IOException
 */
public Document parseFragment(String html) throws WGException, SAXException, IOException {

    HTMLDocument document = new HTMLDocumentImpl();
    DOMFragmentParser parser = new DOMFragmentParser();
    DocumentFragment frag = document.createDocumentFragment();
    parser.parse(new InputSource(new StringReader(html)), frag);
    document.appendChild(frag);
    DOMReader xmlReader = new DOMReader();
    return xmlReader.read(document);

}

From source file:jef.tools.XMLUtils.java

/**
 * Parses HTML supplied by a character stream into a DOM fragment.
 *
 * <p>Each call creates a fresh {@code HTMLDocumentImpl} that owns the returned fragment.
 * The parse itself runs while holding the monitor of the shared {@code parser}.
 *
 * @param in
 *            the reader supplying the HTML text
 * @return the {@code DocumentFragment} filled in by the parser
 * @throws SAXException
 *             propagated from the parser
 * @throws IOException
 *             propagated from the parser
 * @throws UnsupportedOperationException
 *             if {@code parser} is {@code null}, i.e. no HTML parser is available
 */
public static DocumentFragment parseHTML(Reader in) throws SAXException, IOException {
    if (parser == null) {
        throw new UnsupportedOperationException(
                "HTML parser module not loaded, to activate this feature, you must add JEF common-ioc.jar to classpath");
    }
    final InputSource input = new InputSource(in);
    synchronized (parser) {
        final HTMLDocument owner = new HTMLDocumentImpl();
        final DocumentFragment result = owner.createDocumentFragment();
        parser.parse(input, result);
        return result;
    }
}

From source file:jef.tools.XMLUtils.java

/**
 * Parses HTML supplied by a byte stream into a DOM fragment.
 *
 * <p>When {@code charSet} is given, the stream is decoded with that charset, wrapped in an
 * {@code XmlFixedReader}, and the charset is also recorded as the encoding of the input
 * source. When {@code charSet} is {@code null}, the raw byte stream is handed to the parser
 * as is. The parse itself runs while holding the monitor of the shared {@code parser}.
 *
 * @param in
 *            the stream supplying the HTML bytes
 * @param charSet
 *            name of the charset used to decode {@code in}; may be {@code null}
 * @return the {@code DocumentFragment} filled in by the parser
 * @throws SAXException
 *             propagated from the parser
 * @throws IOException
 *             propagated from the parser, or raised when the stream reader cannot be
 *             created for {@code charSet}
 * @throws UnsupportedOperationException
 *             if {@code parser} is {@code null}, i.e. no HTML parser is available
 */
public static DocumentFragment parseHTML(InputStream in, String charSet) throws SAXException, IOException {
    if (parser == null) {
        throw new UnsupportedOperationException(
                "HTML parser module not loaded, to activate this feature, you must add JEF common-ioc.jar to classpath");
    }
    final InputSource input;
    if (charSet == null) {
        input = new InputSource(in);
    } else {
        final InputStreamReader decoded = new InputStreamReader(in, charSet);
        input = new InputSource(new XmlFixedReader(decoded));
        input.setEncoding(charSet);
    }
    synchronized (parser) {
        final HTMLDocument owner = new HTMLDocumentImpl();
        final DocumentFragment result = owner.createDocumentFragment();
        parser.parse(input, result);
        return result;
    }
}

From source file:org.pentaho.di.www.CarteIT.java

/**
 * Parses an HTML string into a DOM fragment.
 *
 * <p>A new {@code DOMFragmentParser} and a new {@code HTMLDocumentImpl} are created on
 * every call; the fragment is created from that document.
 *
 * @param content the HTML text to parse
 * @return the populated {@code DocumentFragment}, typed as {@code Node}
 * @throws SAXException propagated from the parser
 * @throws IOException propagated from the parser
 */
public static Node parse(String content) throws SAXException, IOException {
    final DOMFragmentParser fragmentParser = new DOMFragmentParser();
    final HTMLDocument owner = new HTMLDocumentImpl();
    final DocumentFragment target = owner.createDocumentFragment();
    fragmentParser.parse(new InputSource(new StringReader(content)), target);
    return target;
}