Example usage for org.w3c.dom.html HTMLDocument createDocumentFragment

Introduction

This page shows example usages of the createDocumentFragment method of org.w3c.dom.html.HTMLDocument.

Prototype

public DocumentFragment createDocumentFragment();

Source Link

Document

Creates an empty DocumentFragment object.

Usage

From source file:com.ewcms.common.io.HtmlStringUtil.java

/**
 * Extracts the text content of an HTML snippet.
 *
 * <p>The markup is parsed into a DOM {@code DocumentFragment}, the fragment is handed to the
 * {@code getPureText(Node)} overload, and that result is passed through {@code htmlDecode}.
 *
 * @param html the HTML markup to process
 * @return the value returned by {@code htmlDecode}, or {@code null} if any step throws
 *         (the failure is logged, not propagated)
 */
public static String getPureText(String html) {
    try {
        HTMLDocument document = new HTMLDocumentImpl();
        org.w3c.dom.DocumentFragment fragment = document.createDocumentFragment();
        DOMFragmentParser parser = new DOMFragmentParser();
        parser.parse(new InputSource(new StringReader(html)), fragment);
        String txt = getPureText((Node) fragment);
        return htmlDecode(txt);
    } catch (Exception e) {
        // The null-on-failure contract is kept, but the exception is handed to the
        // logger so the cause and stack trace are no longer silently dropped.
        logger.error("XML?", e);
        return null;
    }
}

From source file:de.innovationgate.wga.server.api.Html.java

/**
 * Parses HTML text and returns it as DOM document object. The returned document represents only the parsed fragment.
 * The HTML parser NekoHTML is responsible for parsing the HTML document as a DOM tree. It is tolerant against most "common errors" done on HTML documents and may parse a wide range of HTML structures. 
 * Note: NekoHTML converts all HTML tags name to uppercase even if they were specified as lowercase in the source code. This is important when querying the DOM via XPath.
 * @param html/*  w  w  w. java  2s .c o  m*/
 * @return The DOM document of the parsed HTML
 * @throws SAXException
 * @throws IOException
 */
public Document parseFragment(String html) throws WGException, SAXException, IOException {

    HTMLDocument document = new HTMLDocumentImpl();
    DOMFragmentParser parser = new DOMFragmentParser();
    DocumentFragment frag = document.createDocumentFragment();
    parser.parse(new InputSource(new StringReader(html)), frag);
    document.appendChild(frag);
    DOMReader xmlReader = new DOMReader();
    return xmlReader.read(document);

}

From source file:jef.tools.XMLUtils.java

/**
 * Parses HTML supplied by a character stream into a DOM fragment.
 *
 * @param in
 *            reader supplying the HTML text
 * @return the {@code DocumentFragment} filled in by the parser
 * @throws SAXException
 *             declared for parse failures
 * @throws IOException
 *             declared for read failures
 * @throws UnsupportedOperationException
 *             if the {@code parser} field is {@code null}
 */
public static DocumentFragment parseHTML(Reader in) throws SAXException, IOException {
    if (parser == null) {
        throw new UnsupportedOperationException(
                "HTML parser module not loaded, to activate this feature, you must add JEF common-ioc.jar to classpath");
    }
    final InputSource html = new InputSource(in);
    // The parser lives outside this method, so parsing is serialized on it.
    synchronized (parser) {
        final HTMLDocument owner = new HTMLDocumentImpl();
        final DocumentFragment result = owner.createDocumentFragment();
        parser.parse(html, result);
        return result;
    }
}

From source file:jef.tools.XMLUtils.java

/**
 * Parses HTML supplied by a byte stream into a DOM fragment.
 *
 * @param in
 *            stream supplying the HTML
 * @param charSet
 *            name of the character set used to decode the stream; when {@code null} the raw
 *            byte stream is handed to the parser as is
 * @return the {@code DocumentFragment} filled in by the parser
 * @throws SAXException
 *             declared for parse failures
 * @throws IOException
 *             declared for read failures
 * @throws UnsupportedOperationException
 *             if the {@code parser} field is {@code null}
 */
public static DocumentFragment parseHTML(InputStream in, String charSet) throws SAXException, IOException {
    if (parser == null) {
        throw new UnsupportedOperationException(
                "HTML parser module not loaded, to activate this feature, you must add JEF common-ioc.jar to classpath");
    }
    final InputSource html;
    if (charSet == null) {
        html = new InputSource(in);
    } else {
        // Decode with the requested charset and wrap the reader in XmlFixedReader.
        XmlFixedReader decoded = new XmlFixedReader(new InputStreamReader(in, charSet));
        html = new InputSource(decoded);
        html.setEncoding(charSet);
    }
    // The parser lives outside this method, so parsing is serialized on it.
    synchronized (parser) {
        final HTMLDocument owner = new HTMLDocumentImpl();
        final DocumentFragment result = owner.createDocumentFragment();
        parser.parse(html, result);
        return result;
    }
}

From source file:org.pentaho.di.www.CarteIT.java

/**
 * Parses an HTML string into a DOM fragment.
 *
 * @param content the HTML markup to parse
 * @return the {@code DocumentFragment} filled in by the parser, typed as {@code Node}
 * @throws SAXException declared for parse failures
 * @throws IOException declared for read failures
 */
public static Node parse(String content) throws SAXException, IOException {
    InputSource html = new InputSource(new StringReader(content));

    HTMLDocument owner = new HTMLDocumentImpl();
    DocumentFragment result = owner.createDocumentFragment();

    DOMFragmentParser fragmentParser = new DOMFragmentParser();
    fragmentParser.parse(html, result);
    return result;
}