Example usage for javax.swing.text.html.parser ParserDelegator parse

Introduction

This page collects example usages of the parse method of javax.swing.text.html.parser.ParserDelegator.

Prototype

public void parse(Reader r, HTMLEditorKit.ParserCallback cb, boolean ignoreCharSet) throws IOException

Source Link

Usage

From source file:Main.java

/**
 * Parses the file {@code a.html} in the working directory and prints the list of
 * text fragments the HTML parser reports for it.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if the file cannot be opened or read
 */
public static void main(String[] args) throws Exception {
    final List<String> list = new ArrayList<String>();
    ParserDelegator parserDelegator = new ParserDelegator();
    // Only text is of interest here; the handlers that are not overridden keep the
    // empty implementations inherited from ParserCallback.
    ParserCallback parserCallback = new ParserCallback() {
        @Override
        public void handleText(final char[] data, final int pos) {
            list.add(new String(data));
        }
    };
    // try-with-resources guarantees the reader is closed even when parsing fails.
    try (FileReader reader = new FileReader("a.html")) {
        parserDelegator.parse(reader, parserCallback, true);
    }
    System.out.println(list);
}

From source file:Main.java

/**
 * Parses the file {@code a.html} in the working directory and prints the
 * {@code href} attribute of every {@code <a>} start tag found in it.
 * An anchor without an {@code href} contributes a {@code null} entry.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if the file cannot be opened or read
 */
public static final void main(String[] args) throws Exception {
    final ArrayList<String> list = new ArrayList<String>();

    ParserDelegator parserDelegator = new ParserDelegator();
    // Only start tags matter here; the handlers that are not overridden keep the
    // empty implementations inherited from ParserCallback.
    ParserCallback parserCallback = new ParserCallback() {
        @Override
        public void handleStartTag(Tag tag, MutableAttributeSet attribute, int pos) {
            if (tag == Tag.A) {
                String address = (String) attribute.getAttribute(Attribute.HREF);
                list.add(address);
            }
        }
    };
    // try-with-resources guarantees the reader is closed even when parsing fails.
    try (FileReader reader = new FileReader("a.html")) {
        parserDelegator.parse(reader, parserCallback, false);
    }
    System.out.println(list);
}

From source file:Main.java

/**
 * Replaces the document of {@code pane} with one parsed from {@code htmlText}.
 * A {@code null} or empty string just sets the pane's text to the empty string.
 * An {@link IOException} raised while parsing is printed to standard error and the
 * document built so far is still installed.
 *
 * @param htmlText HTML markup to show; may be {@code null}
 * @param pane     editor pane that receives the new document
 */
public static void setPaneHtmlText(String htmlText, JEditorPane pane) {
    if (htmlText == null || htmlText.length() == 0) {
        pane.setText("");
        return;
    }

    HTMLEditorKit htmlKit = (HTMLEditorKit) pane.getEditorKitForContentType("text/html");
    HTMLDocument document = (HTMLDocument) htmlKit.createDefaultDocument();

    try {
        // Feed the markup straight into the document's own reader callback.
        new ParserDelegator().parse(new StringReader(htmlText), document.getReader(0), true);
    } catch (IOException ex) {
        ex.printStackTrace();
    }
    pane.setDocument(document);
}

From source file:edu.cornell.mannlib.vitro.webapp.utils.Html2Text.java

/**
 * Replaces the buffer {@code s} with a fresh one and runs the HTML parser over
 * {@code in}, with this object receiving the parser callbacks.
 *
 * @param in source of the HTML markup
 * @throws IOException if the parser fails to read from {@code in}
 */
public void parse(Reader in) throws IOException {
    s = new StringBuffer();
    // The final argument is true so that charset directives in the markup are ignored.
    new ParserDelegator().parse(in, this, true);
}

From source file:com.net2plan.utils.HTMLUtils.java

/**
 * Resolves the {@code src} of every {@code <img>} in {@code html} against {@code url}
 * and substitutes the resolved form into the markup.
 *
 * <p>The substitution is a plain textual replacement of each distinct {@code src}
 * value, so every occurrence of that text in the markup is rewritten.
 * Images without a usable {@code src} (missing or empty) are ignored.
 *
 * @param html HTML markup to process
 * @param url  base URL the image paths are resolved against
 * @return the markup with the image paths replaced by their resolved form
 * @throws RuntimeException if reading the markup fails or a path cannot be turned
 *                          into a URL
 */
private static String prepareImagePath(String html, URL url) {
    final Set<String> sources = new TreeSet<String>();
    final ParserDelegator parserDelegator = new ParserDelegator();
    final HTMLEditorKit.ParserCallback parserCallback = new HTMLEditorKit.ParserCallback() {
        @Override
        public void handleStartTag(HTML.Tag tag, MutableAttributeSet attribute, int pos) {
            collectImageSource(tag, attribute);
        }

        @Override
        public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, final int pos) {
            collectImageSource(t, a);
        }

        // Records the src of an IMG tag. A missing src would be null (which TreeSet
        // rejects) and an empty one would make String.replace insert the URL between
        // every character, so both are skipped.
        private void collectImageSource(HTML.Tag tag, MutableAttributeSet attributes) {
            if (tag != HTML.Tag.IMG) {
                return;
            }
            Object source = attributes.getAttribute(HTML.Attribute.SRC);
            if (source instanceof String && !((String) source).isEmpty()) {
                sources.add((String) source);
            }
        }
    };

    final Reader reader = new StringReader(html);
    try {
        parserDelegator.parse(reader, parserCallback, true);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    for (String item : sources) {
        try {
            URL newURL = new URL(url, item);
            html = html.replace(item, newURL.toExternalForm());
        } catch (IOException e) {
            // new URL(...) reports a bad path as MalformedURLException, an IOException.
            throw new RuntimeException(e);
        }
    }

    return html;
}

From source file:gov.va.med.pharmacy.peps.presentation.common.displaytag.CsvView.java

/**
 * Extracts the text content of an HTML string.
 * Every text fragment reported by the parser is appended to the result preceded by
 * a single space, so a non-empty result always starts with a space.
 *
 * @param html Input String
 * @return text String value
 * @throws IOException IOException
 */
String extractText(String html) throws IOException {
    final ArrayList<String> list = new ArrayList<String>();

    ParserDelegator parserDelegator = new ParserDelegator();
    // Only text callbacks are needed; the handlers that are not overridden keep the
    // empty implementations inherited from ParserCallback.
    ParserCallback parserCallback = new ParserCallback() {
        @Override
        public void handleText(final char[] data, final int pos) {
            list.add(new String(data));
        }
    };
    parserDelegator.parse(new StringReader(html), parserCallback, true);

    // A builder avoids re-copying the whole accumulated text for every fragment.
    StringBuilder text = new StringBuilder();
    for (String s : list) {
        text.append(' ').append(s);
    }

    return text.toString();
}

From source file:gov.va.med.pharmacy.peps.presentation.common.displaytag.DefaultHssfExportView.java

/**
 * Parse HTML to String/*from w  ww .  j  a  v a2 s . co  m*/
 * @param html Input String to convert
 * @return text String value that is returned
 * @throws IOException IOException
 */
String extractText(String html) throws IOException {
    final ArrayList<String> list = new ArrayList<String>();

    // instantiate the instanced of the delegator and callback classes for parsing
    ParserDelegator parserDelegator = new ParserDelegator();
    ParserCallback parserCallback = new ParserCallback() {

        public void handleText(final char[] data, final int pos) {
            list.add(new String(data));
        }

        // handle the end tag
        public void handleEndTag(Tag t, final int pos) {
        }

        // handle comments
        public void handleComment(final char[] data, final int pos) {
        }

        // handle errors
        public void handleError(final java.lang.String errMsg, final int pos) {
        }
    };
    parserDelegator.parse(new StringReader(html), parserCallback, true);

    StringBuffer text = new StringBuffer("");

    // add a space as necessary
    for (String s : list) {
        text.append(" ").append(s);
    }

    return text.toString();
}

From source file:com.clustercontrol.http.util.GetHttpResponse.java

/**
 * Requests the given URL and records the outcome in {@code m_requestResult}.
 *
 * <p>When {@code m_authType} is set to something other than {@code NONE}, credentials
 * built from {@code m_authUser}/{@code m_authPassword} are registered for the URL's
 * host and port unless the provider already holds credentials for that scope.
 * A non-empty {@code post} string is sent as a form-encoded POST; otherwise a GET is
 * issued. Response time, status code and headers are stored in the result. For a
 * 200 response whose Content-Type contains one of the configured target types, the
 * body is read and decoded and stored as well.
 *
 * <p>No exception escapes this method: failures are logged at info level and stored
 * in the result's {@code errorMessage} and {@code exception} fields.
 *
 * @param url  URL to request
 * @param post form data as {@code name=value} pairs joined by {@code &}; pairs
 *             without {@code =} or with an empty name are skipped. {@code null} or
 *             empty selects a GET request
 * @return the {@code success} flag of the result, which is set as soon as the status
 *         code is 200, even if the body is then skipped because of its Content-Type
 */
public boolean execute(String url, String post) {
    Response result = new Response();
    try {
        // NOTE(review): the client is not closed here; presumably getHttpClient()
        // hands out a shared instance. Confirm against its implementation.
        CloseableHttpClient client = getHttpClient();

        result.url = url;
        if (m_authType != null && !AuthType.NONE.equals(m_authType)) {
            URI uri = new URI(url);

            Credentials credential = null;
            String authSchema = null;
            switch (m_authType) {
            case BASIC:
                credential = new UsernamePasswordCredentials(m_authUser, m_authPassword);
                authSchema = "basic";
                break;
            case NTLM:
                credential = new NTCredentials(m_authUser, m_authPassword, null, null);
                authSchema = "ntlm";
                break;
            case DIGEST:
                credential = new UsernamePasswordCredentials(m_authUser, m_authPassword);
                authSchema = "digest";
                break;
            default:
                // Unknown type: credential stays null, so the request goes out unauthenticated.
                m_log.warn("Auth type is unexpected value. AuthType = " + m_authType.name());
            }

            if (credential != null) {
                AuthScope scope = new AuthScope(uri.getHost(), uri.getPort(), AuthScope.ANY_REALM, authSchema);
                // Register only once per scope; existing credentials are left untouched.
                if (m_cledentialProvider.getCredentials(scope) == null) {
                    m_cledentialProvider.setCredentials(scope, credential);
                }
            }
        }

        HttpRequestBase request = null;
        if (post != null && !post.isEmpty()) {
            List<NameValuePair> urlParameters = new ArrayList<NameValuePair>();

            // Split "a=1&b=2" into pairs; entries with no '=' or an empty name are dropped.
            for (String ss : post.split("&")) {
                int index = ss.indexOf("=");
                if (index <= 0) {
                    continue;
                }
                urlParameters.add(new BasicNameValuePair(ss.substring(0, index), ss.substring(index + 1)));
            }
            if (m_log.isTraceEnabled()) {
                m_log.trace("post1=" + post + ", post2=" + urlParameters);
            }

            HttpPost requestPost = new HttpPost(url);
            // The form charset comes from a property; UTF-8 is kept when the configured
            // name is not a supported charset.
            Charset charset = Consts.UTF_8;
            try {
                charset = Charset.forName(
                        HinemosPropertyUtil.getHinemosPropertyStr("monitor.http.post.charset", "UTF-8"));
            } catch (UnsupportedCharsetException e) {
                m_log.warn("UnsupportedCharsetException " + e.getMessage());
            }
            requestPost.setEntity(new UrlEncodedFormEntity(urlParameters, charset));
            requestPost.addHeader(HTTP.CONTENT_TYPE, "application/x-www-form-urlencoded");
            request = requestPost;
        } else {
            request = new HttpGet(url);
        }

        // Execute the method.
        try {
            // The response time covers only the client.execute call, not reading the body.
            long start = HinemosTime.currentTimeMillis();
            HttpResponse response = client.execute(request);
            result.responseTime = HinemosTime.currentTimeMillis() - start;

            result.statusCode = response.getStatusLine().getStatusCode();

            // Header
            Header[] headers = response.getAllHeaders();
            if (headers != null && headers.length > 0) {
                // One header per line, without a trailing newline.
                StringBuffer header = new StringBuffer();
                for (int i = 0; i < headers.length; i++) {
                    header.append((i != 0 ? "\n" : "") + headers[i]);
                }
                result.headerString = header.toString();
                result.headers = Arrays.asList(headers);
            }

            if (result.statusCode == HttpStatus.SC_OK) {
                result.success = true;

                // The body is read only when the Content-Type contains one of the
                // configured target types (comma-separated property, default "text").
                Header header = response.getFirstHeader(HTTP.CONTENT_TYPE);

                boolean contentTypeFlag = false;
                String[] contentTypes = HinemosPropertyUtil
                        .getHinemosPropertyStr(TARGET_CONTENT_TYPE_KEY, "text").split(",");

                if (header != null && header.getValue() != null) {
                    String value = header.getValue();
                    for (String contentType : contentTypes) {
                        if (value.indexOf(contentType) != -1) {
                            contentTypeFlag = true;
                            break;
                        }
                    }
                }

                if (contentTypeFlag) {
                    ByteArrayOutputStream out = new ByteArrayOutputStream();
                    try (InputStream in = response.getEntity().getContent()) {
                        byte[] buffer = new byte[BUFF_SIZE];
                        // Stop once at least BODY_MAX_SIZE bytes are buffered; the last
                        // chunk may take the total slightly over that limit.
                        while (out.size() < BODY_MAX_SIZE) {
                            int len = in.read(buffer);
                            if (len < 0) {
                                break;
                            }
                            out.write(buffer, 0, len);
                        }
                    }

                    // Determine the character encoding used to decode the body.
                    // It may be declared in the HTTP header:
                    //
                    // Content-Type: text/html; charset=euc-jp
                    //
                    // or in a meta tag:
                    //
                    // <meta http-equiv="Content-Type" content="text/html; charset=euc-jp">
                    // <meta charset="euc-jp">
                    //
                    // NOTE(review): the original (non-English) comment here was lost to
                    // an encoding error; the steps below are reconstructed from the code
                    // that follows and should be confirmed.
                    //
                    // 1. Use the charset captured by chasetPattern when it matches the
                    //    HTTP Content-Type header value.
                    //
                    // 2. Otherwise fall back to "JISAutoDetect".
                    //
                    // 3. Decode the body with that charset and run it through the HTML
                    //    parser with a CharsetParser callback (presumably to pick up a
                    //    charset declared in a meta tag).
                    //
                    // 4. If the callback reports a charset that differs from the one
                    //    used, decode the raw bytes again with the reported charset.

                    String charset = "JISAutoDetect";
                    Matcher m = chasetPattern.matcher(header.getValue());
                    if (m.matches())
                        charset = m.group(1);

                    String content = new String(out.toByteArray(), charset);

                    CharsetParser parser = new CharsetParser();
                    ParserDelegator p = new ParserDelegator();
                    p.parse(new StringReader(content), parser, true);

                    if (parser.charset != null && !charset.equals(parser.charset)) {
                        charset = parser.charset;
                        content = new String(out.toByteArray(), charset);
                    }
                    result.responseBody = content;
                } else {
                    // success stays true here; only the body is withheld.
                    result.errorMessage = MessageConstant.MESSAGE_FAIL_TO_CHECK_NOT_TEXT.getMessage();
                }
            } else {
                result.errorMessage = response.getStatusLine().toString();
            }
        } finally {
            request.releaseConnection();
        }
    } catch (UnsupportedEncodingException e) {
        // Raised when the charset name used to decode the body is not supported.
        m_log.info("execute(): " + e.getMessage() + " class=" + e.getClass().getName());
        result.errorMessage = "http receiving failure. (unsupported encoding)";
        result.exception = e;
    } catch (IOException e) {
        m_log.info("execute(): Fatal transport error. " + e.getMessage() + " class=" + e.getClass().getName());
        result.errorMessage = "http requesting failure. (I/O error : unreachable or timeout)";
        result.exception = e;
    } catch (Exception e) {
        // Catch-all so that any other failure (e.g. a malformed URL) is reported
        // through the result instead of propagating.
        m_log.info("execute(): " + e.getMessage() + " class=" + e.getClass().getName());
        result.errorMessage = "http requesting failure. " + e.getMessage() + "(" + e.getClass().getSimpleName()
                + ")";
        result.exception = e;
    }

    m_requestResult = result;

    return m_requestResult.success;
}

From source file:de.innovationgate.utils.WGUtils.java

/**
 * Turns HTML markup into plain text by dropping all HTML tags.
 *
 * @param html
 *            The html
 * @param divider
 *            The divider placed between the separate text fragments that were
 *            parsed from the HTML.
 * @param ignoreWhitespace
 *            Specify true if text fragments consisting only of whitespace
 *            should be ignored
 * @return The plain text
 * @throws IOException
 */
public static String toPlainText(String html, String divider, boolean ignoreWhitespace) throws IOException {

    // Data URLs may bloat the process, so they are removed first. Each row holds the
    // text searched for and the text written in its place; rows are applied in order.
    final String[][] dataUrlAttributes = { { "src=\"data:", "src=\"" }, { "href=\"data:", "href=\"" } };

    String stripped = html;
    for (String[] entry : dataUrlAttributes) {
        final String attributeOpening = entry[1];
        stripped = WGUtils.strReplace(stripped, entry[0], new ReplaceProcessor() {

            @Override
            public int replace(String text, int from, int to, Writer out) throws IOException {
                // Emit only the attribute opening and report the position of the
                // closing quote, or the last index of the text when there is none.
                int closingQuote = text.indexOf("\"", to);
                out.write(attributeOpening);
                return closingQuote != -1 ? closingQuote : text.length() - 1;
            }

        }, true);
    }

    // Convert to plaintext
    PlainTextParserCallback textCallback = new PlainTextParserCallback(ignoreWhitespace, divider);
    new ParserDelegator().parse(new java.io.StringReader(stripped), textCallback, true);
    return textCallback.getText();

}