List of usage examples for javax.swing.text.html.parser ParserDelegator parse
public void parse(Reader r, HTMLEditorKit.ParserCallback cb, boolean ignoreCharSet) throws IOException
From source file:Main.java
public static void main(String[] args) throws Exception { final List<String> list = new ArrayList<String>(); ParserDelegator parserDelegator = new ParserDelegator(); ParserCallback parserCallback = new ParserCallback() { public void handleText(final char[] data, final int pos) { list.add(new String(data)); }// w w w.j ava 2 s . com public void handleStartTag(Tag tag, MutableAttributeSet attribute, int pos) { } public void handleEndTag(Tag t, final int pos) { } public void handleSimpleTag(Tag t, MutableAttributeSet a, final int pos) { } public void handleComment(final char[] data, final int pos) { } public void handleError(final java.lang.String errMsg, final int pos) { } }; parserDelegator.parse(new FileReader("a.html"), parserCallback, true); System.out.println(list); }
From source file:Main.java
/**
 * Parses {@code a.html} from the working directory and prints the
 * {@code href} value of every {@code <a>} start tag found in it.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if the file cannot be opened or read
 */
public final static void main(String[] args) throws Exception {
    final ArrayList<String> list = new ArrayList<String>();
    ParserDelegator parserDelegator = new ParserDelegator();
    // Only start tags are of interest; every other ParserCallback method
    // keeps its inherited no-op implementation.
    ParserCallback parserCallback = new ParserCallback() {
        @Override
        public void handleStartTag(Tag tag, MutableAttributeSet attribute, int pos) {
            if (tag == Tag.A) {
                // An anchor without an href contributes a null entry, as before.
                String address = (String) attribute.getAttribute(Attribute.HREF);
                list.add(address);
            }
        }
    };
    // try-with-resources closes the file even when parsing fails.
    try (FileReader reader = new FileReader("a.html")) {
        // third argument false: a charset directive in the markup is not ignored
        parserDelegator.parse(reader, parserCallback, false);
    }
    System.out.println(list);
}
From source file:Main.java
/**
 * Parses HTML markup into a fresh {@link HTMLDocument} and installs that
 * document on the given pane. A {@code null} or empty string clears the
 * pane's text instead.
 *
 * @param htmlText the HTML markup to show; may be {@code null} or empty
 * @param pane     the editor pane that receives the document
 */
public static void setPaneHtmlText(String htmlText, JEditorPane pane) {
    if (htmlText == null || htmlText.length() == 0) {
        pane.setText("");
        return;
    }
    HTMLEditorKit kit = (HTMLEditorKit) pane.getEditorKitForContentType("text/html");
    HTMLDocument doc = (HTMLDocument) kit.createDefaultDocument();
    HTMLEditorKit.ParserCallback receiver = doc.getReader(0);
    ParserDelegator parser = new ParserDelegator();
    try (StringReader htmReader = new StringReader(htmlText)) {
        parser.parse(htmReader, receiver, true);
        // The document reader buffers element specs; flush() must be the last
        // call on it, otherwise short inputs never reach the document.
        receiver.flush();
    } catch (IOException | javax.swing.text.BadLocationException ex) {
        ex.printStackTrace();
    }
    pane.setDocument(doc);
}
From source file:edu.cornell.mannlib.vitro.webapp.utils.Html2Text.java
public void parse(Reader in) throws IOException { s = new StringBuffer(); ParserDelegator delegator = new ParserDelegator(); // the third parameter is TRUE to ignore charset directive delegator.parse(in, this, Boolean.TRUE); }
From source file:com.net2plan.utils.HTMLUtils.java
private static String prepareImagePath(String html, URL url) { final Set<String> list = new TreeSet<String>(); final ParserDelegator parserDelegator = new ParserDelegator(); final HTMLEditorKit.ParserCallback parserCallback = new HTMLEditorKit.ParserCallback() { @Override//from ww w . ja v a 2 s . c o m public void handleText(final char[] data, final int pos) { } @Override public void handleStartTag(HTML.Tag tag, MutableAttributeSet attribute, int pos) { if (tag == HTML.Tag.IMG) { String address = (String) attribute.getAttribute(HTML.Attribute.SRC); list.add(address); } } @Override public void handleEndTag(HTML.Tag t, final int pos) { } @Override public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, final int pos) { if (t == HTML.Tag.IMG) { String address = (String) a.getAttribute(HTML.Attribute.SRC); list.add(address); } } @Override public void handleComment(final char[] data, final int pos) { } @Override public void handleError(final String errMsg, final int pos) { } }; final Reader reader = new StringReader(html); try { parserDelegator.parse(reader, parserCallback, true); } catch (IOException e) { throw new RuntimeException(e); } for (String item : list) { try { URL newURL = new URL(url, item); html = html.replace(item, newURL.toExternalForm()); } catch (Throwable e) { throw new RuntimeException(e); } } return html; }
From source file:gov.va.med.pharmacy.peps.presentation.common.displaytag.CsvView.java
/**
 * Extracts the text content of an HTML string.
 *
 * <p>Every text fragment reported by the parser is appended to the result
 * preceded by a single space, so a non-empty result starts with a space.
 *
 * @param html input HTML
 * @return the concatenated text fragments, or an empty string when the
 *         markup contains no text
 * @throws IOException if the parser fails while reading the input
 */
String extractText(String html) throws IOException {
    // One builder replaces the intermediate list and the repeated string
    // concatenation, which copied the whole result for every fragment.
    final StringBuilder text = new StringBuilder();
    ParserCallback parserCallback = new ParserCallback() {
        @Override
        public void handleText(final char[] data, final int pos) {
            text.append(' ').append(data);
        }
    };
    // third argument true: ignore any charset directive in the markup
    new ParserDelegator().parse(new StringReader(html), parserCallback, true);
    return text.toString();
}
From source file:gov.va.med.pharmacy.peps.presentation.common.displaytag.DefaultHssfExportView.java
/** * Parse HTML to String/*from w ww . j a v a2 s . co m*/ * @param html Input String to convert * @return text String value that is returned * @throws IOException IOException */ String extractText(String html) throws IOException { final ArrayList<String> list = new ArrayList<String>(); // instantiate the instanced of the delegator and callback classes for parsing ParserDelegator parserDelegator = new ParserDelegator(); ParserCallback parserCallback = new ParserCallback() { public void handleText(final char[] data, final int pos) { list.add(new String(data)); } // handle the end tag public void handleEndTag(Tag t, final int pos) { } // handle comments public void handleComment(final char[] data, final int pos) { } // handle errors public void handleError(final java.lang.String errMsg, final int pos) { } }; parserDelegator.parse(new StringReader(html), parserCallback, true); StringBuffer text = new StringBuffer(""); // add a space as necessary for (String s : list) { text.append(" ").append(s); } return text.toString(); }
From source file:com.clustercontrol.http.util.GetHttpResponse.java
/**
 * Sends an HTTP request to the given URL and stores the outcome in
 * {@code m_requestResult}.
 *
 * <p>A POST with a form-encoded body is sent when {@code post} is non-empty,
 * otherwise a GET. The status code, response time and headers are recorded.
 * For a 200 response whose Content-Type contains one of the configured
 * target types, the body is read and decoded as well.
 *
 * <p>No exception leaves this method: every failure is logged and recorded
 * in the result's {@code errorMessage} and {@code exception} fields.
 *
 * @param url  the URL to request
 * @param post form parameters as {@code name=value} pairs joined by
 *             {@code &}; {@code null} or empty selects a GET request
 * @return the {@code success} field of the recorded result, which this
 *         method sets to {@code true} when the status code is 200
 */
public boolean execute(String url, String post) {
    Response result = new Response();
    try {
        CloseableHttpClient client = getHttpClient();
        result.url = url;

        // Register credentials for the target host when an auth type is configured.
        if (m_authType != null && !AuthType.NONE.equals(m_authType)) {
            URI uri = new URI(url);
            Credentials credential = null;
            String authSchema = null;
            switch (m_authType) {
            case BASIC:
                credential = new UsernamePasswordCredentials(m_authUser, m_authPassword);
                authSchema = "basic";
                break;
            case NTLM:
                credential = new NTCredentials(m_authUser, m_authPassword, null, null);
                authSchema = "ntlm";
                break;
            case DIGEST:
                credential = new UsernamePasswordCredentials(m_authUser, m_authPassword);
                authSchema = "digest";
                break;
            default:
                m_log.warn("Auth type is unexpected value. AuthType = " + m_authType.name());
            }
            if (credential != null) {
                AuthScope scope = new AuthScope(uri.getHost(), uri.getPort(), AuthScope.ANY_REALM, authSchema);
                // Credentials already registered for this scope are left untouched.
                if (m_cledentialProvider.getCredentials(scope) == null) {
                    m_cledentialProvider.setCredentials(scope, credential);
                }
            }
        }

        HttpRequestBase request = null;
        if (post != null && !post.isEmpty()) {
            // Split the POST string into name/value pairs at the first '=' of each
            // '&'-separated entry; entries without '=' or with an empty name are skipped.
            List<NameValuePair> urlParameters = new ArrayList<NameValuePair>();
            for (String ss : post.split("&")) {
                int index = ss.indexOf("=");
                if (index <= 0) {
                    continue;
                }
                urlParameters.add(new BasicNameValuePair(ss.substring(0, index), ss.substring(index + 1)));
            }
            if (m_log.isTraceEnabled()) {
                m_log.trace("post1=" + post + ", post2=" + urlParameters);
            }
            HttpPost requestPost = new HttpPost(url);
            // The form charset comes from the "monitor.http.post.charset" property;
            // UTF-8 is kept when the configured name is not supported.
            Charset charset = Consts.UTF_8;
            try {
                charset = Charset.forName(
                        HinemosPropertyUtil.getHinemosPropertyStr("monitor.http.post.charset", "UTF-8"));
            } catch (UnsupportedCharsetException e) {
                m_log.warn("UnsupportedCharsetException " + e.getMessage());
            }
            requestPost.setEntity(new UrlEncodedFormEntity(urlParameters, charset));
            requestPost.addHeader(HTTP.CONTENT_TYPE, "application/x-www-form-urlencoded");
            request = requestPost;
        } else {
            request = new HttpGet(url);
        }

        // Execute the method.
        try {
            long start = HinemosTime.currentTimeMillis();
            HttpResponse response = client.execute(request);
            result.responseTime = HinemosTime.currentTimeMillis() - start;
            result.statusCode = response.getStatusLine().getStatusCode();

            // Headers: keep them both as one newline-separated string and as a list.
            Header[] headers = response.getAllHeaders();
            if (headers != null && headers.length > 0) {
                StringBuffer header = new StringBuffer();
                for (int i = 0; i < headers.length; i++) {
                    header.append((i != 0 ? "\n" : "") + headers[i]);
                }
                result.headerString = header.toString();
                result.headers = Arrays.asList(headers);
            }

            if (result.statusCode == HttpStatus.SC_OK) {
                // NOTE(review): success is set before the body is read, so a failure
                // while reading or decoding the body leaves success == true together
                // with an error message — confirm this is intended.
                result.success = true;

                // The body is read only when the Content-Type header value contains
                // one of the comma-separated target types (default "text").
                Header header = response.getFirstHeader(HTTP.CONTENT_TYPE);
                boolean contentTypeFlag = false;
                String[] contentTypes = HinemosPropertyUtil
                        .getHinemosPropertyStr(TARGET_CONTENT_TYPE_KEY, "text").split(",");
                if (header != null && header.getValue() != null) {
                    String value = header.getValue();
                    for (String contentType : contentTypes) {
                        if (value.indexOf(contentType) != -1) {
                            contentTypeFlag = true;
                            break;
                        }
                    }
                }

                if (contentTypeFlag) {
                    ByteArrayOutputStream out = new ByteArrayOutputStream();
                    // NOTE(review): getEntity() is dereferenced without a null check;
                    // verify a 200 response without a body cannot occur here.
                    try (InputStream in = response.getEntity().getContent()) {
                        byte[] buffer = new byte[BUFF_SIZE];
                        // The size is checked before each read, so the buffered body can
                        // exceed BODY_MAX_SIZE by up to one buffer.
                        while (out.size() < BODY_MAX_SIZE) {
                            int len = in.read(buffer);
                            if (len < 0) {
                                break;
                            }
                            out.write(buffer, 0, len);
                        }
                    }

                    // The character set may be announced in the HTTP header and/or in
                    // the document itself.
                    //
                    // HTTP header:
                    //
                    // Content-Type: text/html; charset=euc-jp
                    //
                    // meta tag:
                    //
                    // <meta http-equiv="Content-Type" content="text/html; charset=euc-jp">
                    // <meta charset="euc-jp">
                    //
                    // The body is decoded as follows:
                    //
                    // 1. If the Content-Type header value matches chasetPattern, use
                    //    the charset captured in group 1.
                    //
                    // 2. Otherwise start with "JISAutoDetect".
                    //
                    // 3. Decode the body with that charset and run it through
                    //    CharsetParser (presumably picks up the charset declared in a
                    //    meta tag — CharsetParser is defined elsewhere; confirm).
                    //
                    // 4. If the parser reports a charset that differs from the one
                    //    used, decode the raw bytes again with the reported charset.
                    String charset = "JISAutoDetect";
                    Matcher m = chasetPattern.matcher(header.getValue());
                    if (m.matches())
                        charset = m.group(1);
                    String content = new String(out.toByteArray(), charset);
                    CharsetParser parser = new CharsetParser();
                    ParserDelegator p = new ParserDelegator();
                    p.parse(new StringReader(content), parser, true);
                    if (parser.charset != null && !charset.equals(parser.charset)) {
                        charset = parser.charset;
                        content = new String(out.toByteArray(), charset);
                    }
                    result.responseBody = content;
                } else {
                    result.errorMessage = MessageConstant.MESSAGE_FAIL_TO_CHECK_NOT_TEXT.getMessage();
                }
            } else {
                result.errorMessage = response.getStatusLine().toString();
            }
        } finally {
            // Always hand the connection back, whether or not the request succeeded.
            request.releaseConnection();
        }
    } catch (UnsupportedEncodingException e) {
        m_log.info("execute(): " + e.getMessage() + " class=" + e.getClass().getName());
        result.errorMessage = "http receiving failure. (unsupported encoding)";
        result.exception = e;
    } catch (IOException e) {
        m_log.info("execute(): Fatal transport error. " + e.getMessage() + " class=" + e.getClass().getName());
        result.errorMessage = "http requesting failure. (I/O error : unreachable or timeout)";
        result.exception = e;
    } catch (Exception e) {
        m_log.info("execute(): " + e.getMessage() + " class=" + e.getClass().getName());
        result.errorMessage = "http requesting failure. " + e.getMessage() + "(" + e.getClass().getSimpleName() + ")";
        result.exception = e;
    }
    m_requestResult = result;
    return m_requestResult.success;
}
From source file:de.innovationgate.utils.WGUtils.java
/** * Converts a HTML to plain text by removing all HTML tags. * // ww w. j a v a 2 s . c om * @param html * The html * @param divider * The divider by which separate text fragments that were parsed * from the HTML should be divided. * @param ignoreWhitespace * Specify true if pure whitespace text fragments should be * ignored * @return The plain text * @throws IOException */ public static String toPlainText(String html, String divider, boolean ignoreWhitespace) throws IOException { // First remove data URLs from code which may bloat the process html = WGUtils.strReplace(html, "src=\"data:", new ReplaceProcessor() { @Override public int replace(String text, int from, int to, Writer out) throws IOException { int linkEnd = text.indexOf("\"", to); out.write("src=\""); if (linkEnd != -1) { return linkEnd; } else { return text.length() - 1; } } }, true); html = WGUtils.strReplace(html, "href=\"data:", new ReplaceProcessor() { @Override public int replace(String text, int from, int to, Writer out) throws IOException { int linkEnd = text.indexOf("\"", to); out.write("href=\""); if (linkEnd != -1) { return linkEnd; } else { return text.length() - 1; } } }, true); // Convert to plaintext PlainTextParserCallback callback = new PlainTextParserCallback(ignoreWhitespace, divider); ParserDelegator parserDelegator = new javax.swing.text.html.parser.ParserDelegator(); parserDelegator.parse(new java.io.StringReader(html), callback, true); return callback.getText(); }