Example usage for org.jsoup.nodes Document body

List of usage examples for org.jsoup.nodes.Document.body()

Introduction

On this page you can find example usages of org.jsoup.nodes.Document.body().

Prototype

public Element body() 

Source Link

Document

Accessor to the document's body element.

Usage

From source file:edu.harvard.iq.safe.lockss.impl.LOCKSSPlatformStatusHtmlParser.java

/**
 *
 * @param is/* w ww. j a  v  a 2s . c o m*/
 */
@Override
public void getPlatformStatusData(InputStream is) {

    try {

        Document doc = DataUtil.load(is, "UTF-8", "");
        Element body = doc.body();

        // most of the target items are sandwitched by <b> tag
        // this can be used to reach each target item.
        String tmpCurrentTime = null;
        String tmpUpTime = null;
        String currentTime = null;
        Elements tags = body.getElementsByTag("b");

        for (Element tag : tags) {

            // get the current-time string: for 1.52.3 or older daemons
            // this is the ony place to get it.
            String tagText = tag.text();
            logger.log(Level.FINE, "working on tagText={0}", tagText);

            if (tagText.equals("Daemon Status")) {
                // find current time and up running
                currentTime = tag.parent().parent().text();
                logger.log(Level.INFO, "currentTime text=[{0}]", currentTime);
                // "currentTime =Daemon Status lockss.statelib.lib.in.us (usdocspln group) 01:25:55 03/01/12, up 7d5h21m"
                tmstmpMatcher = currentTimeStampPattern.matcher(currentTime);

                if (tmstmpMatcher.find()) {
                    logger.log(Level.INFO, "group 0={0}", tmstmpMatcher.group(0));
                    tmpCurrentTime = tmstmpMatcher.group(1);
                    logger.log(Level.INFO, "Current Time:group 1={0}", tmpCurrentTime);
                    tmpUpTime = tmstmpMatcher.group(2);
                    logger.log(Level.INFO, "UpTime:group 2={0}", tmpUpTime);
                }
            }

            // get the remaining key-value sets
            if (fieldNameSet.contains(tagText)) {

                Element parent = tag.parent();
                String fieldValue = parent.nextElementSibling().text();
                logger.log(Level.FINE, "{0}={1}", new Object[] { tagText, fieldValue });
                summaryInfoMap.put(tagText, fieldValue);
            }
        }

        // extract the daemon version and platform info that are located
        // at the bottom
        // these data are sandwitched by a <center> tag
        Elements ctags = body.getElementsByTag("center");
        String version = null;
        String platform = null;
        for (Element ctag : ctags) {
            String cText = ctag.text();
            logger.log(Level.FINE, "center tag Text={0}", cText);
            // cText is like this:
            // Daemon 1.53.3 built 28-Jan-12 01:06:36 on build7.lockss.org, Linux RPM 1
            if (StringUtils.isNotBlank(cText) && ctag.child(0).nodeName().equals("font")) {
                String[] versionPlatform = cText.split(", ");
                if (versionPlatform.length == 2) {
                    logger.log(Level.INFO, "daemon version={0};platform={1}", versionPlatform);
                    version = DaemonStatusDataUtil.getDaemonVersion(versionPlatform[0]);
                    platform = versionPlatform[1];
                } else {
                    // the above regex failed
                    logger.log(Level.WARNING, "String-formatting differs; use pattern matching");
                    version = DaemonStatusDataUtil.getDaemonVersion(cText);
                    int platformOffset = cText.lastIndexOf(", ") + 2;
                    platform = cText.substring(platformOffset);
                    logger.log(Level.INFO, "platform={0}", platform);

                }
            }
        }

        if (summaryInfoMap.containsKey("V3 Identity")) {
            String ipAddress = DaemonStatusDataUtil.getPeerIpAddress(summaryInfoMap.get("V3 Identity"));
            logger.log(Level.INFO, "ipAddress={0}", ipAddress);

            if (StringUtils.isNotBlank(ipAddress)) {
                boxInfoMap.put("host", ipAddress);
                if (!ipAddress.equals(summaryInfoMap.get("IP Address"))) {
                    summaryInfoMap.put("IP Address", ipAddress);
                }
            } else {
                logger.log(Level.WARNING, "host token is blank or null: use IP Address instead");
                logger.log(Level.INFO, "IP Address={0}", summaryInfoMap.get("IP Address"));
                boxInfoMap.put("host", summaryInfoMap.get("IP Address"));
            }
        }

        // for pre-1.53.3 versions
        boxInfoMap.put("time", tmpCurrentTime);
        if (!summaryInfoMap.containsKey("Current Time")) {
            summaryInfoMap.put("Current Time", tmpCurrentTime);
        }

        boxInfoMap.put("up", tmpUpTime);
        if (!summaryInfoMap.containsKey("Uptime")) {
            summaryInfoMap.put("Uptime", tmpUpTime);
        }

        boxInfoMap.put("version", version);
        if (!summaryInfoMap.containsKey("Daemon Version")) {
            summaryInfoMap.put("Daemon Version", version);
        }

        boxInfoMap.put("platform", platform);
        if (!summaryInfoMap.containsKey("Platform")) {
            summaryInfoMap.put("Platform", platform);
        }

    } catch (IOException ex) {
        logger.log(Level.SEVERE, "IO error", ex);
    }

    logger.log(Level.INFO, "boxInfoMap={0}", boxInfoMap);
    logger.log(Level.INFO, "summaryInfo={0}", summaryInfoMap);
}

From source file:de.dlopes.stocks.facilitator.services.impl.FinanznachrichtenOrderbuchExtractorImpl.java

/**
 * Collects the values of the requested data type from a page.
 *
 * <p>The page is read from the local file system when {@code url} starts
 * with {@code file://}, otherwise it is fetched with a 30000 ms timeout.
 * Every element matching {@code span[id^=productid] > span} whose
 * whitespace-stripped text starts with {@code dataType.name() + ":"}
 * contributes that text, minus the prefix, to the result.
 *
 * @param url      location of the page ({@code file://} or a remote URL)
 * @param dataType type of value to extract; its name is the expected prefix
 * @return the extracted values; empty when nothing matched or when reading
 *         the page failed with an {@link IOException}
 */
@Override
public List<String> getFinanceData(String url, FinanceDataType dataType) {

    final List<String> result = new ArrayList<String>();

    try {
        final Document page;
        if (url.startsWith("file://")) {
            page = Jsoup.parse(new File(url.replaceFirst("file://", "")), "UTF-8");
        } else {
            page = Jsoup.parse(new URL(url), 30000);
        }

        for (Element span : page.body().select("span[id^=productid] > span")) {
            String value = span.text();

            // only non-empty texts are worth inspecting
            if (!StringUtils.isEmpty(value)) {
                value = StringUtils.trimAllWhitespace(value);

                // keep the entry only if it carries the requested prefix
                final String prefix = dataType.name() + ":";
                if (value.startsWith(prefix)) {
                    result.add(value.replace(prefix, ""));
                }
            }
        }

    } catch (IOException ioe) {
        ioe.printStackTrace();
    }

    return result;

}

From source file:com.money.manager.ex.investment.morningstar.MorningstarPriceUpdater.java

/**
 * Parse Morningstar response into price information.
 * @param symbol Morningstar symbol/*from   w  w w.  j  a va2  s .  c o m*/
 * @param html Result
 * @return An object containing price details
 */
private PriceDownloadedEvent parse(String symbol, String html) {
    Document doc = Jsoup.parse(html);

    // symbol
    String yahooSymbol = symbolConverter.getYahooSymbol(symbol);

    // price
    String priceString = doc.body().getElementById("last-price-value").text();
    if (TextUtils.isEmpty(priceString)) {
        throw new RuntimeException("No price available for " + symbol);
    }
    Money price = MoneyFactory.fromString(priceString);
    // currency
    String currency = doc.body().getElementById("curency").text();
    if (currency.equals("GBX")) {
        price = price.divide(100, MoneyFactory.MAX_ALLOWED_PRECISION);
    }

    // date
    String dateString = doc.body().getElementById("asOfDate").text();
    String dateFormat = "MM/dd/yyyy HH:mm:ss";
    //        DateTimeFormatter formatter = DateTimeFormat.forPattern(dateFormat);
    // the time zone is EST
    //        DateTime date = formatter.withZone(DateTimeZone.forID("America/New_York"))
    //                .parseDateTime(dateString)
    //                .withZone(DateTimeZone.forID("Europe/Vienna"));
    // convert time zone
    MmxDate dateTime = new MmxDate(dateString, dateFormat).setTimeZone("America/New_York")
            .inTimeZone("Europe/Vienna");

    // todo: should this be converted to the exchange time?

    return new PriceDownloadedEvent(yahooSymbol, price, dateTime.toDate());
}

From source file:me.bramhaag.discordselfbot.commands.fun.CommandLMGTFY.java

/**
 * Replaces the command message with a "Let me Google that for you" link.
 *
 * <p>With a leading {@code --expanded} / {@code -e} flag followed by a
 * query, the plain lmgtfy link for that query is used. Otherwise all
 * arguments form the query and the link is shortened through the TinyURL
 * API; the body text of the response is taken as the short URL. Failures
 * are reported through {@code Util.sendError} and leave the message
 * unedited.
 *
 * @param message the command message; it is edited to contain the link
 * @param channel the channel the command was issued in (not used here)
 * @param args    command arguments; {@code args[0]} is read unconditionally
 */
@Command(name = "lmgtfy", minArgs = 1)
public void execute(@NonNull Message message, @NonNull TextChannel channel, @NonNull String[] args) {
    String tinyURL = "http://tinyurl.com/api-create.php?url=";
    String lmgtfyURL = "http://lmgtfy.com?q=";

    // The flag only counts when a query follows it. The parentheses matter:
    // '&&' binds tighter than '||', so without them a lone "--expanded"
    // would be accepted and produce a link with an empty query.
    boolean expanded = (args[0].equalsIgnoreCase("--expanded") || args[0].equalsIgnoreCase("-e"))
            && args.length >= 2;

    String url;

    try {
        if (expanded) {
            // drop the flag, keep the rest as the query
            url = lmgtfyURL + URLEncoder.encode(StringUtils.join(Arrays.copyOfRange(args, 1, args.length), " "),
                    "UTF-8");
        } else {
            Document doc;
            try {
                doc = Jsoup
                        .connect(tinyURL + lmgtfyURL + URLEncoder.encode(StringUtils.join(args, " "), "UTF-8"))
                        .get();
            } catch (IOException e) {
                e.printStackTrace();

                Util.sendError(message, e.getMessage());
                return;
            }

            url = doc.body().text();
        }
    } catch (UnsupportedEncodingException e) {
        Util.sendError(message, e.getMessage());
        return;
    }

    // angle brackets are placed around the URL in the edited message
    message.editMessage("<" + url + ">").queue();
}

From source file:ac.simons.oembed.Oembed.java

/**
 * Transforms the given HTML via {@code transformDocument} and returns the
 * inner HTML of the resulting body, rendered without pretty-printing and
 * with the xhtml escape mode.
 *
 * @param documentHtml the HTML to transform
 * @return the inner HTML of the transformed document's body
 */
public String transformDocumentString(final String documentHtml) {
    final Document transformed = transformDocument(documentHtml);
    // configure the output before rendering the body
    transformed.outputSettings().prettyPrint(false);
    transformed.outputSettings().escapeMode(EscapeMode.xhtml);
    return transformed.body().html();
}

From source file:de.dlopes.stocks.facilitator.services.impl.FinanzenNetIndexHTMLExtractorImpl.java

/**
 * Collects one text entry per table row of the index view of a page.
 *
 * <p>The page is read from the local file system when {@code url} starts
 * with {@code file://}, otherwise it is fetched with a 30000 ms timeout.
 * For every row matching {@code #fragIndexBarView > table tr}, the combined
 * text of its {@code td > div} elements is stripped of all whitespace and
 * added to the result; rows without such text are skipped.
 *
 * @param url      location of the page ({@code file://} or a remote URL)
 * @param dataType requested data type (not consulted by this extractor)
 * @return the extracted texts; empty when nothing matched or when reading
 *         the page failed with an {@link IOException}
 */
@Override
public List<String> getFinanceData(String url, FinanceDataType dataType) {

    final List<String> result = new ArrayList<String>();

    try {
        final Document page;
        if (url.startsWith("file://")) {
            page = Jsoup.parse(new File(url.replaceFirst("file://", "")), "UTF-8");
        } else {
            page = Jsoup.parse(new URL(url), 30000);
        }

        for (Element row : page.body().select("#fragIndexBarView > table tr")) {
            final String cellText = row.select("td > div").text();

            // rows without any cell text contribute nothing
            if (!StringUtils.isEmpty(cellText)) {
                result.add(StringUtils.trimAllWhitespace(cellText));
            }
        }

    } catch (IOException ioe) {
        ioe.printStackTrace();
    }

    return result;
}

From source file:com.aliyun.openservices.odps.console.commands.DescribeResourceCommand.java

/**
 * Appends a description list ({@code <div><dl>...}) describing the
 * resource {@code resourceName} of project {@code projectName} to the body
 * of the given document and returns the document rendered as a string.
 *
 * <p>The list holds the name, owner, type, comment, creation time and last
 * modification time of the resource; for a table resource the source table
 * (and its partition, if any) is listed as well.
 *
 * @param dom document whose body receives the description list
 * @return the string form of {@code dom} after the list has been appended
 * @throws ODPSConsoleException if the resource does not exist
 * @throws OdpsException        declared by the signature; presumably raised by the ODPS calls
 */
@Override
public String runHtml(Document dom) throws ODPSConsoleException, OdpsException {
    final Odps odps = getCurrentOdps();
    final boolean resourceExists = odps.resources().exists(projectName, resourceName);
    if (!resourceExists) {
        throw new ODPSConsoleException("Resource not found : " + resourceName);
    }
    final Resource resource = odps.resources().get(projectName, resourceName);

    // every attribute becomes a <dt>/<dd> pair inside a single <dl>
    final Element list = dom.body().appendElement("div").appendElement("dl");

    list.appendElement("dt").text("Name");
    list.appendElement("dd").text(resource.getName());

    list.appendElement("dt").text("Owner");
    list.appendElement("dd").text(resource.getOwner());

    list.appendElement("dt").text("Type");
    list.appendElement("dd").text(String.valueOf(resource.getType()));

    if (Resource.Type.TABLE == resource.getType()) {
        final TableResource tableResource = (TableResource) resource;
        final StringBuilder source = new StringBuilder();
        source.append(tableResource.getSourceTable().getProject());
        source.append(".");
        source.append(tableResource.getSourceTable().getName());
        if (tableResource.getSourceTablePartition() != null) {
            source.append(" partition(");
            source.append(tableResource.getSourceTablePartition().toString());
            source.append(")");
        }

        list.appendElement("dt").text("SourceTableName");
        list.appendElement("dd").text(source.toString());
    }

    list.appendElement("dt").text("Comment");
    list.appendElement("dd").text(resource.getComment());

    list.appendElement("dt").text("CreatedTime");
    list.appendElement("dd").text(DATE_FORMAT.format(resource.getCreatedTime()));

    list.appendElement("dt").text("LastModifiedTime");
    list.appendElement("dd").text(DATE_FORMAT.format(resource.getLastModifiedTime()));

    return dom.toString();

}

From source file:net.groupbuy.entity.Article.java

/**
 * ?/* w  w w. j av a 2 s .  co  m*/
 * 
 * @return 
 */
@Transient
public String[] getPageContents() {
    if (StringUtils.isEmpty(content)) {
        return new String[] { "" };
    }
    if (content.contains(PAGE_BREAK_SEPARATOR)) {
        return content.split(PAGE_BREAK_SEPARATOR);
    } else {
        List<String> pageContents = new ArrayList<String>();
        Document document = Jsoup.parse(content);
        List<Node> children = document.body().childNodes();
        if (children != null) {
            int textLength = 0;
            StringBuffer html = new StringBuffer();
            for (Node node : children) {
                if (node instanceof Element) {
                    Element element = (Element) node;
                    html.append(element.outerHtml());
                    textLength += element.text().length();
                    if (textLength >= PAGE_CONTENT_LENGTH) {
                        pageContents.add(html.toString());
                        textLength = 0;
                        html.setLength(0);
                    }
                } else if (node instanceof TextNode) {
                    TextNode textNode = (TextNode) node;
                    String text = textNode.text();
                    String[] contents = PARAGRAPH_SEPARATOR_PATTERN.split(text);
                    Matcher matcher = PARAGRAPH_SEPARATOR_PATTERN.matcher(text);
                    for (String content : contents) {
                        if (matcher.find()) {
                            content += matcher.group();
                        }
                        html.append(content);
                        textLength += content.length();
                        if (textLength >= PAGE_CONTENT_LENGTH) {
                            pageContents.add(html.toString());
                            textLength = 0;
                            html.setLength(0);
                        }
                    }
                }
            }
            String pageContent = html.toString();
            if (StringUtils.isNotEmpty(pageContent)) {
                pageContents.add(pageContent);
            }
        }
        return pageContents.toArray(new String[pageContents.size()]);
    }
}

From source file:com.mythesis.userbehaviouranalysis.WebParser.java

/**
 * Parse the url and get all the content
 * @param link the url to parse/*from   w w w  .j av  a  2 s .  c o m*/
 * @return The content parsed
 */
private String cleanhtml(String link) {
    try {
        Document doc = Jsoup.connect(link).timeout(10 * 1000).get();
        String title = doc.title();
        String mainbody = doc.body().text();
        Elements links = doc.select("a[href]");
        Elements media = doc.select("[src]");
        //fix link html to remove https:// or http:// and simple /
        if (link.substring(link.length() - 1, link.length()).equalsIgnoreCase("/")) {
            link = link.substring(0, link.length() - 1);
        }
        if (link.substring(0, 5).equalsIgnoreCase("https")) {
            link = link.substring(8);
        } else if (link.substring(0, 4).equalsIgnoreCase("http")) {
            link = link.substring(7);
        }
        String anchortext = "";
        String alttext = "";
        //-----get the anchor text of internal links
        for (Element el : links) {
            String str_check = el.attr("abs:href");
            if (el.attr("abs:href").contains(link) && el.text().length() > 1) {
                anchortext = anchortext + el.text() + " ";
            }
        }
        //-------get alt text to internal images links
        for (Element medi : media) {
            if (medi.getElementsByTag("img").attr("src").contains(link)) {
                alttext = alttext + " " + medi.getElementsByTag("img").attr("alt");
            }
            if (medi.getElementsByTag("img").attr("src").startsWith("/")) {
                alttext = alttext + " " + medi.getElementsByTag("img").attr("alt");
            }
        }
        String content = mainbody + title + anchortext + alttext;

        return content;

    } catch (IOException ex) {
        Logger.getLogger(WebParser.class.getName()).log(Level.SEVERE, null, ex);
        String check = null;
        return check;
    } catch (NullPointerException ex) {
        Logger.getLogger(WebParser.class.getName()).log(Level.SEVERE, null, ex);
        String check = null;
        return check;
    } catch (Exception ex) {
        Logger.getLogger(WebParser.class.getName()).log(Level.SEVERE, null, ex);
        String check = null;
        return check;
    }

}

From source file:me.vertretungsplan.parser.SVPlanParser.java

/**
 * Builds a substitution schedule from the given documents.
 *
 * <p>Each document is handled in one of three ways: if it contains
 * {@code .svp} elements, each of them is parsed as a day; otherwise, if it
 * contains {@code .Trennlinie} elements, copies of the body's child nodes
 * are grouped into synthetic {@code div} elements, with a group ending at
 * each {@code Trennlinie} element once the group holds a table, and each
 * group is parsed as a day; otherwise the whole document is parsed as a
 * single day.
 *
 * @param docs the documents to parse
 * @return the schedule, with classes and teachers set
 * @throws IOException   declared by the signature; presumably raised while parsing a day
 * @throws JSONException declared by the signature; presumably raised while parsing a day
 */
@NotNull
SubstitutionSchedule parseSVPlanSchedule(List<Document> docs) throws IOException, JSONException {
    SubstitutionSchedule schedule = SubstitutionSchedule.fromData(scheduleData);

    for (Document doc : docs) {
        // case 1: explicit day containers
        if (!doc.select(".svp").isEmpty()) {
            for (Element day : doc.select(".svp")) {
                parseSvPlanDay(schedule, day, doc);
            }
            continue;
        }

        // case 3: no separators at all, the document is one day
        if (doc.select(".Trennlinie").isEmpty()) {
            parseSvPlanDay(schedule, doc, doc);
            continue;
        }

        // case 2: days are delimited by separator elements
        Element currentDay = new Element(Tag.valueOf("div"), "");
        for (Node node : doc.body().childNodesCopy()) {
            boolean isSeparator = node instanceof Element && ((Element) node).hasClass("Trennlinie");
            if (isSeparator && !currentDay.select("table").isEmpty()) {
                // the collected nodes form a complete day; start a new one
                parseSvPlanDay(schedule, currentDay, doc);
                currentDay = new Element(Tag.valueOf("div"), "");
            } else {
                currentDay.appendChild(node);
            }
        }
        // nodes collected after the last separator
        parseSvPlanDay(schedule, currentDay, doc);
    }

    schedule.setClasses(getAllClasses());
    schedule.setTeachers(getAllTeachers());
    return schedule;
}