Example usage for org.jsoup.select Elements html

List of usage examples for org.jsoup.select Elements html

Introduction

On this page you can find example usages of org.jsoup.select.Elements.html().

Prototype

public String html() 

Source Link

Documentation

Get the combined inner HTML of all matched elements.

Usage

From source file:org.brunocvcunha.taskerbox.impl.jobs.LinkedInJobSeeker.java

/**
 * Runs a single job search result through the filter chain and, if it survives every filter,
 * performs the unique action for its URL.
 *
 * <p>Filters are applied in this order: already applied, id already seen, external source
 * domain, URL already handled, ignored external domains, title, employer, location, missing
 * "onsite-apply" marker in the job page, visa wording, experience wording, scorer threshold.
 * Except for the first external-domain filter, every rejection after the "URL already handled"
 * check is recorded through {@code addAlreadyPerformedAction(jobUrl)}.
 *
 * @param job the JSON object describing the job
 * @return {@code false} if the job is flagged as applied, its id was already seen, or the action
 *         limit was reached (in which case the seeker is paused); {@code true} in every other
 *         case
 * @throws JSONException if a required key is missing from {@code job}
 * @throws ClientProtocolException if fetching the job page fails at the HTTP protocol level
 * @throws IOException if fetching or reading the job page fails
 * @throws URISyntaxException if the job URL cannot be turned into a URI
 */
private boolean handleJob(JSONObject job)
        throws JSONException, ClientProtocolException, IOException, URISyntaxException {
    if (job.getBoolean("isApplied")) {
        return false;
    }

    long jobId = job.getLong("id");

    // Each job id is processed at most once.
    if (this.openIds.contains(jobId)) {
        return false;
    }
    this.openIds.add(jobId);

    String jobTitle = job.getString("fmt_jobTitle").replaceAll("</?B>", "");

    if (!this.externalApply && job.has("sourceDomain")) {
        String sourceDomain = job.getString("sourceDomain");
        logInfo(log, jobId + " - " + jobTitle + " - " + sourceDomain + " --> ignored [external]");

        // jobvite and ziprecruiter postings fall through to the remaining filters.
        if (!sourceDomain.contains("jobvite") && !sourceDomain.contains("ziprecruiter")) {
            return true;
        }
    }

    String jobEmployer = job.getString("fmt_companyName");

    String jobUrl = "https://www.linkedin.com/jobs2/view/" + jobId;
    if (alreadyPerformedAction(jobUrl)) {
        return true;
    }

    String location = "";
    if (job.has("fmt_location")) {
        location = job.getString("fmt_location");
    }
    String headline = jobUrl + " - " + location + " - " + jobTitle + " - " + jobEmployer;

    if (job.has("sourceDomain")) {
        String sourceDomain = job.getString("sourceDomain");
        if (this.externalApply && (sourceDomain.contains("empregocerto.uol.com.br")
                || sourceDomain.contains("jobomas.com") || sourceDomain.contains("curriculum.com.br"))) {
            logInfo(log, "-- Ignored [externalApply - domain " + sourceDomain + "] " + headline);
            addAlreadyPerformedAction(jobUrl);
            return true;
        }
    }

    if (!considerTitle(jobTitle)) {
        logInfo(log, "-- Ignored [title] " + headline);
        addAlreadyPerformedAction(jobUrl);
        return true;
    }

    // Best effort: append the accepted title to a local title list. A failure here must not
    // abort job handling, but the writer is always closed so the file handle cannot leak.
    FileWriter out = null;
    try {
        out = new FileWriter(new File(this.tempDir + "\\job-db\\_titles.txt"), true);
        out.write(jobTitle + "\r\n");
    } catch (Exception ignored) {
        // Deliberately ignored: the title list is auxiliary data.
    } finally {
        if (out != null) {
            try {
                out.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing fails.
            }
        }
    }

    if (!considerEmployer(jobEmployer)) {
        logInfo(log, "-- Ignored [employer] " + headline);
        addAlreadyPerformedAction(jobUrl);
        return true;
    }

    if (!considerLocation(location)) {
        logInfo(log, "-- Ignored [location] " + headline);
        addAlreadyPerformedAction(jobUrl);
        return true;
    }

    HttpEntity jobEntity = TaskerboxHttpBox.getInstance().getEntityForURL(jobUrl);
    String jobResult = TaskerboxHttpBox.getInstance().readResponseFromEntity(jobEntity);
    Document jobDocument = Jsoup.parse(jobResult);
    Elements elDescription = jobDocument.select("div.description-section").select("div.rich-text");
    Elements elSkills = jobDocument.select("div.skills-section").select("div.rich-text");

    if (!this.externalApply && !jobResult.contains("onsite-apply")) {
        logInfo(log, "-- Ignored [onsite apply] " + headline);
        addAlreadyPerformedAction(jobUrl);

        try {
            Thread.sleep(5000L);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers can still observe the interruption.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        }

        return true;
    }

    String descriptionHtml = elDescription.html();
    String skillsHtml = elSkills.html();

    if (!considerVisaDescription(descriptionHtml) || !considerVisaDescription(skillsHtml)) {
        logInfo(log, "-- Ignored [visa] " + headline);
        addAlreadyPerformedAction(jobUrl);
        return true;
    }
    if (!considerExperienceDescription(descriptionHtml) || !considerExperienceDescription(skillsHtml)) {
        logInfo(log, "-- Ignored [exp] " + headline);
        addAlreadyPerformedAction(jobUrl);
        return true;
    }

    ScorerResult result = LinkedInJobDBComparer.getScore(descriptionHtml + " - " + skillsHtml);

    if (result.getScore() < this.requiredScore) {
        logInfo(log,
                "-- Ignored [scorer] " + result.getScore() + " - " + result.getMatches() + " - " + headline);
        addAlreadyPerformedAction(jobUrl);
        return true;
    }

    headline = headline + " - " + result.getMatches();

    logInfo(log, headline);
    logInfo(log, descriptionHtml);

    // Pause the seeker once the configured number of actions has been reached.
    if (this.actionCount++ == this.maxCount) {
        this.setPaused(true);
        return false;
    }

    performUnique(jobUrl);

    try {
        Thread.sleep(5000L);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers can still observe the interruption.
        Thread.currentThread().interrupt();
        e.printStackTrace();
    }

    return true;
}

From source file:cn.scujcc.bug.bitcoinplatformandroid.fragment.QuotationInformationFragment.java

/**
 * Fetches the page at {@code url} and stores two pieces of it on {@code news}: the
 * {@code src} attribute of the images inside {@code .entry-content}, and the inner HTML of
 * {@code .entry-content} itself.
 *
 * @param url  address of the article page to download
 * @param news target object that receives the image source and the content markup
 * @throws Exception if the page cannot be fetched
 */
public void getImageAndContent(String url, News news) throws Exception {
    Document page = Jsoup.connect(url).get();

    String imageSource = page.select(".entry-content img").attr("src");
    news.setImage(imageSource);

    String contentHtml = page.select(".entry-content").html();
    news.setContent(contentHtml);
}

From source file:com.normalexception.app.rx8club.fragment.pm.PrivateMessageViewFragment.java

/**
 * Cleans up a user post: strips the auto-embedded duplicate YouTube blocks and
 * reformats the vBulletin-style quotes.
 *
 * @param innerPost   The elements that make up the inner post
 * @return         The formatted post markup
 */
private String formatUserPost(Elements innerPost) {
    // A forum plugin embeds YouTube videos automatically, which duplicates the
    // links; drop every such embedded block from the post.
    Elements embeddedVideos = innerPost.select("div[id^=ame_doshow_post_]");
    for (Element video : embeddedVideos) {
        video.remove();
    }

    // Hand the remaining markup to the quote reformatter
    return Utils.reformatQuotes(innerPost.html());
}

From source file:org.jtotus.network.NordnetConnect.java

/**
 * Checks the login state by fetching the portfolio page and comparing the inner HTML of its
 * {@code title} element with the expected overview title.
 *
 * @return {@code true} if the title matches; {@code false} if there is no connector, the page
 *         could not be fetched, or the title differs
 */
public boolean authenticated() {
    if (connector == null) {
        System.err.printf("Failure connector is empty\n");
        return false;
    }

    String portfolioPage = connector.getPage(_PORTFOLIO_URL_);
    if (portfolioPage == null) {
        System.err.printf("Failure unable to fetch portfolio\n");
        return false;
    }

    //FIXME: UTF-8 for httpclient!
    String pageTitle = Jsoup.parse(portfolioPage).select("title").html();
    boolean titleMatches = pageTitle.equals("Yleisn&auml;kym&auml; - Nordnet");
    if (!titleMatches) {
        System.err.printf("Failure in match for : %s \n", pageTitle);
    }

    return titleMatches;
}

From source file:org.jtotus.network.NordnetConnect.java

/**
 * Builds a {@link StockTick} from the info page of an authenticated session.
 *
 * <p>The inner HTML of the {@code tr[class=first]} rows is parsed again as a document and its
 * {@code td} cells are read by position. Exactly 15 cells are expected.
 *
 * @param infoPage  HTML of the stock info page
 * @param stockName name to store on the resulting tick
 * @return the populated tick, or {@code null} if the cell count is not 15 or the market cell
 *         (index 3) is not "OMX Helsinki"
 */
private StockTick parseAuthenticatedStream(String infoPage, String stockName) {
    Elements firstRows = Jsoup.parse(infoPage).select("tr[class=first]");
    Elements cells = Jsoup.parse(firstRows.html()).select("td");

    // The authenticated layout has 15 cells; anything else is not parsed.
    if (cells.size() != 15) {
        return null;
    }

    StockTick tick = new StockTick();
    tick.setStockName(stockName);

    for (int index = 0; index < cells.size(); index++) {
        String cellText = cells.get(index).text();

        log.info("Element value (" + index + "):" + cellText);
        switch (index) {
        case 3: // market name
            if (!cellText.equalsIgnoreCase("OMX Helsinki")) {
                System.err.printf("Data corruption in broker site? :%s for: %s\n", cellText, stockName);
                return null;
            }
            break;
        case 4: // latest price
            tick.setLatestPrice(Double.parseDouble(cellText.replace(",", ".").trim()));
            break;
        case 5: // latest buy
            tick.setLatestBuy(Double.parseDouble(cellText.replace(",", ".").trim()));
            break;
        case 6: // latest sell
            tick.setLatestSell(Double.parseDouble(cellText.replace(",", ".").trim()));
            break;
        case 7: // latest highest
            tick.setLatestHighest(Double.parseDouble(cellText.replace(",", ".").trim()));
            break;
        case 8: // latest lowest
            tick.setLatestLowest(Double.parseDouble(cellText.replace(",", ".").trim()));
            break;
        case 11: // volume
            tick.setVolume(Double.parseDouble(cellText.replace(" ", "").trim()));
            break;
        case 12: // trades sum
            tick.setTradesSum(Double.parseDouble(cellText.replace(" ", "").trim()));
            break;
        case 14: // time
            tick.setTime(cellText.trim());
            break;

        //TODO:currency and time
        default:
            log.info("Not matched(" + index + ") = " + cellText);
            break;
        }
    }
    log.info("StockTick:" + tick.toString());

    return tick;
}

From source file:org.jtotus.network.NordnetConnect.java

/**
 * Builds a {@link StockTick} from the info page of a non-authenticated session.
 *
 * <p>The inner HTML of the {@code tr[class=first]} rows is parsed again as a document and its
 * {@code td} cells are read by position. Exactly 13 cells are expected.
 *
 * @param infoPage  HTML of the stock info page
 * @param stockName name to store on the resulting tick
 * @return the populated tick, or {@code null} if the cell count is not 13 or the market cell
 *         (index 1) is not "OMX Helsinki"
 */
private StockTick parseNonAuthenticatedStream(String infoPage, String stockName) {
    Elements firstRows = Jsoup.parse(infoPage).select("tr[class=first]");
    Elements cells = Jsoup.parse(firstRows.html()).select("td");

    // The non-authenticated layout has 13 cells; anything else is not parsed.
    if (cells.size() != 13) {
        return null;
    }

    StockTick tick = new StockTick();
    tick.setStockName(stockName);

    for (int index = 0; index < cells.size(); index++) {
        String cellText = cells.get(index).text();

        System.out.printf("Non-Auth Element value (%d):%s for:%s\n", index, cellText, stockName);
        switch (index) {
        case 1: // market name
            if (!cellText.equalsIgnoreCase("OMX Helsinki")) {
                System.err.printf("Data corruption in broker site? :%s for: %s\n", cellText, stockName);
                return null;
            }
            break;
        case 2: // latest price
            tick.setLatestPrice(Double.parseDouble(cellText.replace(",", ".").trim()));
            break;
        case 3: // latest buy
            tick.setLatestBuy(Double.parseDouble(cellText.replace(",", ".").trim()));
            break;
        case 4: // latest sell
            tick.setLatestSell(Double.parseDouble(cellText.replace(",", ".").trim()));
            break;
        case 5: // latest highest
            tick.setLatestHighest(Double.parseDouble(cellText.replace(",", ".").trim()));
            break;
        case 6: // latest lowest
            tick.setLatestLowest(Double.parseDouble(cellText.replace(",", ".").trim()));
            break;
        case 9: // volume
            tick.setVolume(Double.parseDouble(cellText.replace(" ", "").trim()));
            break;
        case 10: // trades sum
            tick.setTradesSum(Double.parseDouble(cellText.replace(" ", "").trim()));
            break;
        case 12: // time
            tick.setTime(cellText.trim());
            break;

        //TODO:currency and time
        default:
            System.out.printf("Not matched(%d) = %s \n", index, cellText);
            break;
        }
    }
    System.out.printf("StockTick:%s\n", tick.toString());

    return tick;
}

From source file:org.opennms.protocols.http.collector.HttpCollectionHandler.java

/**
 * Populates {@code collectionSet} with one resource per XML group of {@code source}, reading
 * the attribute values from the document fetched for {@code urlString}.
 *
 * <p>For each group the group's resource selector is applied to the document; each XML object
 * of the group is then selected within that result and its inner HTML stored as the attribute
 * value.
 */
@Override
protected void fillCollectionSet(String urlString, Request request, CollectionAgent agent,
        XmlCollectionSet collectionSet, XmlSource source) throws Exception {
    XmlCollectionResource nodeResource = new XmlSingleInstanceCollectionResource(agent);
    Document doc = getJsoupDocument(urlString, request);
    for (XmlGroup group : source.getXmlGroups()) {
        LOG.debug("fillCollectionSet: getting resources for XML group {} using selector {}", group.getName(),
                group.getResourceXpath());
        Date timestamp = getTimeStamp(doc, group);
        Elements groupElements = doc.select(group.getResourceXpath());
        LOG.debug("fillCollectionSet: {} => {}", group.getResourceXpath(), groupElements);
        String resourceName = getResourceName(groupElements, group);
        LOG.debug("fillCollectionSet: processing XML resource {}", resourceName);

        // Node-level groups share the single node resource; other types get their own resource.
        boolean nodeLevel = group.getResourceType().equalsIgnoreCase(CollectionResource.RESOURCE_TYPE_NODE);
        XmlCollectionResource collectionResource = nodeLevel
                ? nodeResource
                : getCollectionResource(agent, resourceName, group.getResourceType(), timestamp);
        LOG.debug("fillCollectionSet: processing resource {}", collectionResource);

        AttributeGroupType attribGroupType = new AttributeGroupType(group.getName(), group.getIfType());
        for (XmlObject object : group.getXmlObjects()) {
            Elements matched = groupElements.select(object.getXpath());
            XmlCollectionAttributeType attribType = new XmlCollectionAttributeType(object, attribGroupType);
            String attributeValue = null;
            if (matched != null) {
                attributeValue = matched.html();
            }
            collectionResource.setAttributeValue(attribType, attributeValue);
        }
        processXmlResource(collectionResource, attribGroupType);
        collectionSet.getCollectionResources().add(collectionResource);
    }
}

From source file:org.opennms.protocols.http.collector.HttpCollectionHandler.java

/**
 * Gets the resource name./*  w  w  w  .j  a va2s .c om*/
 *
 * @param elements the JSoup elements
 * @param group the group
 * @return the resource name
 */
private String getResourceName(Elements elements, XmlGroup group) {
    // Processing multiple-key resource name.
    if (group.hasMultipleResourceKey()) {
        List<String> keys = new ArrayList<String>();
        for (String key : group.getXmlResourceKey().getKeyXpathList()) {
            LOG.debug("getResourceName: getting key for resource's name using selector {}", key);
            Elements el = elements.select(key);
            if (el != null) {
                keys.add(el.html());
            }
        }
        return StringUtils.join(keys, "_");
    }
    // If key-xpath doesn't exist or not found, a node resource will be assumed.
    if (group.getKeyXpath() == null) {
        return "node";
    }
    // Processing single-key resource name.
    LOG.debug("getResourceName: getting key for resource's name using selector {}", group.getKeyXpath());
    Elements el = elements.select(group.getKeyXpath());
    return el == null ? null : el.html();
}

From source file:org.opennms.protocols.http.collector.HttpCollectionHandler.java

/**
 * Reads a custom timestamp from the document using the group's timestamp selector.
 *
 * <p>The group's timestamp format is used as the pattern; when none is configured the
 * pattern defaults to {@code yyyy-MM-dd HH:mm:ss}.
 *
 * @param doc the JSoup document
 * @param group the group
 * @return the parsed time stamp, or {@code null} if the group has no timestamp selector or
 *         the selected value cannot be parsed with the pattern
 */
protected Date getTimeStamp(Document doc, XmlGroup group) {
    if (group.getTimestampXpath() == null) {
        return null;
    }

    String pattern = "yyyy-MM-dd HH:mm:ss";
    if (group.getTimestampFormat() != null) {
        pattern = group.getTimestampFormat();
    }
    LOG.debug(
            "getTimeStamp: retrieving custom timestamp to be used when updating RRDs using selector {} and pattern {}",
            group.getTimestampXpath(), pattern);

    Elements selected = doc.select(group.getTimestampXpath());
    if (selected == null) {
        return null;
    }

    String value = selected.html();
    try {
        DateTimeFormatter formatter = DateTimeFormat.forPattern(pattern);
        return formatter.parseDateTime(value).toDate();
    } catch (Exception e) {
        // An unparsable value is reported and treated as "no timestamp".
        LOG.warn("getTimeStamp: can't convert custom timetime {} using pattern {}", value, pattern);
        return null;
    }
}

From source file:org.opens.rules.doc.utils.ruledesign.extractor.ExtractRuleDesignHtmlCode.java

/**
 * Downloads every rule page of the referential (theme-critere-test) and writes the
 * {@code .content.clear-block} part of each page to an HTML file.
 *
 * <p>Before using it please set the FOLDER variable with the path where you
 * want to create your extracted html files.
 *
 * <p>Each page is requested once. HTTP error statuses do not raise an exception; a 404 response
 * is skipped and any other response is parsed and written to
 * {@code FOLDER/RuleDesign/Rule-i-j-k.html}.
 *
 * @param args the command line arguments
 */
public static void main(String[] args) {
    // first loop is for the theme number
    for (int i = 1; i < MAX_THEME_NUMBER; i++) {
        // second loop is for the critere number
        for (int j = 1; j < MAX_CRITERE_NUMBER; j++) {
            // third loop is for the test number
            for (int k = 1; k < MAX_TEST_NUMBER; k++) {
                String address = PREFIX_URL_TO_REFERENTIAL + i + "-" + j + "-" + k;
                URL url = null;
                try {
                    // Validates the address and keeps it available for the error messages below.
                    url = new URL(address);

                    // The request must be executed before its response can be inspected;
                    // ignoreHttpErrors lets a 404 come back as a status code instead of an
                    // exception so the check below is meaningful.
                    Connection.Response resp = Jsoup.connect(address)
                            .timeout(4000)
                            .ignoreHttpErrors(true)
                            .execute();
                    if (resp.statusCode() != 404) {
                        // Reuse the response already fetched instead of downloading again.
                        Document doc = resp.parse();
                        System.out.println(doc.title());

                        Elements summary = doc.select(".content.clear-block");
                        FileUtils.writeStringToFile(
                                new File(FOLDER + "/RuleDesign/Rule-" + i + "-" + j + "-" + k + ".html"),
                                summary.html());
                    }
                } catch (MalformedURLException ex) {
                    System.out.println("URL MAL FORMEE");
                } catch (IOException ex) {
                    if (url != null) {
                        System.out.println("URL 404 : " + url.toString());
                    } else {
                        System.out.println("EMPTY URL");
                    }
                }
            }
        }
    }
}