Example usage for org.jsoup.select Elements parents

List of usage examples for org.jsoup.select Elements parents

Introduction

On this page you can find example usages of org.jsoup.select.Elements.parents().

Prototype

public Elements parents() 

Source Link

Documentation

Get all of the parents and ancestor elements of the matched elements.

Usage

From source file:gov.medicaid.screening.dao.impl.OIGDAOBean.java

/**
 * Parses the excluded provider profile details page.
 *
 * @param page the details page/*  ww  w . ja v a 2 s . co m*/
 * @return the parsed license details
 * @throws ParsingException if the expected tags were not found
 */
private ProviderProfile parseProfile(Document page) throws ParsingException {
    ProviderProfile profile = new ProviderProfile();

    // name
    User user = new User();
    profile.setUser(user);
    user.setLastName(page.select("th:containsOwn(Last Name) + td").text());
    user.setFirstName(page.select("th:containsOwn(First Name) + td").text());

    // business
    String businessName = page.select("th:containsOwn(Entity) + td").text();
    if (!"N/A".equals(businessName)) {
        Business business = new Business();
        profile.setBusiness(business);
        business.setName(businessName);
    }

    // DOB
    Date dob = parseDate(page.select("th:has(acronym:containsOwn(DOB)) + td").text(), DATE_FORMAT);
    if (dob != null) {
        profile.setDob(dob);
    }

    // exclusion type
    ExclusionType exclusionType = new ExclusionType();
    profile.setExclusionType(exclusionType);
    exclusionType.setName(page.select("th:containsOwn(Excl. Type) + td").text());

    // specialty
    List<Specialty> specialties = new ArrayList<Specialty>();
    Specialty specialty = new Specialty();
    specialties.add(specialty);
    specialty.setName(page.select("th:containsOwn(Specialty) + td").text());
    profile.setSpecialties(specialties);

    // address
    Elements addrElement = page.select("th:containsOwn(Address) + td");
    String addr = addrElement.text();
    Element addrNextRow = addrElement.parents().first().nextElementSibling();
    if ("".equals(addrNextRow.select("th").text())) {
        addr += " " + addrNextRow.select("td").text();
    }
    Address address = new Address();
    address.setLocation(addr);
    profile.setAddresses(Arrays.asList(new Address[] { address }));

    Date date = parseDate(page.select("th:containsOwn(Excl. Date) + td").text(), DATE_FORMAT);
    if (date != null) {
        profile.setRequestEffectiveDate(date);
    }

    return profile;
}

From source file:xxx.web.comments.debates.impl.ProConOrgParser.java

/**
 * Parses a procon.org debate page into a {@link Debate}.
 *
 * <p>The title is taken from the first {@code h2} in the body. The arguments are
 * read from the table row that follows the row containing the "PRO (yes)" header:
 * its first {@code td} holds the pro arguments and its second the con arguments.
 * A summary map of argument counts per stance is printed to standard output.
 *
 * @param inputStream the HTML of the page, read as UTF-8; not closed by this method
 * @return the parsed debate, or {@code null} if the page has no {@code h2} title
 *         or does not have the expected pro/con table layout
 * @throws IOException if the stream cannot be read or parsed
 */
@Override
public Debate parseDebate(InputStream inputStream) throws IOException {
    Debate result = new Debate();

    Document doc = Jsoup.parse(inputStream, "UTF-8", "http://www.procon.org/");

    // title
    Element body = doc.body();
    Element titleElement = body.select("h2").first();
    if (titleElement == null) {
        // not a debate
        return null;
    }
    result.setTitle(Utils.normalize(titleElement.text()));

    // Walk up from the "PRO (yes)" header: b -> td -> tr, then step to the next row.
    Elements proConTr = body.select("tr > td > b:contains(PRO \\(yes\\))");
    Element headerCell = proConTr.parents().first();
    if (headerCell == null) {
        // not a pro-con debate
        return null;
    }
    Element headerRow = headerCell.parents().first();
    if (headerRow == null) {
        return null;
    }
    Element trAnswers = headerRow.nextElementSibling();
    if (trAnswers == null) {
        return null;
    }

    // The answers row must have a PRO cell and a CON cell; otherwise the layout is not recognised.
    Elements answerCells = trAnswers.select("td");
    if (answerCells.size() < 2) {
        return null;
    }

    addArgumentsFromCell(answerCells.get(0), "pro", result);
    addArgumentsFromCell(answerCells.get(1), "con", result);

    // show some stats:
    Map<String, Integer> map = new HashMap<>();
    map.put("pro", 0);
    map.put("con", 0);
    for (Argument argument : result.getArgumentList()) {
        map.put(argument.getStance(), map.get(argument.getStance()) + 1);
    }
    System.out.println(map);

    return result;
}

/**
 * Extracts every argument in one side's cell and appends the non-empty ones to the debate.
 *
 * @param sideCell the {@code td} holding the arguments of one side
 * @param stance the stance label to set on each argument ("pro" or "con")
 * @param debate the debate whose argument list receives the extracted arguments
 */
private void addArgumentsFromCell(Element sideCell, String stance, Debate debate) {
    for (Element text : sideCell.select("blockquote > div[class=editortext]")) {
        Argument argument = new Argument();
        argument.setStance(stance);
        argument.setText(extractPlainTextFromTextElement(text));
        argument.setOriginalHTML(text.html());

        // The counter advances for every candidate, including ones dropped below for empty text.
        idCounter++;
        argument.setId("pcq_" + idCounter);

        if (!argument.getText().isEmpty()) {
            debate.getArgumentList().add(argument);
        } else {
            System.err.println("Failed to extract text from " + text.html());
        }
    }
}