Example usage for org.jsoup.nodes Element firstElementSibling

List of usage examples for org.jsoup.nodes Element firstElementSibling

Introduction

On this page you can find example usage for org.jsoup.nodes Element firstElementSibling.

Prototype

public Element firstElementSibling() 

Source Link

Document

Gets the first element sibling of this element.

Usage

From source file: de.geeksfactory.opacclient.apis.Pica.java

/**
 * Parses the HTML of a record detail page into a {@link DetailledItem}.
 *
 * <p>The page is read as a list of {@code td.preslabel + td.presvalue} value cells, each
 * labelled by the first element sibling in its row. Processing happens in these stages:
 * <ol>
 *   <li>record id, taken from the {@code PPN} query parameter of the first matching link;</li>
 *   <li>cover URL, derived from the first text node of the ISBN value cell;</li>
 *   <li>title and optional subtitle, split at the first "/" or ":";</li>
 *   <li>generic details, read until the first value cell that contains an {@code <hr>};</li>
 *   <li>copies, read from the remaining cells, where every further {@code <hr>} cell closes
 *       the current copy;</li>
 *   <li>reservation info (stored as a JSON array string) and the volume search parameters.</li>
 * </ol>
 *
 * @param html raw HTML of the detail page
 * @return the populated item; its title is the empty string when no title cell is found
 */
protected DetailledItem parse_result(String html) {
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url);

    DetailledItem result = new DetailledItem();

    // The record id is the PPN parameter of the first link whose href mentions PPN.
    Element ppnLink = doc.select("a[href*=PPN]").first();
    if (ppnLink != null) {
        Map<String, String> hrefq = getQueryParamsFirst(ppnLink.absUrl("href"));
        result.setId(hrefq.get("PPN"));
    }

    // GET COVER
    Element isbnElement = doc.select("td.preslabel:contains(ISBN) + td.presvalue").first();
    if (isbnElement != null) {
        // Only the first direct text node is used; nested markup in the cell is skipped.
        String isbn = "";
        for (Node child : isbnElement.childNodes()) {
            if (child instanceof TextNode) {
                isbn = ((TextNode) child).text().trim();
                break;
            }
        }
        result.setCover(ISBNTools.getAmazonCoverURL(isbn, true));
    }

    // GET TITLE AND SUBTITLE
    String titleRegex = ".*(Titel|Aufsatz|Zeitschrift|Gesamttitel"
            + "|Title|Article|Periodical|Collective\\stitle" + "|Titre|Article|P.riodique|Titre\\sg.n.ral).*";
    String selector = "td.preslabel:matches(" + titleRegex + ") + td.presvalue";
    Element titleAndSubtitleElem = doc.select(selector).first();
    if (titleAndSubtitleElem != null) {
        String titleAndSubtitle = titleAndSubtitleElem.text().trim();
        // Split at whichever of "/" and ":" comes first. A separator at index 0 would leave
        // an empty title, so in that case the whole text is kept as the title.
        int separatorPosition = firstTitleSeparator(titleAndSubtitle);
        String title;
        if (separatorPosition > 0) {
            title = titleAndSubtitle.substring(0, separatorPosition).trim();
            String subtitle = titleAndSubtitle.substring(separatorPosition + 1).trim();
            result.addDetail(new Detail(stringProvider.getString(StringProvider.SUBTITLE), subtitle));
        } else {
            title = titleAndSubtitle;
        }
        result.setTitle(title);
    } else {
        result.setTitle("");
    }

    // Details
    // "line" tracks the index into "lines" so that the copies loop below can resume right
    // after the cell at which the details loop stopped.
    int line = 0;
    Elements lines = doc.select("td.preslabel + td.presvalue");
    if (titleAndSubtitleElem != null) {
        // The title has already been handled above; do not repeat it as a detail.
        lines.remove(titleAndSubtitleElem);
    }
    for (Element element : lines) {
        if (element.select("hr").size() > 0) {
            // after the separator we get the copies
            break;
        }

        Element titleElem = element.firstElementSibling();
        String detail;
        Elements divs = element.select("div");
        if (divs.size() > 1 && divs.text().equals(element.text())) {
            // The cell consists solely of <div>s: emit one line per non-blank <div>.
            StringBuilder joined = new StringBuilder();
            for (Element div : divs) {
                String divText = div.text().replace("\u00a0", " ").trim();
                if (!divText.equals("")) {
                    if (joined.length() > 0) {
                        joined.append("\n");
                    }
                    joined.append(divText);
                }
            }
            detail = joined.toString();
        } else {
            detail = element.text().replace("\u00a0", " ").trim();
        }
        String title = titleElem.text().replace("\u00a0", " ").trim();

        if (detail.length() == 0 && title.length() == 0) {
            line++;
            continue;
        }
        if (title.contains(":")) {
            title = title.substring(0, title.indexOf(":")); // remove colon
        }
        result.addDetail(new Detail(title, detail));

        // A single, non-empty link that points outside the catalogue is added as its own detail.
        Elements anchors = element.select("a");
        if (anchors.size() == 1 && !anchors.get(0).text().trim().equals("")) {
            String url = anchors.first().absUrl("href");
            if (!url.startsWith(opac_url)) {
                result.addDetail(new Detail(stringProvider.getString(StringProvider.LINK), url));
            }
        }

        line++;
    }
    line++; // next line after separator

    // Copies
    Copy copy = new Copy();
    String location = "";

    // reservation info will be stored as JSON
    JSONArray reservationInfo = new JSONArray();

    // Built once instead of once per status cell.
    Pattern returnDatePattern = Pattern.compile("(till|bis) (\\d{2}-\\d{2}-\\d{4})");
    DateTimeFormatter returnDateFormat = DateTimeFormat.forPattern("dd-MM-yyyy").withLocale(Locale.GERMAN);

    while (line < lines.size()) {
        Element element = lines.get(line);
        if (element.select("hr").size() == 0) {
            Element titleElem = element.firstElementSibling();
            String detail = element.text().trim();
            String title = titleElem.text().replace("\u00a0", " ").trim();

            if (detail.length() == 0 && title.length() == 0) {
                line++;
                continue;
            }

            if (title.contains("Standort") || title.contains("Vorhanden in") || title.contains("Location")) {
                location += detail;
            } else if (title.contains("Sonderstandort")) {
                location += " - " + detail;
            } else if (title.contains("Systemstelle") || title.contains("Subject")) {
                copy.setDepartment(detail);
            } else if (title.contains("Fachnummer") || title.contains("locationnumber")) {
                copy.setLocation(detail);
            } else if (title.contains("Signatur") || title.contains("Shelf mark")) {
                copy.setShelfmark(detail);
            } else if (title.contains("Anmerkung")) {
                location += " (" + detail + ")";
            } else if (title.contains("Link")) {
                result.addDetail(new Detail(title.replace(":", "").trim(), detail));
            } else if (title.contains("Status") || title.contains("Ausleihinfo")
                    || title.contains("Ausleihstatus") || title.contains("Request info")) {
                // Find return date
                Matcher matcher = returnDatePattern.matcher(detail);
                if (matcher.find()) {
                    // Drop the character before the match (normally a space). The index is
                    // clamped so that a detail starting with the date phrase does not make
                    // substring() throw.
                    int statusEnd = Math.max(0, matcher.start() - 1);
                    try {
                        copy.setStatus(detail.substring(0, statusEnd).trim());
                        copy.setReturnDate(returnDateFormat.parseLocalDate(matcher.group(2)));
                    } catch (IllegalArgumentException e) {
                        // Unparseable date: fall back to the full text as the status.
                        e.printStackTrace();
                        copy.setStatus(detail);
                    }
                } else {
                    copy.setStatus(detail);
                }
                // Get reservation info
                Element a = element.select("a:has(img[src*=inline_arrow])").first();
                if (a != null) {
                    boolean multipleCopies = a.text().matches(".*(Exemplare|Volume list).*");
                    JSONObject reservation = new JSONObject();
                    try {
                        reservation.put("multi", multipleCopies);
                        reservation.put("link", _extract_url(a.absUrl("href")));
                        reservation.put("desc", location);
                        reservationInfo.put(reservation);
                    } catch (JSONException e1) {
                        e1.printStackTrace();
                    }
                    result.setReservable(true);
                }
            }
        } else {
            // A separator cell closes the current copy and starts a new one.
            copy.setBranch(location);
            result.addCopy(copy);
            location = "";
            copy = new Copy();
        }
        line++;
    }

    // The last copy is not followed by a separator, so add it here if it holds any data.
    if (copy.notEmpty()) {
        copy.setBranch(location);
        result.addCopy(copy);
    }

    if (reservationInfo.length() == 0) {
        // No reservation info found yet, because we didn't find any copies.
        // If there is a reservation link somewhere in the rows we interpreted
        // as details, we still want to use it.
        Element a = doc.select("td a:has(img[src*=inline_arrow])").first();
        if (a != null) {
            boolean multipleCopies = a.text().matches(".*(Exemplare|Volume list).*");
            JSONObject reservation = new JSONObject();
            try {
                reservation.put("multi", multipleCopies);
                // NOTE(review): this uses the raw href while the copies loop uses absUrl();
                // left unchanged because _extract_url is not visible here - confirm intent.
                reservation.put("link", _extract_url(a.attr("href")));
                reservation.put("desc", location);
                reservationInfo.put(reservation);
            } catch (JSONException e1) {
                e1.printStackTrace();
            }
            result.setReservable(true);
        }
    }
    result.setReservation_info(reservationInfo.toString());

    // Volumes
    Elements volumeLinks = doc.select("a[href^=FAM?PPN=]");
    if (volumeLinks.size() > 0) {
        String href = volumeLinks.attr("href");
        String ppn = getQueryParamsFirst(href).get("PPN");
        Map<String, String> data = new HashMap<>();
        data.put("ppn", ppn);
        result.setVolumesearch(data);
    }

    return result;
}

/**
 * Returns the index of the first "/" or ":" in {@code text}, or -1 if it contains neither.
 */
private int firstTitleSeparator(String text) {
    int slash = text.indexOf('/');
    int colon = text.indexOf(':');
    if (slash < 0) {
        return colon;
    }
    if (colon < 0) {
        return slash;
    }
    return Math.min(slash, colon);
}