Example usage for org.jsoup.nodes Document setBaseUri

List of usage examples for org.jsoup.nodes Document setBaseUri

Introduction

On this page you can find example usages of the method org.jsoup.nodes.Document.setBaseUri.

Prototype

public void setBaseUri(final String baseUri) 

Source Link

Document

Update the base URI of this node and all of its descendants.

Usage

From source file:de.geeksfactory.opacclient.apis.Open.java

/**
 * Performs a search by filling in the catalogue's advanced-search form and submitting it.
 *
 * <p>The form page is fetched and parsed with its own URL as base URI. Every query with a
 * usable value is written into the matching form control, then the form is posted through
 * its "BtnSearch" button and the response is handed to {@code parse_search} with page
 * argument 0.
 *
 * @param queries search criteria; entries whose value equals "" or "false" are skipped
 * @return the result of {@code parse_search} for the submitted form's response
 * @throws OpacErrorException if more than three "selectable" text fields carry a value
 * @throws IOException        declared for the HTTP helpers used here
 * @throws JSONException      declared for the JSON lookups used here
 */
@Override
public SearchRequestResult search(List<SearchQuery> queries)
        throws IOException, OpacErrorException, JSONException {
    final String formUrl = opac_url + "/" + data.getJSONObject("urls").getString("advanced_search")
            + NO_MOBILE;
    final Document formPage = Jsoup.parse(httpGet(formUrl, getDefaultEncoding()));
    formPage.setBaseUri(formUrl);

    // Number of "selectable" text criteria already placed into the numbered form slots.
    int usedSelectable = 0;
    for (SearchQuery query : queries) {
        final String value = query.getValue();
        if (value.equals("") || value.equals("false")) {
            continue;
        }

        if (query.getSearchField() instanceof TextSearchField) {
            final TextSearchField textField = (TextSearchField) query.getSearchField();
            if (!textField.getData().getBoolean("selectable")) {
                // Non-selectable text fields are plain inputs addressed by their own name.
                formPage.select("input[name=" + textField.getId() + "]").first().val(value);
                continue;
            }

            usedSelectable++;
            if (usedSelectable > 3) {
                throw new OpacErrorException(
                        stringProvider.getQuantityString(StringProvider.LIMITED_NUM_OF_CRITERIA, 3, 3));
            }
            // Selectable criteria use a pair of controls whose names end in
            // "<number>SearchField" / "<number>SearchValue".
            final String slot = numberToText(usedSelectable);
            final Element fieldChooser = formPage.select("select[name$=" + slot + "SearchField]").first();
            final Element valueInput = formPage.select("input[name$=" + slot + "SearchValue]").first();
            fieldChooser.val(textField.getId());
            valueInput.val(value);
        } else if (query.getSearchField() instanceof DropdownSearchField) {
            final DropdownSearchField dropdown = (DropdownSearchField) query.getSearchField();
            formPage.select("select[name=" + dropdown.getId() + "]").first().val(value);
        } else if (query.getSearchField() instanceof CheckboxSearchField) {
            final CheckboxSearchField checkbox = (CheckboxSearchField) query.getSearchField();
            formPage.select("input[name=" + checkbox.getId() + "]").first().attr("checked", value);
        }
    }

    // Submit the first form on the page and parse the response relative to the POST target.
    final FormElement searchForm = (FormElement) formPage.select("form").first();
    final HttpEntity payload = formData(searchForm, "BtnSearch").build();
    final String postUrl = searchForm.attr("abs:action");

    final Document resultPage = Jsoup.parse(httpPost(postUrl, payload, "UTF-8"));
    resultPage.setBaseUri(postUrl);
    return parse_search(resultPage, 0);
}

From source file:de.geeksfactory.opacclient.apis.Zones22.java

/**
 * Turns a Zones 2.2 result-list page into a {@link SearchRequestResult}.
 *
 * <p>As a side effect the field {@code searchobj} is set to the path part (everything before
 * the first '?') of the first {@code .pageNavLink} href, if such a link exists.
 *
 * @param html raw HTML of the result page
 * @param page page number, passed through unchanged to the returned result
 * @return the parsed rows plus the total hit count (-1 if no {@code .searchHits} element exists)
 * @throws OpacErrorException if the page contains an {@code #ErrorAdviceRow}; its text is the message
 */
private SearchRequestResult parse_search(String html, int page) throws OpacErrorException {
    final Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url + "/APS_PRESENT_BIB");

    final Elements errorRow = doc.select("#ErrorAdviceRow");
    if (!errorRow.isEmpty()) {
        throw new OpacErrorException(errorRow.text().trim());
    }

    // The hit counter holds the total in parentheses, e.g. "... (123) ...".
    int totalHits = -1;
    final Elements hitCounter = doc.select(".searchHits");
    if (!hitCounter.isEmpty()) {
        final String counterText = hitCounter.first().text().trim();
        totalHits = Integer.parseInt(counterText.replaceAll(".*\\(([0-9]+)\\).*", "$1"));
    }

    final Elements navLinks = doc.select(".pageNavLink");
    if (!navLinks.isEmpty()) {
        searchobj = navLinks.first().attr("href").split("\\?")[0];
    }

    final List<SearchResult> results = new ArrayList<SearchResult>();
    int position = 0;
    for (Element row : doc.select("#BrowseList > tbody > tr")) {
        final SearchResult sr = new SearchResult();

        // Media type: use the configured mapping if possible, otherwise the built-in defaults.
        final String typetext = row.select(".SummaryMaterialTypeField").text().replace("\n", " ").trim();
        boolean typeFromConfig = false;
        if (data.has("mediatypes")) {
            try {
                sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(typetext)));
                typeFromConfig = true;
            } catch (JSONException e) {
                // no mapping configured for this label - use the default below
            } catch (IllegalArgumentException e) {
                // configured value is not a MediaType constant - use the default below
            }
        }
        if (!typeFromConfig) {
            sr.setType(defaulttypes.get(typetext));
        }

        final Elements coverImages = row.select(".SummaryImageCell img[id^=Bookcover]");
        if (!coverImages.isEmpty()) {
            sr.setCover(coverImages.first().attr("src"));
        }

        // Build the short description from the title, author and year rows; the record id
        // comes from the "no" query parameter of the first summary link encountered.
        final StringBuilder html_ = new StringBuilder();
        boolean idFound = false;
        for (Element field : row.select(".SummaryDataCell tr, .SummaryDataCellStripe tr")) {
            final String legend = field.select(".SummaryFieldLegend").text();
            if (legend.equals("Titel")) {
                html_.append("<b>").append(field.select(".SummaryFieldData").text().trim())
                        .append("</b><br />");
            } else if (legend.equals("Verfasser") || legend.equals("Jahr")) {
                html_.append(field.select(".SummaryFieldData").text().trim()).append("<br />");
            }

            if (!idFound) {
                final Elements links = field.select(".SummaryFieldData a.SummaryFieldLink");
                if (!links.isEmpty()) {
                    final Map<String, String> hrefq = getQueryParamsFirst(links.attr("abs:href"));
                    sr.setId(hrefq.get("no"));
                    idFound = true;
                }
            }
        }

        // Drop the trailing line break left behind by the last appended row.
        String desc = html_.toString();
        if (desc.endsWith("<br />")) {
            desc = desc.substring(0, desc.length() - "<br />".length());
        }
        sr.setInnerhtml(desc);
        sr.setNr(position);

        results.add(sr);
        position++;
    }

    return new SearchRequestResult(results, totalHits, page);
}

From source file:de.geeksfactory.opacclient.apis.Heidi.java

/**
 * Parses a result-list page into a {@link SearchRequestResult}.
 *
 * <p>The total hit count is taken from {@code #heiditreffer}. Every row of
 * {@code table.treffer} yields one {@link SearchResult}: its id is the {@code katkey} query
 * parameter of the first link that has one, its description is built from the COinS
 * ({@code span.Z3988}) metadata if exactly one such span is present, and status and media
 * type are derived from the {@code .kurzstat} / {@code .typbild} texts.
 *
 * <p>Rows without usable COinS metadata get an empty description instead of failing.
 *
 * @param html raw HTML of the result page
 * @param page page number, passed through unchanged to the returned result
 * @return the parsed rows together with the total hit count (0 if no counter is present)
 */
private SearchRequestResult parse_search(String html, int page) {
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url);

    int results_total = 0;
    if (doc.select("#heiditreffer").size() > 0) {
        String resstr = doc.select("#heiditreffer").text();
        // Keep only the number inside the parentheses and strip thousands separators.
        String resnum = resstr.replaceAll("\\(([0-9.]+)([^0-9]*)\\)", "$1").replace(".", "");
        results_total = Integer.parseInt(resnum);
    }

    Elements table = doc.select("table.treffer tr");
    List<SearchResult> results = new ArrayList<>();
    for (int i = 0; i < table.size(); i++) {
        Element tr = table.get(i);
        SearchResult sr = new SearchResult();

        // Never null: rows without COinS data simply keep an empty description.
        StringBuilder description = new StringBuilder();
        String author = "";

        for (Element link : tr.select("a")) {
            String kk = getQueryParamsFirst(link.absUrl("href")).get("katkey");
            if (kk != null) {
                sr.setId(kk);
                break;
            }
        }

        if (tr.select("span.Z3988").size() == 1) {
            // Luckily there is a <span class="Z3988"> item which provides
            // data in a standardized format.
            List<NameValuePair> z3988data;
            boolean hastitle = false;
            try {
                // The title attribute is a URL-encoded query string; a dummy host makes it
                // parseable as a URI.
                z3988data = URLEncodedUtils
                        .parse(new URI("http://dummy/?" + tr.select("span.Z3988").attr("title")), "UTF-8");
                for (NameValuePair nv : z3988data) {
                    String name = nv.getName();
                    String value = nv.getValue();
                    if (value == null || value.trim().equals("")) {
                        continue;
                    }
                    if ((name.equals("rft.btitle") || name.equals("rft.atitle")) && !hastitle) {
                        // Only the first title (book or article) is shown.
                        description.append("<b>").append(value).append("</b>");
                        hastitle = true;
                    } else if (name.equals("rft.au")) {
                        author = value;
                    } else if (name.equals("rft.aufirst")) {
                        author = author + ", " + value;
                    } else if (name.equals("rft.aulast")) {
                        author = value;
                    } else if (name.equals("rft.date")) {
                        description.append("<br />").append(value);
                    }
                }
            } catch (URISyntaxException e) {
                // Unparseable metadata: fall back to an empty description for this row.
                description.setLength(0);
            }
        }
        if (!"".equals(author)) {
            author = author + "<br />";
        }
        sr.setInnerhtml(author + description.toString());

        if (tr.select(".kurzstat").size() > 0) {
            String stattext = tr.select(".kurzstat").first().text();
            if (stattext.contains("ausleihbar") || stattext.contains("online")) {
                sr.setStatus(Status.GREEN);
            } else if (stattext.contains("entliehen")) {
                sr.setStatus(Status.RED);
            } else if (stattext.contains("Pr\u00e4senznutzung") || stattext.contains("Prsenznutzung")
                    || stattext.contains("bestellen")) {
                // "Prsenznutzung" is the umlaut-less spelling this check used before; it is
                // kept alongside the correct word so existing matches are not lost.
                sr.setStatus(Status.YELLOW);
            }
        }
        if (tr.select(".typbild").size() > 0) {
            String typtext = tr.select(".typbild").first().text();
            // Order matters: "DVD-ROM" must be tested before "DVD",
            // and "Zeitschrift" before "Zeitung".
            if (typtext.contains("Buch")) {
                sr.setType(MediaType.BOOK);
            } else if (typtext.contains("DVD-ROM")) {
                sr.setType(MediaType.CD_SOFTWARE);
            } else if (typtext.contains("Online-Ressource")) {
                sr.setType(MediaType.EDOC);
            } else if (typtext.contains("DVD")) {
                sr.setType(MediaType.DVD);
            } else if (typtext.contains("Film")) {
                sr.setType(MediaType.MOVIE);
            } else if (typtext.contains("Zeitschrift")) {
                sr.setType(MediaType.MAGAZINE);
            } else if (typtext.contains("Musiknoten")) {
                sr.setType(MediaType.SCORE_MUSIC);
            } else if (typtext.contains("Bildliche Darstellung")) {
                sr.setType(MediaType.ART);
            } else if (typtext.contains("Zeitung")) {
                sr.setType(MediaType.NEWSPAPER);
            } else if (typtext.contains("Karte")) {
                sr.setType(MediaType.MAP);
            } else if (typtext.contains("Mehrteilig")) {
                sr.setType(MediaType.PACKAGE_BOOKS);
            }
        }

        results.add(sr);
    }
    // TODO
    return new SearchRequestResult(results, results_total, page);
}

From source file:de.geeksfactory.opacclient.apis.Zones.java

/**
 * Opens the "MyZone" page and, if necessary, submits the login form for the given account.
 *
 * <p>If the start page already contains an {@code .AccountSummaryCounterLink} it is returned
 * as is (presumably a session is still active). Otherwise all inputs of {@code #LoginForm}
 * except BRWR and PIN are copied, BRWR/PIN are filled from the account, and the form is posted.
 * As a side effect {@code accountobj} is set from links of the form {@code Obj_<digits>?...}
 * found in the response.
 *
 * @param acc account supplying the BRWR (name) and PIN (password) values
 * @return the start page or the post-login page; {@code null} if posting the form failed
 *         with an I/O error
 * @throws NotReachableException if the start page has no {@code #LoginForm}
 * @throws OpacErrorException    if the login response does not contain "Kontostand"
 * @throws IOException           declared for the initial page fetch
 */
private Document login(Account acc) throws IOException, OpacErrorException {
    final String startUrl = opac_url
            + "/APS_ZONES?fn=MyZone&Style=Portal3&SubStyle=&Lang=GER&ResponseEncoding=utf-8";
    final Document startPage = Jsoup.parse(httpGet(startUrl, getDefaultEncoding()));
    startPage.setBaseUri(opac_url + "/APS_ZONES");

    if (!startPage.select(".AccountSummaryCounterLink").isEmpty()) {
        return startPage;
    }

    final Elements loginForm = startPage.select("#LoginForm");
    if (loginForm.isEmpty()) {
        throw new NotReachableException("Login form not found");
    }

    // Carry over every form input except the credentials, which are supplied from the account.
    final List<NameValuePair> params = new ArrayList<>();
    for (Element input : startPage.select("#LoginForm input")) {
        final String name = input.attr("name");
        if (name.equals("BRWR") || name.equals("PIN")) {
            continue;
        }
        params.add(new BasicNameValuePair(name, input.attr("value")));
    }
    params.add(new BasicNameValuePair("BRWR", acc.getName()));
    params.add(new BasicNameValuePair("PIN", acc.getPassword()));

    final String loginHtml;
    try {
        loginHtml = httpPost(loginForm.get(0).absUrl("action"), new UrlEncodedFormEntity(params),
                getDefaultEncoding());
    } catch (IOException e) {
        // Also covers UnsupportedEncodingException, which is an IOException.
        e.printStackTrace();
        return null;
    }

    if (!loginHtml.contains("Kontostand")) {
        throw new OpacErrorException(stringProvider.getString(StringProvider.LOGIN_FAILED));
    }

    final Document accountPage = Jsoup.parse(loginHtml);
    final Pattern objectIdPattern = Pattern.compile("Obj_([0-9]+)\\?.*");
    for (Element anchor : accountPage.select("a")) {
        final Matcher m = objectIdPattern.matcher(anchor.attr("href"));
        if (m.matches()) {
            // The last matching link wins.
            accountobj = m.group(1);
        }
    }

    return accountPage;
}

From source file:de.geeksfactory.opacclient.apis.Zones.java

/**
 * Appends the lent items of {@code lentDoc} and of all follow-up pages to {@code items}.
 *
 * <p>Each follow-up page is fetched from the URL returned by {@code findNextPageUrl} and
 * inherits the base URI of the page that linked to it.
 *
 * @param lentDoc first page of the lent-items list
 * @param items   list that receives the parsed items of every page
 * @throws IOException declared for fetching follow-up pages
 */
private void loadMediaList(Document lentDoc, List<LentItem> items) throws IOException {
    Document current = lentDoc;
    while (true) {
        items.addAll(parseMediaList(current));
        final String nextPageUrl = findNextPageUrl(current);
        if (nextPageUrl == null) {
            return;
        }
        final Document following = Jsoup.parse(httpGet(nextPageUrl, getDefaultEncoding()));
        following.setBaseUri(current.baseUri());
        current = following;
    }
}

From source file:de.geeksfactory.opacclient.apis.Zones.java

/**
 * Appends the reserved items of {@code lentDoc} and of all follow-up pages to {@code items}.
 *
 * <p>Each follow-up page is fetched from the URL returned by {@code findNextPageUrl} and
 * inherits the base URI of the page that linked to it.
 *
 * @param lentDoc first page of the reservations list
 * @param items   list that receives the parsed reservations of every page
 * @throws IOException declared for fetching follow-up pages
 */
private void loadResList(Document lentDoc, List<ReservedItem> items) throws IOException {
    Document current = lentDoc;
    while (true) {
        items.addAll(parseResList(current));
        final String nextPageUrl = findNextPageUrl(current);
        if (nextPageUrl == null) {
            return;
        }
        final Document following = Jsoup.parse(httpGet(nextPageUrl, getDefaultEncoding()));
        following.setBaseUri(current.baseUri());
        current = following;
    }
}

From source file:de.geeksfactory.opacclient.apis.Pica.java

/**
 * Parses a Pica detail page into a {@link DetailledItem}.
 *
 * <p>The page is read as a sequence of {@code td.preslabel + td.presvalue} rows. Rows up to
 * the first row containing an {@code <hr>} become details; the rows after it are read as
 * copies, with each further {@code <hr>} row closing one copy. In addition the method extracts
 * the record id (PPN), a cover URL derived from the ISBN, title and subtitle, reservation
 * links (stored as a JSON array string) and a volume-search PPN.
 *
 * @param html raw HTML of the detail page
 * @return the populated item; its title is "" if no title row is found
 */
protected DetailledItem parse_result(String html) {
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url);

    DetailledItem result = new DetailledItem();
    // The first link whose href contains "PPN" identifies the record. The attribute selector
    // must be bracket-balanced; strict selector parsers reject an unterminated "[".
    Element ppnLink = doc.select("a[href*=PPN]").first();
    if (ppnLink != null) {
        Map<String, String> hrefq = getQueryParamsFirst(ppnLink.absUrl("href"));
        result.setId(hrefq.get("PPN"));
    }

    // GET COVER
    if (doc.select("td.preslabel:contains(ISBN) + td.presvalue").size() > 0) {
        Element isbnElement = doc.select("td.preslabel:contains(ISBN) + td.presvalue").first();
        String isbn = "";
        // Only the first direct text node is used as the ISBN.
        for (Node child : isbnElement.childNodes()) {
            if (child instanceof TextNode) {
                isbn = ((TextNode) child).text().trim();
                break;
            }
        }
        result.setCover(ISBNTools.getAmazonCoverURL(isbn, true));
    }

    // GET TITLE AND SUBTITLE
    String titleAndSubtitle;
    Element titleAndSubtitleElem = null;
    String titleRegex = ".*(Titel|Aufsatz|Zeitschrift|Gesamttitel"
            + "|Title|Article|Periodical|Collective\\stitle" + "|Titre|Article|P.riodique|Titre\\sg.n.ral).*";
    String selector = "td.preslabel:matches(" + titleRegex + ") + td.presvalue";
    if (doc.select(selector).size() > 0) {
        titleAndSubtitleElem = doc.select(selector).first();
        titleAndSubtitle = titleAndSubtitleElem.text().trim();

        // Split at the first "/" or ":" that is actually present. A plain Math.min over both
        // indexOf results would be -1 whenever one of the two separators is missing.
        int slashIndex = titleAndSubtitle.indexOf("/");
        int colonIndex = titleAndSubtitle.indexOf(":");
        int slashPosition;
        if (slashIndex < 0) {
            slashPosition = colonIndex;
        } else if (colonIndex < 0) {
            slashPosition = slashIndex;
        } else {
            slashPosition = Math.min(slashIndex, colonIndex);
        }

        String title;
        if (slashPosition > 0) {
            title = titleAndSubtitle.substring(0, slashPosition).trim();
            String subtitle = titleAndSubtitle.substring(slashPosition + 1).trim();
            result.addDetail(new Detail(stringProvider.getString(StringProvider.SUBTITLE), subtitle));
        } else {
            title = titleAndSubtitle;
        }
        result.setTitle(title);
    } else {
        result.setTitle("");
    }

    // Details
    // "line" counts the rows consumed so far; after the loop it is the index of the separator
    // row (or lines.size() if there is none).
    int line = 0;
    Elements lines = doc.select("td.preslabel + td.presvalue");
    if (titleAndSubtitleElem != null) {
        lines.remove(titleAndSubtitleElem);
    }
    for (Element element : lines) {
        Element titleElem = element.firstElementSibling();
        String detail = "";
        if (element.select("div").size() > 1 && element.select("div").text().equals(element.text())) {
            // The value consists only of several <div>s: join the non-blank ones with newlines.
            boolean first = true;
            for (Element div : element.select("div")) {
                String divText = div.text().replace("\u00a0", " ").trim();
                if (!divText.equals("")) {
                    if (!first) {
                        detail += "\n" + divText;
                    } else {
                        detail += divText;
                        first = false;
                    }
                }
            }
        } else {
            detail = element.text().replace("\u00a0", " ").trim();
        }
        String title = titleElem.text().replace("\u00a0", " ").trim();

        if (element.select("hr").size() > 0) {
            // after the separator we get the copies
            break;
        }

        if (detail.length() == 0 && title.length() == 0) {
            line++;
            continue;
        }
        if (title.contains(":")) {
            title = title.substring(0, title.indexOf(":")); // remove colon
        }
        result.addDetail(new Detail(title, detail));

        // A single, non-empty link pointing outside the catalogue is listed as a separate detail.
        if (element.select("a").size() == 1 && !element.select("a").get(0).text().trim().equals("")) {
            String url = element.select("a").first().absUrl("href");
            if (!url.startsWith(opac_url)) {
                result.addDetail(new Detail(stringProvider.getString(StringProvider.LINK), url));
            }
        }

        line++;
    }
    line++; // next line after separator

    // Copies
    Copy copy = new Copy();
    String location = "";

    // reservation info will be stored as JSON
    JSONArray reservationInfo = new JSONArray();

    // Loop-invariant helpers for extracting the return date from a status text.
    Pattern returnDatePattern = Pattern.compile("(till|bis) (\\d{2}-\\d{2}-\\d{4})");
    DateTimeFormatter returnDateFormat = DateTimeFormat.forPattern("dd-MM-yyyy").withLocale(Locale.GERMAN);

    while (line < lines.size()) {
        Element element = lines.get(line);
        if (element.select("hr").size() == 0) {
            Element titleElem = element.firstElementSibling();
            String detail = element.text().trim();
            String title = titleElem.text().replace("\u00a0", " ").trim();

            if (detail.length() == 0 && title.length() == 0) {
                line++;
                continue;
            }

            if (title.contains("Standort") || title.contains("Vorhanden in") || title.contains("Location")) {
                location += detail;
            } else if (title.contains("Sonderstandort")) {
                location += " - " + detail;
            } else if (title.contains("Systemstelle") || title.contains("Subject")) {
                copy.setDepartment(detail);
            } else if (title.contains("Fachnummer") || title.contains("locationnumber")) {
                copy.setLocation(detail);
            } else if (title.contains("Signatur") || title.contains("Shelf mark")) {
                copy.setShelfmark(detail);
            } else if (title.contains("Anmerkung")) {
                location += " (" + detail + ")";
            } else if (title.contains("Link")) {
                result.addDetail(new Detail(title.replace(":", "").trim(), detail));
            } else if (title.contains("Status") || title.contains("Ausleihinfo")
                    || title.contains("Ausleihstatus") || title.contains("Request info")) {
                // Find return date
                Matcher matcher = returnDatePattern.matcher(detail);
                if (matcher.find()) {
                    try {
                        copy.setStatus(detail.substring(0, matcher.start() - 1).trim());
                        copy.setReturnDate(returnDateFormat.parseLocalDate(matcher.group(2)));
                    } catch (IllegalArgumentException e) {
                        // Date (or prefix) could not be extracted: keep the raw text as status.
                        e.printStackTrace();
                        copy.setStatus(detail);
                    }
                } else {
                    copy.setStatus(detail);
                }
                // Get reservation info
                if (element.select("a:has(img[src*=inline_arrow])").size() > 0) {
                    Element a = element.select("a:has(img[src*=inline_arrow])").first();
                    boolean multipleCopies = a.text().matches(".*(Exemplare|Volume list).*");
                    JSONObject reservation = new JSONObject();
                    try {
                        reservation.put("multi", multipleCopies);
                        reservation.put("link", _extract_url(a.absUrl("href")));
                        reservation.put("desc", location);
                        reservationInfo.put(reservation);
                    } catch (JSONException e1) {
                        e1.printStackTrace();
                    }
                    result.setReservable(true);
                }
            }
        } else {
            // A separator row closes the current copy.
            copy.setBranch(location);
            result.addCopy(copy);
            location = "";
            copy = new Copy();
        }
        line++;
    }

    // Flush a trailing copy that was not terminated by a separator row.
    if (copy.notEmpty()) {
        copy.setBranch(location);
        result.addCopy(copy);
    }

    if (reservationInfo.length() == 0) {
        // No reservation info found yet, because we didn't find any copies.
        // If there is a reservation link somewhere in the rows we interpreted
        // as details, we still want to use it.
        if (doc.select("td a:has(img[src*=inline_arrow])").size() > 0) {
            Element a = doc.select("td a:has(img[src*=inline_arrow])").first();
            boolean multipleCopies = a.text().matches(".*(Exemplare|Volume list).*");
            JSONObject reservation = new JSONObject();
            try {
                reservation.put("multi", multipleCopies);
                // NOTE(review): this passes the raw href, while the copies loop above passes
                // absUrl("href") - confirm which form _extract_url expects.
                reservation.put("link", _extract_url(a.attr("href")));
                reservation.put("desc", location);
                reservationInfo.put(reservation);
            } catch (JSONException e1) {
                e1.printStackTrace();
            }
            result.setReservable(true);
        }
    }
    result.setReservation_info(reservationInfo.toString());

    // Volumes
    if (doc.select("a[href^=FAM?PPN=]").size() > 0) {
        String href = doc.select("a[href^=FAM?PPN=]").attr("href");
        String ppn = getQueryParamsFirst(href).get("PPN");
        Map<String, String> data = new HashMap<>();
        data.put("ppn", ppn);
        result.setVolumesearch(data);
    }

    return result;
}

From source file:de.geeksfactory.opacclient.apis.Zones.java

/**
 * Loads the account overview, the lent items and the reservations for the given account.
 *
 * <p>The links to the lent-items and reservations lists are taken from the account summary
 * cells and, if present, overridden by the {@code a.AccountMenuLink} entries. If no
 * reservations link can be found on either the overview or the lent-items page, the
 * reservations list is left empty instead of requesting a bogus URL.
 *
 * @param acc the account to log in with
 * @return the account data, or {@code null} if {@code login} returned {@code null} or no
 *         lent-items link was found
 * @throws IOException        declared for the HTTP helpers used here
 * @throws JSONException      declared by the method signature
 * @throws OpacErrorException declared for {@code login}
 */
@Override
public AccountData account(Account acc) throws IOException, JSONException, OpacErrorException {
    Document login = login(acc);
    if (login == null) {
        return null;
    }

    AccountData res = new AccountData(acc.getId());

    String lentLink = null;
    String resLink = null;
    for (Element td : login.select(".AccountSummaryCounterNameCell, .AccountSummaryCounterNameCellStripe, "
            + ".CAccountDetailFieldNameCellStripe, .CAccountDetailFieldNameCell")) {
        String section = td.text().trim();
        // The item counters next to these cells are not needed here, so they are not parsed;
        // a non-numeric counter therefore cannot abort loading the account.
        if (section.contains("Entliehene Medien")) {
            lentLink = td.select("a").attr("href");
        } else if (section.contains("Vormerkungen")) {
            resLink = td.select("a").attr("href");
        } else if (section.contains("Kontostand")) {
            res.setPendingFees(td.nextElementSibling().text().trim());
        } else if (section.matches("Ausweis g.ltig bis")) {
            res.setValidUntil(td.nextElementSibling().text().trim());
        }
    }
    for (Element a : login.select("a.AccountMenuLink")) {
        if (a.text().contains("Ausleihen")) {
            lentLink = a.attr("href");
        } else if (a.text().contains("Vormerkungen")) {
            resLink = a.attr("href");
        }
    }
    if (lentLink == null) {
        return null;
    }

    List<LentItem> lentItems = new ArrayList<>();
    String lentUrl = opac_url + "/" + lentLink.replace("utf-8?Method", "utf-8&Method");
    String lentHtml = httpGet(lentUrl, getDefaultEncoding());
    Document lentDoc = Jsoup.parse(lentHtml);
    lentDoc.setBaseUri(lentUrl);
    loadMediaList(lentDoc, lentItems);
    res.setLent(lentItems);

    // In Koeln, the reservations link is not shown on the overview page, only on the
    // lent-items page.
    if (resLink == null) {
        for (Element a : lentDoc.select("a.AccountMenuLink")) {
            if (a.text().contains("Vormerkungen")) {
                resLink = a.attr("href");
            }
        }
    }

    List<ReservedItem> reservedItems = new ArrayList<>();
    if (resLink != null) {
        String resUrl = opac_url + "/" + resLink;
        String resHtml = httpGet(resUrl, getDefaultEncoding());
        Document resDoc = Jsoup.parse(resHtml);
        // Same treatment as lentDoc: follow-up pages inherit this base URI in loadResList.
        resDoc.setBaseUri(resUrl);
        loadResList(resDoc, reservedItems);
    }
    res.setReservations(reservedItems);

    return res;
}

From source file:de.geeksfactory.opacclient.apis.Zones.java

/**
 * Turns a Zones result-list page (layout 2.2 or 1.8, selected by {@code version18}) into a
 * {@link SearchRequestResult}.
 *
 * <p>As a side effect the field {@code searchobj} is updated from the first page-navigation
 * link, from a {@code div[targetObject]} attribute, or from a {@code targetObject = "..."}
 * assignment in the page source, whichever is found first.
 *
 * @param html raw HTML of the result page
 * @param page page number, passed through unchanged to the returned result
 * @return the parsed rows plus the total hit count (-1 if it cannot be determined)
 * @throws OpacErrorException if the page contains an {@code #ErrorAdviceRow}; its text is the message
 */
private SearchRequestResult parse_search(String html, int page) throws OpacErrorException {
    final Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url + "/APS_PRESENT_BIB");

    final Elements errorRow = doc.select("#ErrorAdviceRow");
    if (!errorRow.isEmpty()) {
        throw new OpacErrorException(errorRow.text().trim());
    }

    // Total hit count: either "(n)" inside the counter element, or "[x/n]" in a span.
    int totalHits = -1;
    final Elements hitCounter = doc.select(version18 ? "td:containsOwn(Total)" : ".searchHits");
    if (!hitCounter.isEmpty()) {
        final String counterText = hitCounter.first().text().trim();
        totalHits = Integer.parseInt(counterText.replaceAll(".*\\(([0-9]+)\\).*", "$1"));
    } else {
        // Zones 1.8 - searchGetPage
        final Elements pager = doc.select("span:matches(\\[\\d+/\\d+\\])");
        if (!pager.isEmpty()) {
            final Matcher pagerMatcher = Pattern.compile("\\[\\d+/(\\d+)\\]").matcher(pager.text());
            if (pagerMatcher.find()) {
                totalHits = Integer.parseInt(pagerMatcher.group(1));
            }
        }
    }

    final Elements navLinks = doc.select(".pageNavLink");
    if (!navLinks.isEmpty()) {
        // Zones 2.2
        searchobj = navLinks.first().attr("href").split("\\?")[0];
    } else {
        final Elements targetDivs = doc.select("div[targetObject]");
        if (!targetDivs.isEmpty()) {
            // Zones 1.8 - search
            searchobj = targetDivs.attr("targetObject").split("\\?")[0];
        } else {
            // Zones 1.8 - searchGetPage
            // The page embeds a JSON-like structure that uses "=" instead of ":", so the
            // value is pulled out with a regular expression.
            final Matcher targetMatcher = Pattern.compile("targetObject = \"([^\\?]+)[^\"]+\"")
                    .matcher(doc.html());
            if (targetMatcher.find()) {
                searchobj = targetMatcher.group(1);
            }
        }
    }

    // Selectors that differ between the two layouts.
    final String childrenQuery = version18 ? "table[cellpadding=1] tr"
            : ".SummaryDataCell tr, .SummaryDataCellStripe tr";
    final String linkSelector = version18 ? "a[href*=ShowStock], a[href*=APS_CAT_IDENTIFY]"
            : ".SummaryFieldData a.SummaryFieldLink";

    final Elements rows = doc.select("#BrowseList > tbody > tr," // Zones 2.2
            + " .inRoundBox1" // Zones 1.8
    );
    final List<SearchResult> results = new ArrayList<>();
    int position = 0;
    for (Element tr : rows) {
        final SearchResult sr = new SearchResult();

        final String typetext;
        if (version18) {
            // The media type is encoded in the icon's file name.
            final String[] parts = tr.select("img[src^=IMG/MAT]").attr("src").split("/");
            typetext = parts[parts.length - 1].replace(".gif", "");
        } else {
            typetext = tr.select(".SummaryMaterialTypeField").text().replace("\n", " ").trim();
        }

        // Use the configured mapping if possible, otherwise the built-in defaults.
        boolean typeFromConfig = false;
        if (data.has("mediatypes")) {
            try {
                sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(typetext)));
                typeFromConfig = true;
            } catch (JSONException | IllegalArgumentException ignored) {
                // no usable mapping for this type - use the default below
            }
        }
        if (!typeFromConfig) {
            sr.setType(defaulttypes.get(typetext));
        }

        String coverUrl = null;
        if (version18) {
            final Elements coverLink = tr.select("a[title=Titelbild]");
            if (!coverLink.isEmpty()) {
                coverUrl = coverLink.attr("href");
            } else {
                // TODO: better way to select these cover images? (found in Hannover)
                final Elements thumbnail = tr.select("img[width=50]");
                if (!thumbnail.isEmpty()) {
                    coverUrl = thumbnail.attr("src");
                }
            }
        } else {
            final Elements coverImage = tr.select(".SummaryImageCell img[id^=Bookcover]");
            if (!coverImage.isEmpty()) {
                coverUrl = coverImage.first().attr("src");
            }
        }
        sr.setCover(coverUrl);

        if (version18) {
            if (tr.select("img[src$=oci_1.gif]").size() > 0) {
                // probably can only appear when searching the catalog on a terminal in
                // the library.
                sr.setStatus(SearchResult.Status.GREEN);
            } else if (tr.select("img[src$=blob_amber.gif]").size() > 0) {
                sr.setStatus(SearchResult.Status.YELLOW);
            }
        }

        // Description from title/author/year rows; id from the first matching link.
        final StringBuilder descBuilder = new StringBuilder();
        boolean idFound = false;
        for (Element node : tr.select(childrenQuery)) {
            final String fieldName = getName(node);
            if (fieldName.equals("Titel")) {
                descBuilder.append("<b>").append(getValue(node).trim()).append("</b><br />");
            } else if (fieldName.equals("Verfasser") || fieldName.equals("Jahr")) {
                descBuilder.append(getValue(node).trim()).append("<br />");
            }

            if (!idFound) {
                final Elements links = node.select(linkSelector);
                if (!links.isEmpty()) {
                    final Map<String, String> hrefq = getQueryParamsFirst(links.attr("abs:href"));
                    if (hrefq.containsKey("no")) {
                        sr.setId(hrefq.get("no"));
                    } else if (hrefq.containsKey("Key")) {
                        sr.setId(hrefq.get("Key"));
                    }
                    idFound = true;
                }
            }
        }

        // Drop the trailing line break left behind by the last appended row.
        String desc = descBuilder.toString();
        if (desc.endsWith("<br />")) {
            desc = desc.substring(0, desc.length() - "<br />".length());
        }
        sr.setInnerhtml(desc);
        sr.setNr(position);

        results.add(sr);
        position++;
    }

    return new SearchRequestResult(results, totalHits, page);
}

From source file:de.geeksfactory.opacclient.apis.Bibliotheca.java

/**
 * Turns a Bibliotheca result-list page into a {@link SearchRequestResult}.
 *
 * <p>Each {@code tr.result_trefferX} / {@code tr.result_treffer} row of {@code .resulttab}
 * yields one {@link SearchResult}. The media type comes from the file name of the row's first
 * icon, the description from the content cell, and the id either from the
 * {@code detmediennr} link parameter or from an HTML comment at the start of the second cell
 * (the comment wins if both exist).
 *
 * @param html raw HTML of the result page
 * @param page page number, passed through unchanged to the returned result
 * @return the parsed rows plus the total hit count (-1 if it cannot be determined)
 */
protected SearchRequestResult parse_search(String html, int page) {
    final Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url);

    final Elements rows = doc.select(".resulttab tr.result_trefferX, .resulttab tr.result_treffer");
    final List<SearchResult> results = new ArrayList<>();
    for (int i = 0; i < rows.size(); i++) {
        final Element tr = rows.get(i);
        final SearchResult sr = new SearchResult();

        // Index of the cell holding the description; rows without an icon but with three
        // cells keep it in the third one.
        int contentindex = 1;
        final Elements icons = tr.select("td a img");
        if (!icons.isEmpty()) {
            final String[] fparts = icons.get(0).attr("src").split("/");
            final String fname = fparts[fparts.length - 1];

            boolean typeFromConfig = false;
            if (data.has("mediatypes")) {
                try {
                    sr.setType(MediaType.valueOf(data.getJSONObject("mediatypes").getString(fname)));
                    typeFromConfig = true;
                } catch (JSONException | IllegalArgumentException ignored) {
                    // no usable mapping for this icon - use the default below
                }
            }
            if (!typeFromConfig) {
                // Defaults are keyed by the lower-cased file name without image extension.
                sr.setType(defaulttypes.get(fname.toLowerCase(Locale.GERMAN).replace(".jpg", "")
                        .replace(".gif", "").replace(".png", "")));
            }
        } else if (tr.children().size() == 3) {
            contentindex = 2;
        }

        final Element contentCell = tr.child(contentindex);
        sr.setInnerhtml(contentCell.child(0).html());
        sr.setNr(i);

        final Element link = contentCell.select("a").first();
        try {
            if (link != null && link.attr("href").contains("detmediennr")) {
                final Map<String, String> params = getQueryParamsFirst(link.attr("abs:href"));
                final String nr = params.get("detmediennr");
                if (Integer.parseInt(nr) > i + 1) {
                    // Seems to be an ID
                    final String detDb = params.get("detDB");
                    if (detDb != null) {
                        sr.setId("&detmediennr=" + nr + "&detDB=" + detDb);
                    } else {
                        sr.setId("&detmediennr=" + nr);
                    }
                }
            }
        } catch (Exception e) {
            // Best effort: any failure here simply leaves the id unset.
        }
        try {
            if (tr.child(1).childNode(0) instanceof Comment) {
                // The text after ": " in the leading comment is taken as the id.
                final Comment marker = (Comment) tr.child(1).childNode(0);
                sr.setId(marker.getData().trim().split(": ")[1]);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        results.add(sr);
    }

    int totalHits = -1;
    final Elements hitCounter = doc.select(".result_gefunden");
    if (!hitCounter.isEmpty()) {
        try {
            totalHits = Integer.parseInt(
                    hitCounter.text().trim().replaceAll(".*[^0-9]+([0-9]+).*", "$1"));
        } catch (NumberFormatException e) {
            e.printStackTrace();
            totalHits = -1;
        }
    }
    return new SearchRequestResult(results, totalHits, page);
}