Example usage for org.jsoup.nodes Node attr

List of usage examples for org.jsoup.nodes Node attr

Introduction

On this page you can find example usages of the org.jsoup.nodes.Node attr method.

Prototype

public String attr(String attributeKey) 

Source Link

Document

Get an attribute's value by its key.

Usage

From source file:com.bibisco.manager.TextEditorManager.java

/**
 * Recursively walks {@code pNode} and its children, updating {@code pHtmlParsingResult}.
 * <ul>
 * <li>{@code #text} nodes are delegated to {@code parseTextNode}.</li>
 * <li>{@code spellerror} nodes are skipped when {@code pBlnExcludeSpellCheck} is set.</li>
 * <li>{@code span} nodes whose style attribute is exactly {@code "display: none;"} are skipped.</li>
 * <li>Every other node updates the list bookkeeping ({@code ulOpen}, {@code olOpen},
 * {@code olLiPosition}, {@code characterCount}) and then has its children visited.</li>
 * </ul>
 *
 * @param pHtmlParsingResult accumulator that is mutated while walking
 * @param pNode node to process
 * @param pBlnExcludeSpellCheck when true, {@code spellerror} subtrees are ignored
 */
private static void parseNode(HtmlParsingResult pHtmlParsingResult, Node pNode, boolean pBlnExcludeSpellCheck) {

    final String lStrNodeName = pNode.nodeName();
    mLog.debug("Start parseNode(HtmlParsingResult, Node, boolean): ", lStrNodeName);

    if ("#text".equals(lStrNodeName)) {
        parseTextNode(pHtmlParsingResult, pNode);
    } else {
        // Subtrees that contribute nothing: excluded spell-check markers and hidden spans.
        final boolean lBlnSkipped = ("spellerror".equals(lStrNodeName) && pBlnExcludeSpellCheck)
                || ("span".equals(lStrNodeName) && pNode.attr("style").equals("display: none;"));

        if (!lBlnSkipped) {
            final boolean lBlnUl = "ul".equals(lStrNodeName);
            final boolean lBlnOl = "ol".equals(lStrNodeName);

            // Entering a list: remember which kind is open.
            if (lBlnUl) {
                pHtmlParsingResult.ulOpen = true;
            }
            if (lBlnOl) {
                pHtmlParsingResult.olOpen = true;
            }

            if ("li".equals(lStrNodeName)) {
                if (pHtmlParsingResult.ulOpen) {
                    // An unordered item adds one character to the count.
                    pHtmlParsingResult.characterCount += 1;
                } else if (pHtmlParsingResult.olOpen) {
                    // An ordered item adds one character plus the digits of its position.
                    pHtmlParsingResult.characterCount += 1;
                    pHtmlParsingResult.olLiPosition += 1;
                    pHtmlParsingResult.characterCount += String.valueOf(pHtmlParsingResult.olLiPosition)
                            .length();
                }
            }

            for (Node lChild : pNode.childNodes()) {
                parseNode(pHtmlParsingResult, lChild, pBlnExcludeSpellCheck);
            }

            // Leaving a list: reset the bookkeeping set on entry.
            if (lBlnUl) {
                pHtmlParsingResult.ulOpen = false;
            }
            if (lBlnOl) {
                pHtmlParsingResult.olOpen = false;
                pHtmlParsingResult.olLiPosition = 0;
            }
        }
    }

    mLog.debug("End parseNode(HtmlParsingResult, Node, boolean)");
}

From source file:com.screenslicer.core.util.Util.java

/**
 * Tells whether the node's {@code class} attribute contains {@code HIDDEN_MARKER}.
 *
 * @param node node whose class attribute is inspected
 * @return true when the marker occurs anywhere in the class attribute
 */
public static boolean isHidden(Node node) {
    final String classes = node.attr("class");
    return classes.indexOf(HIDDEN_MARKER) >= 0;
}

From source file:com.screenslicer.core.util.Util.java

/**
 * Tells whether the node's {@code class} attribute contains {@code FILTERED_MARKER}.
 *
 * @param node node whose class attribute is inspected
 * @return true when the marker occurs anywhere in the class attribute
 */
public static boolean isFiltered(Node node) {
    final String classes = node.attr("class");
    return classes.indexOf(FILTERED_MARKER) >= 0;
}

From source file:com.screenslicer.core.util.Util.java

/**
 * Tells whether the node's {@code class} attribute contains either
 * {@code FILTERED_MARKER} or {@code FILTERED_LENIENT_MARKER}.
 *
 * @param node node whose class attribute is inspected
 * @return true when at least one of the two markers occurs in the class attribute
 */
public static boolean isFilteredLenient(Node node) {
    final String classes = node.attr("class");
    if (classes.indexOf(FILTERED_MARKER) >= 0) {
        return true;
    }
    return classes.indexOf(FILTERED_LENIENT_MARKER) >= 0;
}

From source file:com.screenslicer.core.util.Util.java

/**
 * Extracts the first substring of the node's {@code class} attribute that matches
 * the {@code nodeMarker} pattern.
 *
 * @param node node to inspect; may be null
 * @return the matched text, or null when the node is null, its class attribute is
 *         empty according to {@code CommonUtil.isEmpty}, or the pattern does not match
 */
public static String classId(Node node) {
    if (node == null) {
        return null;
    }
    final String classes = node.attr("class");
    if (CommonUtil.isEmpty(classes)) {
        return null;
    }
    final Matcher marker = nodeMarker.matcher(classes);
    return marker.find() ? marker.group(0) : null;
}

From source file:com.screenslicer.core.util.BrowserUtil.java

/**
 * Resolves a parsed {@code Node} (and/or an {@code HtmlNode} description) to a live
 * {@code WebElement} in the browser.
 *
 * <p>First the class id obtained from {@code NodeUtil.classId(node)} is looked up with
 * {@code browser.findElementByClassName}. If that yields nothing and {@code recurse} is
 * true, a fuzzy search is attempted: once with an {@code HtmlNode} built from the
 * attributes of {@code node}, then with the supplied {@code htmlNode}.
 *
 * <p>{@code Browser.Retry} and {@code Browser.Fatal} are always rethrown; any other
 * {@code Throwable} is logged through {@code Log.exception} and the search continues.
 *
 * @param browser browser used for the lookups
 * @param node parsed node to resolve; may be null
 * @param htmlNode alternative description of the element; may be null
 * @param recurse whether the fuzzy fallback searches are allowed
 * @return the matching element, or null when every strategy failed
 */
private static WebElement toElement(Browser browser, Node node, HtmlNode htmlNode, boolean recurse) {
    if (node != null || htmlNode != null) {
        try {
            // classId is called even when node is null (only htmlNode given);
            // NOTE(review): relies on NodeUtil.classId tolerating null - confirm.
            String classId = NodeUtil.classId(node);
            if (classId != null) {
                WebElement element = browser.findElementByClassName(classId);
                if (element != null) {
                    return element;
                }
            }
        } catch (Browser.Retry r) {
            throw r;
        } catch (Browser.Fatal f) {
            throw f;
        } catch (Throwable t) {
            // Best effort: log and fall through to the fuzzy search below.
            Log.exception(t);
        }
    }
    if (recurse) {
        Log.warn("Could not convert Node to WebElement... trying fuzzy search");
        if (node != null) {
            try {
                // Describe the node by its attributes so it can be matched fuzzily.
                HtmlNode find = new HtmlNode();
                find.alt = node.attr("alt");
                find.classes = CommonUtil.isEmpty(node.attr("class")) ? null : node.attr("class").split("\\s");
                find.href = node.attr("href");
                find.id = node.attr("id");
                find.innerText = node instanceof Element ? ((Element) node).text() : null;
                find.name = node.attr("name");
                find.tagName = node.nodeName();
                find.title = node.attr("title");
                find.role = node.attr("role");
                find.type = node.attr("type");
                find.value = node.attr("value");
                find.fuzzy = true;
                // Presumably resolves to another toElement overload taking (Browser, HtmlNode,
                // Element, boolean) that is not shown here - TODO confirm. It is tried first
                // against a freshly opened page body, then with a null body.
                WebElement found = toElement(browser, find,
                        BrowserUtil.openElement(browser, false, null, null, null, null), false);
                found = found == null ? toElement(browser, find, null, false) : found;
                if (found != null) {
                    return found;
                }
            } catch (Browser.Retry r) {
                throw r;
            } catch (Browser.Fatal f) {
                throw f;
            } catch (Throwable t) {
                // Best effort: log and try the caller-supplied htmlNode next.
                Log.exception(t);
            }
        }
        if (htmlNode != null) {
            try {
                // Same two-step lookup, this time with the caller-supplied description.
                WebElement found = toElement(browser, htmlNode,
                        BrowserUtil.openElement(browser, false, null, null, null, null), false);
                found = found == null ? toElement(browser, htmlNode, null, false) : found;
                if (found != null) {
                    return found;
                }
            } catch (Browser.Retry r) {
                throw r;
            } catch (Browser.Fatal f) {
                throw f;
            } catch (Throwable t) {
                Log.exception(t);
            }
        }
    }
    Log.warn("Could not convert Node to WebElement... failed permanently");
    return null;
}

From source file:com.screenslicer.core.util.BrowserUtil.java

/**
 * Parses the browser's current page into a jsoup {@code Element} (the document body) and
 * annotates its nodes with visibility and URL-filter markers.
 *
 * @param browser browser whose current page is read
 * @param init when true, a script is first executed in the page that rewrites the
 *        {@code className} of the elements under {@code document.body} (see inline comments)
 * @param whitelist passed through to {@code UrlUtil.isUrlFiltered}; may be null
 * @param patterns passed through to {@code UrlUtil.isUrlFiltered}; may be null
 * @param urlNodes passed through to {@code UrlUtil.isUrlFiltered}; may be null
 * @param transforms passed through to {@code UrlUtil.isUrlFiltered}
 * @return the annotated body element
 * @throws ActionFailed wrapping any {@code Throwable} other than {@code Browser.Retry}
 *         and {@code Browser.Fatal}, which are rethrown unchanged
 */
public static Element openElement(final Browser browser, boolean init, final String[] whitelist,
        final String[] patterns, final HtmlNode[] urlNodes, final UrlTransform[] transforms)
        throws ActionFailed {
    try {
        if (init) {
            int myStartId;
            // Take the next id under the lock, wrapping to 0 after Integer.MAX_VALUE.
            synchronized (startIdLock) {
                startId = startId == Integer.MAX_VALUE ? 0 : startId + 1;
                myStartId = startId;
            }
            // Script, pass 1: strip the hidden/filtered/lenient markers from every string
            // className and collapse whitespace.
            // Pass 2: append NODE_MARKER + myStartId + "_" + index to elements that do not yet
            // carry a NODE_MARKER<digits>_<digits> class, and append HIDDEN_MARKER to elements
            // for which isVisible(...) is false.
            // The isVisible field is spliced into the script; presumably it holds the
            // JavaScript source of the isVisible function - TODO confirm.
            browser.executeScript("      var all = document.body.getElementsByTagName('*');"
                    + "for(var i = 0; i < all.length; i++){"
                    + "  if(all[i].className && typeof all[i].className == 'string'){"
                    + "    all[i].className=all[i].className.replace(/" + HIDDEN_MARKER + "/g,'').replace(/"
                    + FILTERED_MARKER + "/g,'').replace(/" + FILTERED_LENIENT_MARKER
                    + "/g,'').replace(/\\s+/g,' ').trim();" + "  }" + "}" + isVisible
                    + "for(var j = 0; j < all.length; j++){" + "  if(!all[j].className.match(/" + NODE_MARKER
                    + "\\d+_\\d+/g)){" + "    all[j].className += ' " + NODE_MARKER + myStartId + "_'+j+' ';"
                    + "  }" + "  if(!isVisible(all[j])){" + "    all[j].className += ' " + HIDDEN_MARKER + " ';"
                    + "  }" + "}");
        }
        String url = browser.getCurrentUrl();
        // Constructed only for validation: a malformed URL throws here and ends up
        // wrapped in ActionFailed by the catch-all below.
        new URL(url);
        Element element = CommonUtil.parse(browser.getPageSource(), url, false).body();
        // First traversal: mark every non-text node that NodeUtil does not consider empty.
        element.traverse(new NodeVisitor() {
            @Override
            public void tail(Node node, int depth) {
            }

            @Override
            public void head(Node node, int depth) {
                if (!node.nodeName().equals("#text") && !NodeUtil.isEmpty(node)) {
                    NodeUtil.markVisible(node);
                }
            }
        });
        // Second traversal only when at least one kind of URL filter was supplied.
        if ((whitelist != null && whitelist.length > 0) || (patterns != null && patterns.length > 0)
                || (urlNodes != null && urlNodes.length > 0)) {
            element.traverse(new NodeVisitor() {
                @Override
                public void tail(Node node, int depth) {
                }

                @Override
                public void head(Node node, int depth) {
                    if (node.nodeName().equals("a")) {
                        // Anchors are judged by their href and marked with flag false.
                        if (UrlUtil.isUrlFiltered(browser.getCurrentUrl(), node.attr("href"), node, whitelist,
                                patterns, urlNodes, transforms)) {
                            NodeUtil.markFiltered(node, false);
                        }
                    } else {
                        // Other nodes are judged by the URL that UrlUtil.urlFromAttr extracts,
                        // if any, and marked with flag true.
                        String urlAttr = UrlUtil.urlFromAttr(node);
                        if (!CommonUtil.isEmpty(urlAttr) && UrlUtil.isUrlFiltered(browser.getCurrentUrl(),
                                urlAttr, node, whitelist, patterns, urlNodes, transforms)) {
                            NodeUtil.markFiltered(node, true);
                        }
                    }
                }
            });
        }
        if (WebApp.DEBUG) {
            try {
                // Debug aid: dump the annotated HTML to ./<currentTimeMillis>.log.scrape.
                FileUtils.writeStringToFile(new File("./" + System.currentTimeMillis() + ".log.scrape"),
                        element.outerHtml(), "utf-8");
            } catch (IOException e) {
                // A failed debug dump is ignored.
            }
        }
        return element;
    } catch (Browser.Retry r) {
        throw r;
    } catch (Browser.Fatal f) {
        throw f;
    } catch (Throwable t) {
        throw new ActionFailed(t);
    }
}

From source file:com.screenslicer.core.util.Util.java

/**
 * Traverses {@code node} and rewrites each visited node's {@code class} attribute with
 * the result of {@code cleanClass}. When the cleaned value is empty according to
 * {@code CommonUtil.isEmpty}, the attribute is removed instead.
 *
 * @param node root of the subtree to clean
 */
public static void clean(Node node) {
    node.traverse(new NodeVisitor() {
        @Override
        public void head(Node current, int depth) {
            final String cleaned = cleanClass(current.attr("class"));
            if (!CommonUtil.isEmpty(cleaned)) {
                current.attr("class", cleaned);
            } else {
                current.removeAttr("class");
            }
        }

        @Override
        public void tail(Node current, int depth) {
            // Nothing to do when leaving a node.
        }
    });
}

From source file:com.screenslicer.core.util.Util.java

/**
 * Parses the driver's current page into a jsoup {@code Element} (the document body) and
 * annotates its nodes with visibility and URL-filter markers.
 *
 * <p>Before parsing, a script is executed in the page that strips the node, hidden,
 * filtered and lenient-filtered markers from every element's {@code className}, then
 * appends {@code NODE_MARKER} plus the element index to each element and
 * {@code HIDDEN_MARKER} to elements for which {@code isVisible(...)} is false.
 *
 * @param driver driver whose current page is read
 * @param whitelist passed through to {@code isUrlFiltered}; may be null
 * @param patterns passed through to {@code isUrlFiltered}; may be null
 * @param transforms passed through to {@code isUrlFiltered}
 * @return the annotated body element
 * @throws ActionFailed wrapping any {@code Exception} raised on the way (it is logged first)
 */
public static Element openElement(final RemoteWebDriver driver, final String[] whitelist,
        final String[] patterns, final UrlTransform[] transforms) throws ActionFailed {
    try {
        final String markerScript = "      var all = document.getElementsByTagName('*');"
                + "for(var i = 0; i < all.length; i++){"
                + "  if(all[i].className){"
                + "    all[i].className=all[i].className.replace(/" + NODE_MARKER + "\\d+/g,'').replace(/"
                + HIDDEN_MARKER + "/g,'').replace(/"
                + FILTERED_MARKER + "/g,'').replace(/"
                + FILTERED_LENIENT_MARKER + "/g,'').replace(/\\s+/g,' ').trim();"
                + "  }"
                + "}"
                + isVisible
                + "for(var j = 0; j < all.length; j++){"
                + "  all[j].className += ' " + NODE_MARKER + "'+j+' ';"
                + "  if(!isVisible(all[j])){"
                + "    all[j].className += ' " + HIDDEN_MARKER + " ';"
                + "  }"
                + "}";
        driver.executeScript(markerScript);

        final String currentUrl = driver.getCurrentUrl();
        // Constructed only for validation: a malformed URL throws and is handled below.
        new URL(currentUrl);
        final Element body = parse(driver.getPageSource(), currentUrl).body();

        // First pass: mark every non-text node that is not considered empty.
        body.traverse(new NodeVisitor() {
            @Override
            public void head(Node current, int depth) {
                final boolean textNode = current.nodeName().equals("#text");
                if (!textNode && !isEmpty(current)) {
                    markVisible(current);
                }
            }

            @Override
            public void tail(Node current, int depth) {
                // Nothing to do when leaving a node.
            }
        });

        final boolean hasWhitelist = whitelist != null && whitelist.length > 0;
        final boolean hasPatterns = patterns != null && patterns.length > 0;
        if (hasWhitelist || hasPatterns) {
            // Second pass: mark nodes whose URL is rejected by the filters.
            body.traverse(new NodeVisitor() {
                @Override
                public void head(Node current, int depth) {
                    if (!current.nodeName().equals("a")) {
                        // Non-anchors are judged by the URL extracted from their attributes.
                        final String candidate = Util.urlFromAttr(current);
                        if (!CommonUtil.isEmpty(candidate) && isUrlFiltered(driver.getCurrentUrl(), candidate,
                                whitelist, patterns, transforms)) {
                            markFiltered(current, true);
                        }
                    } else if (isUrlFiltered(driver.getCurrentUrl(), current.attr("href"), whitelist,
                            patterns, transforms)) {
                        // Anchors are judged by their href.
                        markFiltered(current, false);
                    }
                }

                @Override
                public void tail(Node current, int depth) {
                    // Nothing to do when leaving a node.
                }
            });
        }
        return body;
    } catch (Exception e) {
        Log.exception(e);
        throw new ActionFailed(e);
    }
}

From source file:de.geeksfactory.opacclient.apis.SISIS.java

/**
 * Builds a {@code DetailledItem} from a result page.
 *
 * <p>Besides the given {@code html}, two further tab pages ("showTitleActive" and
 * "showAvailabilityActive") are fetched through {@code httpGet}. The method extracts the
 * id, reservation links, cover, title, detail rows, download links, the copies table and
 * volume-search parameters. Failures inside the id, cover, per-copy and volume sections
 * are caught locally so that the remaining sections are still processed.
 *
 * @param html HTML of the result page
 * @return the populated item
 * @throws IOException declared by the signature; presumably raised by {@code httpGet} -
 *         TODO confirm
 */
protected DetailledItem parse_result(String html) throws IOException {
    Document doc = Jsoup.parse(html);
    doc.setBaseUri(opac_url);

    String html2 = httpGet(opac_url + "/singleHit.do?methodToCall=activateTab&tab=showTitleActive", ENCODING);

    Document doc2 = Jsoup.parse(html2);
    doc2.setBaseUri(opac_url);

    String html3 = httpGet(opac_url + "/singleHit.do?methodToCall=activateTab&tab=showAvailabilityActive",
            ENCODING);

    Document doc3 = Jsoup.parse(html3);
    doc3.setBaseUri(opac_url);

    DetailledItem result = new DetailledItem();

    try {
        result.setId(doc.select("#bibtip_id").text().trim());
    } catch (Exception ex) {
        ex.printStackTrace();
    }
    List<String> reservationlinks = new ArrayList<>();
    for (Element link : doc3.select("#vormerkung a, #tab-content a")) {
        String href = link.absUrl("href");
        Map<String, String> hrefq = getQueryParamsFirst(href);
        if (result.getId() == null) {
            // ID retrieval
            String key = hrefq.get("katkey");
            if (key != null) {
                result.setId(key);
                break;
            }
        }

        // Reservation ("Vormerken") and order ("Bestellung") links: keep the query string.
        if (hrefq.get("methodToCall") != null) {
            if (hrefq.get("methodToCall").equals("doVormerkung")
                    || hrefq.get("methodToCall").equals("doBestellung")) {
                reservationlinks.add(href.split("\\?")[1]);
            }
        }
    }
    if (reservationlinks.size() == 1) {
        result.setReservable(true);
        result.setReservation_info(reservationlinks.get(0));
    } else if (reservationlinks.size() == 0) {
        result.setReservable(false);
    } else {
        // TODO: Multiple options - handle this case!
    }

    if (doc.select(".data td img").size() == 1) {
        result.setCover(doc.select(".data td img").first().attr("abs:src"));
        try {
            downloadCover(result);
        } catch (Exception ignored) {
            // Best effort: a cover that cannot be downloaded must not abort detail parsing.
        }
    }

    if (doc.select(".aw_teaser_title").size() == 1) {
        result.setTitle(doc.select(".aw_teaser_title").first().text().trim());
    } else if (doc.select(".data td strong").size() > 0) {
        result.setTitle(doc.select(".data td strong").first().text().trim());
    } else {
        result.setTitle("");
    }
    if (doc.select(".aw_teaser_title_zusatz").size() > 0) {
        result.addDetail(new Detail("Titelzusatz", doc.select(".aw_teaser_title_zusatz").text().trim()));
    }

    String title = "";
    String text = "";
    boolean takeover = false;
    Element detailtrs = doc2.select(".box-container .data td").first();
    // first() yields null when the selector matches nothing; without this guard a page
    // lacking the cell would fail with a NullPointerException. Skipping the scan leaves
    // takeover false, so title/text are reset below exactly as for a scan without a link.
    if (detailtrs != null) {
        for (Node node : detailtrs.childNodes()) {
            if (node instanceof Element) {
                if (((Element) node).tagName().equals("strong")) {
                    title = ((Element) node).text().trim();
                    text = "";
                } else {
                    if (((Element) node).tagName().equals("a")
                            && (((Element) node).text().trim().contains("hier klicken") || title.equals("Link:"))) {
                        text = text + node.attr("href");
                        takeover = true;
                        break;
                    }
                }
            } else if (node instanceof TextNode) {
                text = text + ((TextNode) node).text();
            }
        }
    }
    if (!takeover) {
        // Nothing to carry over into the detail scan below.
        text = "";
        title = "";
    }

    detailtrs = doc2.select("#tab-content .data td").first();
    if (detailtrs != null) {
        for (Node node : detailtrs.childNodes()) {
            if (node instanceof Element) {
                if (((Element) node).tagName().equals("strong")) {
                    // A new heading starts: flush the detail collected so far.
                    if (!text.equals("") && !title.equals("")) {
                        result.addDetail(new Detail(title.trim(), text.trim()));
                        if (title.equals("Titel:")) {
                            result.setTitle(text.trim());
                        }
                        text = "";
                    }

                    title = ((Element) node).text().trim();
                } else {
                    if (((Element) node).tagName().equals("a")
                            && (((Element) node).text().trim().contains("hier klicken")
                                    || title.equals("Link:"))) {
                        text = text + node.attr("href");
                    } else {
                        text = text + ((Element) node).text();
                    }
                }
            } else if (node instanceof TextNode) {
                text = text + ((TextNode) node).text();
            }
        }
    } else {
        if (doc2.select("#tab-content .fulltitle tr").size() > 0) {
            // Alternative layout: two-column table rows of label and value.
            Elements rows = doc2.select("#tab-content .fulltitle tr");
            for (Element tr : rows) {
                if (tr.children().size() == 2) {
                    Element valcell = tr.child(1);
                    String value = valcell.text().trim();
                    if (valcell.select("a").size() == 1) {
                        value = valcell.select("a").first().absUrl("href");
                    }
                    result.addDetail(new Detail(tr.child(0).text().trim(), value));
                }
            }
        } else {
            result.addDetail(new Detail(stringProvider.getString(StringProvider.ERROR),
                    stringProvider.getString(StringProvider.COULD_NOT_LOAD_DETAIL)));
        }
    }
    // Flush the last collected detail (or the one carried over from the first scan).
    if (!text.equals("") && !title.equals("")) {
        result.addDetail(new Detail(title.trim(), text.trim()));
        if (title.equals("Titel:")) {
            result.setTitle(text.trim());
        }
    }
    for (Element link : doc3.select("#tab-content a")) {
        Map<String, String> hrefq = getQueryParamsFirst(link.absUrl("href"));
        if (result.getId() == null) {
            // ID retrieval
            String key = hrefq.get("katkey");
            if (key != null) {
                result.setId(key);
                break;
            }
        }
    }
    for (Element link : doc3.select(".box-container a")) {
        if (link.text().trim().equals("Download")) {
            result.addDetail(
                    new Detail(stringProvider.getString(StringProvider.DOWNLOAD), link.absUrl("href")));
        }
    }

    Map<String, Integer> copy_columnmap = new HashMap<>();
    // Default values
    copy_columnmap.put("barcode", 1);
    copy_columnmap.put("branch", 3);
    copy_columnmap.put("status", 4);
    // Override the defaults with the positions found in the table header, if any.
    Elements copy_columns = doc.select("#tab-content .data tr#bg2 th");
    for (int i = 0; i < copy_columns.size(); i++) {
        Element th = copy_columns.get(i);
        String head = th.text().trim();
        if (head.contains("Status")) {
            copy_columnmap.put("status", i);
        }
        if (head.contains("Zweigstelle")) {
            copy_columnmap.put("branch", i);
        }
        if (head.contains("Mediennummer")) {
            copy_columnmap.put("barcode", i);
        }
        if (head.contains("Standort")) {
            copy_columnmap.put("location", i);
        }
        if (head.contains("Signatur")) {
            copy_columnmap.put("signature", i);
        }
    }

    // NOTE(review): the dots between the date parts are unescaped, so they match any
    // character, not only "." - confirm whether that is intended.
    Pattern status_lent = Pattern.compile(
            "^(entliehen) bis ([0-9]{1,2}.[0-9]{1,2}.[0-9]{2," + "4}) \\(gesamte Vormerkungen: ([0-9]+)\\)$");
    Pattern status_and_barcode = Pattern.compile("^(.*) ([0-9A-Za-z]+)$");

    Elements exemplartrs = doc.select("#tab-content .data tr").not("#bg2");
    DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);
    for (Element tr : exemplartrs) {
        try {
            Copy copy = new Copy();
            Element status = tr.child(copy_columnmap.get("status"));
            Element barcode = tr.child(copy_columnmap.get("barcode"));
            String barcodetext = barcode.text().trim().replace(" Wegweiser", "");

            // STATUS
            String statustext;
            if (status.getElementsByTag("b").size() > 0) {
                statustext = status.getElementsByTag("b").text().trim();
            } else {
                statustext = status.text().trim();
            }
            // Status and barcode share one column: split "<status> <barcode>".
            if (copy_columnmap.get("status").equals(copy_columnmap.get("barcode"))) {
                Matcher matcher1 = status_and_barcode.matcher(statustext);
                if (matcher1.matches()) {
                    statustext = matcher1.group(1);
                    barcodetext = matcher1.group(2);
                }
            }

            Matcher matcher = status_lent.matcher(statustext);
            if (matcher.matches()) {
                copy.setStatus(matcher.group(1));
                copy.setReservations(matcher.group(3));
                copy.setReturnDate(fmt.parseLocalDate(matcher.group(2)));
            } else {
                copy.setStatus(statustext);
            }
            copy.setBarcode(barcodetext);
            if (status.select("a[href*=doVormerkung]").size() == 1) {
                copy.setResInfo(status.select("a[href*=doVormerkung]").attr("href").split("\\?")[1]);
            }

            String branchtext = tr.child(copy_columnmap.get("branch")).text().trim().replace(" Wegweiser", "");
            copy.setBranch(branchtext);

            if (copy_columnmap.containsKey("location")) {
                copy.setLocation(
                        tr.child(copy_columnmap.get("location")).text().trim().replace(" Wegweiser", ""));
            }

            if (copy_columnmap.containsKey("signature")) {
                copy.setShelfmark(
                        tr.child(copy_columnmap.get("signature")).text().trim().replace(" Wegweiser", ""));
            }

            result.addCopy(copy);
        } catch (Exception ex) {
            // A row that cannot be parsed is reported and skipped; other rows still count.
            ex.printStackTrace();
        }
    }

    try {
        Element isvolume = null;
        Map<String, String> volume = new HashMap<>();
        Elements links = doc.select(".data td a");
        int elcount = links.size();
        for (int eli = 0; eli < elcount; eli++) {
            List<NameValuePair> anyurl = URLEncodedUtils.parse(new URI(links.get(eli).attr("href")), "UTF-8");
            for (NameValuePair nv : anyurl) {
                if (nv.getName().equals("methodToCall") && nv.getValue().equals("volumeSearch")) {
                    isvolume = links.get(eli);
                } else if (nv.getName().equals("catKey")) {
                    volume.put("catKey", nv.getValue());
                } else if (nv.getName().equals("dbIdentifier")) {
                    volume.put("dbIdentifier", nv.getValue());
                }
            }
            if (isvolume != null) {
                volume.put("volume", "true");
                result.setVolumesearch(volume);
                break;
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }

    return result;
}