List of usage examples for org.jsoup.nodes.Node.nextSibling()
public Node nextSibling()
From source file:net.poemerchant.scraper.ShopScraper.java
public List<Buyout> scrapeItemBuyouts(int noOfItems) { buyouts = new ArrayList<Buyout>(noOfItems); for (int i = 0; i < noOfItems; i++) { Buyout buyout = Buyout.NONE;//from w ww . j a va 2 s .c om Node itemElem = doc.select("#item-fragment-" + i).first(); Element itemElemNext = doc.select("#item-fragment-" + (i + 1)).first(); if (itemElem != null) { while (!itemElem.equals(itemElemNext)) { itemElem = itemElem.nextSibling(); if (itemElem == null) { // case where there is no b/o set and we've reached the end of a spoiler break; } if (Element.class.isAssignableFrom(itemElem.getClass())) continue; String boRaw = StringUtils.trim(itemElem.toString()); String[] split = StringUtils.split(boRaw); if (split.length == 3) { BuyoutMode buyoutMode = BuyoutMode.parse(split[0]); if (buyoutMode != BuyoutMode.unknown) { buyout = new Buyout(boRaw); break; } } } } else { logger.severe( "Actual item in the OP was not found. Buyout will be defaulted to NONE. Item index is " + i); } buyouts.add(buyout); } return buyouts; }
From source file:mml.handler.post.MMLPostHTMLHandler.java
/** * Get the text of the element/* w ww . ja va 2 s . co m*/ * @param elem the element in question * @return */ String getTextOf(Node elem) { if (elem instanceof TextNode) return ((TextNode) elem).getWholeText(); else if (elem instanceof Element) { String nName = elem.nodeName().toLowerCase(); // skip milestones if (nName.equals("span") && ((Element) elem).attr("class") != null && isMilestone(((Element) elem).attr("class"))) { int offset = sb.length(); String name = ((Element) elem).attr("class"); Range r = new Range(name, offset, 0); try { pages.add(r); } catch (JSONException e) { } return getTextOf(elem.nextSibling()); } else { List<Node> children = elem.childNodes(); StringBuilder concat = new StringBuilder(); for (Node child : children) { concat.append(getTextOf(child)); } return concat.toString(); } } else return ""; }
From source file:org.jsweet.input.typescriptdef.visitor.DocFiller.java
@Override public void head(Node node, int depth) { if (!found) { if (node.attr("id") != null && node.attr("id").equalsIgnoreCase("Quick_Links")) { found = true;//from w w w .j a va2 s . c om while (DocFiller.isLastSiblingTag(node)) { node = node.parent(); } Node n = node; while ((n = n.nextSibling()) != null && !("h2".equals(n.nodeName()) || "h3".equals(n.nodeName()))) { description.append(n.outerHtml()); } String s = Jsoup.parse(description.toString()).text(); if ("".equals(s.trim())) { if (n == null) { n = node; } while ((n = n.nextSibling()) != null && !("h2".equals(n.nodeName()) || "h3".equals(n.nodeName()))) { description.append(n.outerHtml()); } } typeDeclaration.setDocumentation("/** " + DocFiller.removeTags(description.toString()) + " */"); docFiller.countDoc(true); } } }
From source file:sk.svec.jan.acb.extraction.DiscussionFinder.java
private boolean findDocumentParts(Node root) { Node node = root; int depth = 0; while (node != null) { if (node.nodeName().compareTo("#text") != 0) { HashMap<String, Integer> level = allLevels.get(depth); // System.out.println(depth + " " + allLevels.size()); if (level.containsKey(node.nodeName() + "[class=" + node.attr("class") + "]")) { Integer get = level.get(node.nodeName() + "[class=" + node.attr("class") + "]"); level.put(node.nodeName() + "[class=" + node.attr("class") + "]", get + 1); } else { level.put(node.nodeName() + "[class=" + node.attr("class") + "]", 1); }/*ww w .j a va2s. c o m*/ } if (node.childNodeSize() > 0) { node = node.childNode(0); depth++; } else { while (node.nextSibling() == null && depth > 0) { node = node.parentNode(); depth--; } if (node == root) { break; } node = node.nextSibling(); } } //ak je 0 alebo 1 datum, vratime false, kedze sa to neda zistit if (dateCount < 2) { return false; } else { return findOnePart(dateCount); } }
From source file:sk.svec.jan.acb.extraction.DiscussionFinder.java
private void traversePage(Node root) { Node node = root; int depth = 0; while (node != null) { // System.out.println(depth + " " + node.nodeName() + " " + node.childNodeSize()); // if(node.attr("class").compareTo("contribution")==0){ // System.out.println(depth); // } if (maxDepth < depth) { maxDepth = depth;/*from ww w. j a v a 2 s .c om*/ } boolean analyze = analyze(node); if (analyze) { break; } if (node.childNodeSize() > 0) { node = node.childNode(0); depth++; } else { while (node.nextSibling() == null && depth > 0) { node = node.parentNode(); depth--; } if (node == root) { break; } node = node.nextSibling(); } } }
From source file:sk.svec.jan.acb.extraction.Finder.java
private void markBadText(Node root) { Node node = root; int depth = 0; while (node != null) { //ak sa jedna o text, ktory ma menej ako 15 znakov if (node.nodeName().compareTo("#text") == 0) { if (node.toString().trim().length() < 20) { nodesToRemove.add(node); // System.out.println(node); }//from w w w .j a v a 2 s .co m } if (node.childNodeSize() > 0) { node = node.childNode(0); depth++; } else { while (node.nextSibling() == null && depth > 0) { node = node.parentNode(); depth--; } if (node == root) { break; } node = node.nextSibling(); } } }
From source file:sk.svec.jan.acb.extraction.Finder.java
/**
 * Walks the tree under {@code root} in pre-order and removes the first node that
 * {@code equals} {@code nodeToRemove}. The search stops after that single removal, or once
 * the traversal climbs back to {@code root} without a match.
 *
 * @param root         the node at which the traversal starts and ends
 * @param nodeToRemove the node to look for
 * @return {@code root}, whether or not a node was removed
 */
public Node removeNodes(Node root, Node nodeToRemove) {
    int depth = 0;

    for (Node current = root; current != null; ) {
        if (current.equals(nodeToRemove)) {
            current.remove();
            return root;
        }

        // Descend to the first child when there is one.
        if (current.childNodeSize() > 0) {
            current = current.childNode(0);
            depth++;
            continue;
        }

        // Otherwise climb until a node with a following sibling (or the start depth) is reached.
        while (current.nextSibling() == null && depth > 0) {
            current = current.parentNode();
            depth--;
        }
        if (current == root) {
            break;
        }
        current = current.nextSibling();
    }
    return root;
}
From source file:sk.svec.jan.acb.extraction.Finder.java
public void traversePage(Node root) { Node node = root; int depth = 0; while (node != null) { // System.out.println(depth + " " + node.nodeName() + " " + node.childNodeSize()); // System.out.println(node.attributes()); boolean analyze = analyze(node); if (analyze) { break; }/*from w ww . j a v a 2s.co m*/ if (node.childNodeSize() > 0) { node = node.childNode(0); depth++; } else { while (node.nextSibling() == null && depth > 0) { node = node.parentNode(); depth--; } if (node == root) { break; } node = node.nextSibling(); } } }
From source file:sk.svec.jan.acb.extraction.Finder.java
public void traversePageFindAuthor(Node root) { Node node = root; int depth = 0; while (node != null) { // System.out.println(depth + " " + node.nodeName() + " " + node.childNodeSize()); // System.out.println(node.attributes()); for (Attribute attribute : node.attributes().asList()) { String value = attribute.getValue(); if (!foundAuthor) { foundAuthor = findAuthorInText(node, value); break; }// www . j a v a 2s. c om } if (node.childNodeSize() > 0) { node = node.childNode(0); depth++; } else { while (node.nextSibling() == null && depth > 0) { node = node.parentNode(); depth--; } if (node == root) { break; } node = node.nextSibling(); } } }