List of usage examples for org.jsoup.nodes.Node getClass()
// Signature of the method the examples below call: getClass() is declared final and native and returns a Class<?>.
@HotSpotIntrinsicCandidate public final native Class<?> getClass();
From source file: net.poemerchant.scraper.ShopScraper.java
public List<Buyout> scrapeItemBuyouts(int noOfItems) { buyouts = new ArrayList<Buyout>(noOfItems); for (int i = 0; i < noOfItems; i++) { Buyout buyout = Buyout.NONE;/*from www . j ava2s. co m*/ Node itemElem = doc.select("#item-fragment-" + i).first(); Element itemElemNext = doc.select("#item-fragment-" + (i + 1)).first(); if (itemElem != null) { while (!itemElem.equals(itemElemNext)) { itemElem = itemElem.nextSibling(); if (itemElem == null) { // case where there is no b/o set and we've reached the end of a spoiler break; } if (Element.class.isAssignableFrom(itemElem.getClass())) continue; String boRaw = StringUtils.trim(itemElem.toString()); String[] split = StringUtils.split(boRaw); if (split.length == 3) { BuyoutMode buyoutMode = BuyoutMode.parse(split[0]); if (buyoutMode != BuyoutMode.unknown) { buyout = new Buyout(boRaw); break; } } } } else { logger.severe( "Actual item in the OP was not found. Buyout will be defaulted to NONE. Item index is " + i); } buyouts.add(buyout); } return buyouts; }
From source file: org.norvelle.addressdiscoverer.parse.structured.BackwardsFlattenedDocumentIterator.java
/** * A reverse treewalker that accumulates its results in the textNodes List of nodes. * /*from w ww. j ava 2 s .co m*/ * @param currNode */ private void walkNodeBackwards(Node currNode, String encoding) throws UnsupportedEncodingException, EndNodeWalkingException { this.status.incrementNumericProgress(); List<Node> children = currNode.childNodes(); for (int i = children.size() - 1; i >= 0; i--) { Node child = children.get(i); if (!child.getClass().equals(TextNode.class)) this.walkNodeBackwards(child, encoding); else { TextNode textChild = (TextNode) child; String htmlEncodedString = WordUtils.capitalizeFully(textChild.getWholeText()); String processedString = Utils.decodeHtml(htmlEncodedString, encoding); boolean isName; try { counter++; if (processedString.trim().isEmpty()) isName = false; else isName = Name.isName(processedString); } catch (Exception ex) { logger.log(Level.SEVERE, ex.getMessage()); logger.log(Level.SEVERE, ExceptionUtils.getStackTrace(ex)); throw new EndNodeWalkingException(String.format("Could not test for nameness: %s %s", ex.getClass().getName(), ex.getMessage())); } if (isName) this.status.reportProgressText("Found name: " + processedString); if (!this.elementsWithNames.contains((Element) currNode) && isName) { this.elementsWithNames.add(0, (Element) currNode); /*this.status.reportProgressText( String.format(" Adding <%s> with content '%s'", currNode.nodeName(), processedString)); */ } } } }
From source file: org.norvelle.addressdiscoverer.parse.unstructured.ForwardsFlattenedDocumentIterator.java
/** * A reverse treewalker that accumulates its results in the textNodes List of nodes. * /*from w w w. j a v a2 s . c om*/ * @param currNode */ private void walkNodeForwards(Node currNode, String encoding) throws UnsupportedEncodingException, EndNodeWalkingException { this.status.incrementNumericProgress(); List<Node> children = currNode.childNodes(); for (int i = 0; i < children.size(); i++) { Node child = children.get(i); if (!child.getClass().equals(TextNode.class)) this.walkNodeForwards(child, encoding); else { TextNode textChild = (TextNode) child; String htmlEncodedString = WordUtils.capitalizeFully(textChild.getWholeText()); String processedString = Utils.decodeHtml(htmlEncodedString, encoding); if (processedString.trim().isEmpty()) continue; boolean isName = Name.isName(processedString); if (isName) { this.status.reportProgressText("Found name: " + processedString); if (!this.elementsWithNames.contains((Element) currNode)) { this.elementsWithNames.add(0, (Element) currNode); if (lastNameContainingElement != null) this.intermediateElementMap.put((Element) lastNameContainingElement, intermediateValuesList); lastNameContainingElement = (Element) currNode; intermediateValuesList = new ArrayList<>(); } } else { intermediateValuesList.add(this.extractText((Element) currNode)); } // isName } // if (!child... } // for(int i... }