List of usage examples for org.jsoup.nodes Element nodeName
@Override
public String nodeName()
From source file:akori.AKORI.java
public static void main(String[] args) throws IOException, InterruptedException { System.out.println("esto es AKORI"); URL = "http://www.mbauchile.cl"; PATH = "E:\\NetBeansProjects\\AKORI\\"; NAME = "mbauchile.png"; // Extrar DOM tree Document doc = Jsoup.connect(URL).timeout(0).get(); // The Firefox driver supports javascript WebDriver driver = new FirefoxDriver(); driver.manage().window().maximize(); System.out.println(driver.manage().window().getSize().toString()); System.out.println(driver.manage().window().getPosition().toString()); int xmax = driver.manage().window().getSize().width; int ymax = driver.manage().window().getSize().height; // Go to the URL page driver.get(URL);/* www .ja v a 2s . c o m*/ File screen = ((TakesScreenshot) driver).getScreenshotAs(OutputType.FILE); FileUtils.copyFile(screen, new File(PATH + NAME)); BufferedImage img = ImageIO.read(new File(PATH + NAME)); //Graphics2D graph = img.createGraphics(); BufferedImage img1 = new BufferedImage(xmax, ymax, BufferedImage.TYPE_INT_ARGB); Graphics2D graph1 = img.createGraphics(); double[][] matrix = new double[ymax][xmax]; BufferedReader in = new BufferedReader(new FileReader("et.txt")); String linea; double max = 0; graph1.drawImage(img, 0, 0, null); HashMap<String, Integer> lista = new HashMap<String, Integer>(); int count = 0; for (int i = 0; (linea = in.readLine()) != null && i < 10000; ++i) { String[] datos = linea.split(","); int x = (int) Double.parseDouble(datos[0]); int y = (int) Double.parseDouble(datos[2]); long time = Double.valueOf(datos[4]).longValue(); if (x >= xmax || y >= ymax) continue; if (time < 691215) continue; if (time > 705648) break; if (lista.containsKey(x + "," + y)) lista.put(x + "," + y, lista.get(x + "," + y) + 1); else lista.put(x + "," + y, 1); ++count; } System.out.println(count); in.close(); Iterator iter = lista.entrySet().iterator(); Map.Entry e; for (String key : lista.keySet()) { Integer i = lista.get(key); if (max < i) max = i; } System.out.println(max); 
max = 0; while (iter.hasNext()) { e = (Map.Entry) iter.next(); String xy = (String) e.getKey(); String[] datos = xy.split(","); int x = Integer.parseInt(datos[0]); int y = Integer.parseInt(datos[1]); matrix[y][x] += (int) e.getValue(); double aux; if ((aux = normalMatrix(matrix, y, x, ((int) e.getValue()) * 4)) > max) { max = aux; } //normalMatrix(matrix,x,y,20); if (matrix[y][x] > max) max = matrix[y][x]; } int A, R, G, B, n; for (int i = 0; i < xmax; ++i) { for (int j = 0; j < ymax; ++j) { if (matrix[j][i] != 0) { n = (int) Math.round(matrix[j][i] * 100 / max); R = Math.round((255 * n) / 100); G = Math.round((255 * (100 - n)) / 100); B = 0; A = Math.round((255 * n) / 100); ; if (R > 255) R = 255; if (R < 0) R = 0; if (G > 255) G = 255; if (G < 0) G = 0; if (R < 50) A = 0; graph1.setColor(new Color(R, G, B, A)); graph1.fillOval(i, j, 1, 1); } } } //graph1.dispose(); ImageIO.write(img, "png", new File("example.png")); System.out.println(max); graph1.setColor(Color.RED); // Extraer elementos Elements e1 = doc.body().getAllElements(); int i = 1; ArrayList<String> tags = new ArrayList<String>(); for (Element temp : e1) { if (tags.indexOf(temp.tagName()) == -1) { tags.add(temp.tagName()); List<WebElement> query = driver.findElements(By.tagName(temp.tagName())); for (WebElement temp1 : query) { Point po = temp1.getLocation(); Dimension d = temp1.getSize(); if (d.width <= 0 || d.height <= 0 || po.x < 0 || po.y < 0) continue; System.out.println(i + " " + temp.nodeName()); System.out.println(" x: " + po.x + " y: " + po.y); System.out.println(" width: " + d.width + " height: " + d.height); graph1.draw(new Rectangle(po.x, po.y, d.width, d.height)); ++i; } } } graph1.dispose(); ImageIO.write(img, "png", new File(PATH + NAME)); driver.quit(); }
From source file:Main.java
/**
 * Looks up the first direct child of {@code element} whose node name equals {@code name},
 * ignoring case.
 *
 * @param element parent whose direct children are scanned in order
 * @param name    node name to look for (compared case-insensitively)
 * @return the first matching child, or {@code null} when no child matches
 */
public static Element findFirstChildElement(Element element, String name) {
    List<Element> candidates = element.children();
    for (int idx = 0; idx < candidates.size(); idx++) {
        Element candidate = candidates.get(idx);
        if (candidate.nodeName().equalsIgnoreCase(name)) {
            return candidate;
        }
    }
    return null;
}
From source file:feedzilla.Feed.java
/**
 * Fills the fields of this feed item from the direct children of {@code entry}.
 *
 * <p>Children are visited in order. Once a {@code source} child has been seen, later
 * {@code title} and {@code link} children are stored in the source-specific fields instead
 * of the item's own. Unrecognised tags are only logged.
 *
 * @param entry element whose children describe one feed item
 */
private void parser(Element entry) {
    boolean sourceSeen = false;
    for (Element child : entry.children()) {
        String tag = child.nodeName();
        if (tag.equals("id")) {
            // The numeric id is the part after the first ':' of the text.
            String[] idParts = child.text().split(":");
            this.id = Integer.parseInt(idParts[1]);
        } else if (tag.equals("title")) {
            if (sourceSeen) {
                this.source_title = child.text();
            } else {
                this.title = child.text();
            }
        } else if (tag.equals("summary")) {
            // Keep only the text before the first "<br".
            this.summary = child.text().split("<br")[0];
        } else if (tag.equals("published")) {
            this.published = child.text();
        } else if (tag.equals("updated")) {
            this.updated = child.text();
        } else if (tag.equals("author")) {
            this.author = child.text();
        } else if (tag.equals("link")) {
            if (sourceSeen) {
                this.source_link = child.attr("href");
            } else {
                this.link = child.attr("href");
            }
        } else if (tag.equals("rights")) {
            this.copyright = child.text();
        } else if (tag.equals("source")) {
            sourceSeen = true;
        } else {
            Log.debug("Unknow TAG: " + tag);
        }
    }
}
From source file:com.aurel.track.exchange.docx.exporter.PreprocessImage.java
/** * Removes the HTML5 figure tag and saves the figcaption in the <img> tag's "alt" attribute for later use * @param htmlContent//from w ww .j av a2s . c o m * @return */ private Document removeFigureSaveFigcaption(String htmlContent) { Document doc = Jsoup.parseBodyFragment(htmlContent); //figure is a HTML5 tag not accepted by Tidy, so it should be replaced by the content <img>-tag, and the figcaption is saved in the "alt" attribute Elements figureElements = doc.select("figure"); Element figcaptionNode = null; if (figureElements != null) { for (Iterator<Element> iterator = figureElements.iterator(); iterator.hasNext();) { Element figureElement = iterator.next(); Elements figureChildren = figureElement.getAllElements(); Node imageNode = null; if (figureChildren != null) { for (Element figureChild : figureChildren) { if ("img".equals(figureChild.nodeName())) { imageNode = figureChild; } else { if ("figcaption".equals(figureChild.nodeName())) { figcaptionNode = figureChild; //set "figcaption" text as value for "alt" attribute if (imageNode != null) { imageNode.attr("alt", figcaptionNode.text()); } } } } } if (imageNode != null) { figureElement.replaceWith(imageNode); } } } return doc; }
From source file:cn.wanghaomiao.xpath.core.XpathEvaluator.java
/** * //from ww w . j av a 2s. c o m * * @param e * @param node * @return */ public Element filter(Element e, Node node) throws NoSuchFunctionException, NoSuchAxisException { if (node.getTagName().equals("*") || node.getTagName().equals(e.nodeName())) { if (node.getPredicate() != null && StringUtils.isNotBlank(node.getPredicate().getValue())) { Predicate p = node.getPredicate(); if (p.getOpEm() == null) { if (p.getValue().matches("\\d+") && getElIndex(e) == Integer.parseInt(p.getValue())) { return e; } else if (p.getValue().endsWith("()") && (Boolean) callFilterFunc(p.getValue().substring(0, p.getValue().length() - 2), e)) { return e; } else if (p.getValue().startsWith("@") && e.hasAttr(StringUtils.substringAfter(p.getValue(), "@"))) { return e; } //todo p.value ~= contains(./@href,'renren.com') } else { if (p.getLeft().matches("[^/]+\\(\\)")) { Object filterRes = p.getOpEm().excute( callFilterFunc(p.getLeft().substring(0, p.getLeft().length() - 2), e).toString(), p.getRight()); if (filterRes instanceof Boolean && (Boolean) filterRes) { return e; } else if (filterRes instanceof Integer && e.siblingIndex() == Integer.parseInt(filterRes.toString())) { return e; } } else if (p.getLeft().startsWith("@")) { String lValue = e.attr(p.getLeft().substring(1)); Object filterRes = p.getOpEm().excute(lValue, p.getRight()); if ((Boolean) filterRes) { return e; } } else { // ???xpath? List<Element> eltmp = new LinkedList<Element>(); eltmp.add(e); List<JXNode> rstmp = evaluate(p.getLeft(), new Elements(eltmp)); if ((Boolean) p.getOpEm().excute(StringUtils.join(rstmp, ""), p.getRight())) { return e; } } } } else { return e; } } return null; }
From source file:com.spd.ukraine.lucenewebsearch1.web.IndexingController.java
/** * Method used to perform recursive creation indexing for a given web page * in search database./*from w w w. j a va 2 s . c o m*/ * * @param webPage webPage.url is entered url * webPage.title is set * @param html Jsoup.Document of entered url * @param recursionNumber used to stop recursion at exceeding * MAX_RECURSION_SEARCH_NUMBER */ private void indexElements(WebPage webPage, Document html, final int recursionNumber) throws IOException, ParseException { String title = html.title(); if (referencedTitles.contains(title.trim())) { return; } referencedTitles.add(title.trim()); webPage.setTitle(title); if (containsPage(webPage)) { System.out.println(webPage.getUrl() + " is already indexed"); return; } Element prevElement = null; Elements elements = html.body().getAllElements(); //.getElementsByTag("a"); addDoc(webPage, html.text()); // for (Element element : elements) { //// System.out.println(element.nodeName() + " element.text() " //// + element.text() + " url " //// + element.absUrl("href")); // if (element.nodeName().equalsIgnoreCase("body")) { // addDoc(webPage, element.text()); // break; //// continue; // } // if (null == prevElement) { // prevElement = element; //// } else if (prevElementContainsElementText(prevElement, element)) { //// continue; // } //// if (null !== webPagesService.findWebPage(element.absUrl("href"))) // if (element.text().trim().isEmpty()) { // continue; // } //// StringTokenizer str = new StringTokenizer(element.text()); //// str. 
// addDoc(webPage, element.text()); // } if (recursionNumber > MAX_RECURSION_SEARCH_NUMBER || referencedSites.size() > MAX_NUMBER_SITES_INDEXED) { // System.out.println(recursionNumber + " " // + referencedSites.contains(webPage.getUrl())); return; } elements.parallelStream() .filter((Element e) -> e.nodeName().equalsIgnoreCase("a") && null != e.absUrl(HREF) && !e.absUrl(HREF).trim().isEmpty() && !referencedSites.contains(e.absUrl(HREF)) && !referencedSites.contains(removeSharpEtc(e.absUrl(HREF)))) .forEach((Element element) -> { WebPage webPage1 = new WebPage(element.absUrl(HREF)); String url1 = webPage1.getUrl(); // System.out.println(recursionNumber + " recursion for '" // + url1 + "'"); try { Document htmlR = Jsoup.connect(url1).get(); indexElements(webPage1, htmlR, recursionNumber + 1); } catch (IOException | ParseException e) { System.out.println("Exception " + e.getMessage()); } referencedSites.add(url1); }); // for (Element element : elements) { // if (!element.nodeName().equalsIgnoreCase("a")) { // continue; // } // WebPage webPage1 = new WebPage(element.absUrl("href")); // if (null == webPage1.getUrl() // || webPage1.getUrl().isEmpty() // || referencedSites.contains(webPage1.getUrl())) { // continue; // } // System.out.println(recursionNumber + "recursion for " // + element.absUrl("href")); // try { // Document htmlR = Jsoup.connect(webPage1.getUrl()).get(); // webPage1.setTitle(htmlR.title()); // indexElements(webPage1, htmlR, recursionNumber + 1); // } catch (IOException e) { // System.out.println("IOException " + e.getMessage()); // } // referencedSites.add(webPage1.getUrl()); // } }
From source file:mml.handler.post.MMLPostHTMLHandler.java
/**
 * Handles an element that is not a span, p or div. May happen but should not.
 *
 * <p>A range named after the element's {@code class} attribute (or its node name when the
 * attribute is empty) is added at the current text offset. Child elements are processed
 * recursively, text nodes are appended verbatim, and the range length is then set to the
 * amount of text that was appended.
 *
 * @param elem an element that is not a span, p or div
 * @throws JSONException declared by the original signature
 */
private void parseOtherElement(Element elem) throws JSONException {
    int start = sb.length();

    String label = elem.attr("class");
    if (label == null || label.isEmpty()) {
        label = elem.nodeName();
    }

    Range range = new Range(label, start, 0);
    stil.add(range);

    for (Node node : elem.childNodes()) {
        if (node instanceof TextNode) {
            sb.append(((TextNode) node).getWholeText());
        } else if (node instanceof Element) {
            parseOtherElement((Element) node);
        }
    }

    this.stil.updateLen(range, sb.length() - start);
    prevWasMilestone = false;
}
From source file:mml.handler.post.MMLPostHTMLHandler.java
/** * Parse the body of the HTML fragment/* w w w. ja v a 2 s . co m*/ * @param body should be contents of the target div in the editor * @throws JSONException */ protected void parseBody(Element body) throws MMLSaveException { try { this.speller = new AeseSpeller(this.langCode); this.sb = new StringBuilder(); String style = ScratchVersionSet.getDefaultStyleName(this.docid); stil = new STILDocument(style); pages = new STILDocument(style); if (body.nodeName().toLowerCase().equals("div")) parseDiv(body); else { List<Node> children = body.childNodes(); for (Node child : children) { if (child instanceof Element) { String nName = child.nodeName().toLowerCase(); if (nName.equals("div")) parseDiv((Element) child); else if (nName.equals("p")) parsePara((Element) child, "p"); else if (nName.equals("span")) parseSpan((Element) child); else if (nName.matches("(h|H)\\d")) parsePara((Element) child, nName); else if (nName.equals("pre")) parsePre((Element) child); else parseOtherElement((Element) child); } // else it is insignificant white space } } this.speller.cleanup(); } catch (Exception e) { if (this.speller != null) this.speller.cleanup(); throw new MMLSaveException(e); } }
From source file:org.asqatasun.contentadapter.css.CSSJsoupPhlocContentAdapterImpl.java
/** * Retrieve css content and adapt it for each inline resource *///from w w w.j av a 2 s.co m private void adaptInlineCSS() { Set<Long> relatedCssIdSet = new HashSet<>(); for (Element el : inlineCssElements) { String attributeValue = el.attr("style"); if (StringUtils.isNotBlank(attributeValue)) { Resource cssResource = new CSSResourceImpl(el.nodeName() + "{" + attributeValue + "}", 0, new InlineRsrc()); StylesheetContent cssContent = getStylesheetFromInlineResource(cssResource.getResource()); adaptContent(cssContent, cssResource, getCurrentResourcePath(el.baseUri()), null); relatedCssIdSet.add(getContentDataService().saveOrUpdate(cssContent).getId()); } } getContentDataService().saveContentRelationShip(getSSP(), relatedCssIdSet); }
From source file:org.asqatasun.processing.ProcessRemarkServiceImpl.java
/**
 * Creates a source-code remark for a test result and registers it in {@code remarkSet}.
 *
 * <p>When an element is given, the remark records its line number, node name (as target) and
 * snippet; without an element the line number is set to -1. Each supplied evidence element is
 * attached to the remark and linked back to it.
 *
 * @param processResult       test solution the remark is created for
 * @param element             element the remark points at; may be {@code null}
 * @param messageCode         message code of the remark
 * @param evidenceElementList evidence to attach; may be {@code null} or empty
 */
@Override
public void addSourceCodeRemarkOnElement(TestSolution processResult, Element element, String messageCode,
        Collection<EvidenceElement> evidenceElementList) {
    SourceCodeRemark remark = processRemarkDataService.getSourceCodeRemark(processResult, messageCode);

    if (element == null) {
        remark.setLineNumber(-1);
    } else {
        remark.setLineNumber(searchElementLineNumber(element));
        remark.setTarget(element.nodeName());
        remark.setSnippet(getSnippetFromElement(element));
    }

    if (CollectionUtils.isNotEmpty(evidenceElementList)) {
        for (EvidenceElement evidence : evidenceElementList) {
            remark.addElement(evidence);
            evidence.setProcessRemark(remark);
        }
    }

    remarkSet.add(remark);
}