Example usage for org.jsoup.nodes Element nodeName

List of usage examples for org.jsoup.nodes Element nodeName

Introduction

On this page you can find example usages of the org.jsoup.nodes.Element method nodeName().

Prototype

@Override
    public String nodeName() 

Source Link

Usage

From source file:akori.AKORI.java

public static void main(String[] args) throws IOException, InterruptedException {
    System.out.println("esto es AKORI");

    URL = "http://www.mbauchile.cl";
    PATH = "E:\\NetBeansProjects\\AKORI\\";
    NAME = "mbauchile.png";
    // Extrar DOM tree

    Document doc = Jsoup.connect(URL).timeout(0).get();

    // The Firefox driver supports javascript 
    WebDriver driver = new FirefoxDriver();
    driver.manage().window().maximize();
    System.out.println(driver.manage().window().getSize().toString());
    System.out.println(driver.manage().window().getPosition().toString());
    int xmax = driver.manage().window().getSize().width;
    int ymax = driver.manage().window().getSize().height;

    // Go to the URL page
    driver.get(URL);/*  www .ja  v  a  2s .  c  o m*/

    File screen = ((TakesScreenshot) driver).getScreenshotAs(OutputType.FILE);
    FileUtils.copyFile(screen, new File(PATH + NAME));

    BufferedImage img = ImageIO.read(new File(PATH + NAME));
    //Graphics2D graph = img.createGraphics();

    BufferedImage img1 = new BufferedImage(xmax, ymax, BufferedImage.TYPE_INT_ARGB);
    Graphics2D graph1 = img.createGraphics();
    double[][] matrix = new double[ymax][xmax];
    BufferedReader in = new BufferedReader(new FileReader("et.txt"));
    String linea;
    double max = 0;
    graph1.drawImage(img, 0, 0, null);
    HashMap<String, Integer> lista = new HashMap<String, Integer>();
    int count = 0;
    for (int i = 0; (linea = in.readLine()) != null && i < 10000; ++i) {
        String[] datos = linea.split(",");
        int x = (int) Double.parseDouble(datos[0]);
        int y = (int) Double.parseDouble(datos[2]);
        long time = Double.valueOf(datos[4]).longValue();
        if (x >= xmax || y >= ymax)
            continue;
        if (time < 691215)
            continue;
        if (time > 705648)
            break;
        if (lista.containsKey(x + "," + y))
            lista.put(x + "," + y, lista.get(x + "," + y) + 1);
        else
            lista.put(x + "," + y, 1);
        ++count;
    }
    System.out.println(count);
    in.close();
    Iterator iter = lista.entrySet().iterator();
    Map.Entry e;
    for (String key : lista.keySet()) {
        Integer i = lista.get(key);
        if (max < i)
            max = i;
    }
    System.out.println(max);
    max = 0;
    while (iter.hasNext()) {
        e = (Map.Entry) iter.next();
        String xy = (String) e.getKey();
        String[] datos = xy.split(",");
        int x = Integer.parseInt(datos[0]);
        int y = Integer.parseInt(datos[1]);
        matrix[y][x] += (int) e.getValue();
        double aux;
        if ((aux = normalMatrix(matrix, y, x, ((int) e.getValue()) * 4)) > max) {
            max = aux;
        }
        //normalMatrix(matrix,x,y,20);
        if (matrix[y][x] > max)
            max = matrix[y][x];
    }
    int A, R, G, B, n;
    for (int i = 0; i < xmax; ++i) {
        for (int j = 0; j < ymax; ++j) {
            if (matrix[j][i] != 0) {
                n = (int) Math.round(matrix[j][i] * 100 / max);
                R = Math.round((255 * n) / 100);
                G = Math.round((255 * (100 - n)) / 100);
                B = 0;
                A = Math.round((255 * n) / 100);
                ;
                if (R > 255)
                    R = 255;
                if (R < 0)
                    R = 0;
                if (G > 255)
                    G = 255;
                if (G < 0)
                    G = 0;
                if (R < 50)
                    A = 0;
                graph1.setColor(new Color(R, G, B, A));
                graph1.fillOval(i, j, 1, 1);
            }
        }
    }
    //graph1.dispose();

    ImageIO.write(img, "png", new File("example.png"));
    System.out.println(max);

    graph1.setColor(Color.RED);
    // Extraer elementos
    Elements e1 = doc.body().getAllElements();
    int i = 1;
    ArrayList<String> tags = new ArrayList<String>();
    for (Element temp : e1) {

        if (tags.indexOf(temp.tagName()) == -1) {
            tags.add(temp.tagName());

            List<WebElement> query = driver.findElements(By.tagName(temp.tagName()));
            for (WebElement temp1 : query) {
                Point po = temp1.getLocation();
                Dimension d = temp1.getSize();
                if (d.width <= 0 || d.height <= 0 || po.x < 0 || po.y < 0)
                    continue;
                System.out.println(i + " " + temp.nodeName());
                System.out.println("  x: " + po.x + " y: " + po.y);
                System.out.println("  width: " + d.width + " height: " + d.height);
                graph1.draw(new Rectangle(po.x, po.y, d.width, d.height));
                ++i;
            }
        }
    }

    graph1.dispose();
    ImageIO.write(img, "png", new File(PATH + NAME));

    driver.quit();

}

From source file:Main.java

public static Element findFirstChildElement(Element element, String name) {
    List<Element> children = element.children();
    for (Element child : children) {
        if (child.nodeName().equalsIgnoreCase(name)) {
            return child;
        }/*from  ww w  .  j a  v  a  2s . co m*/
    }

    return null;
}

From source file:feedzilla.Feed.java

private void parser(Element entry) {
    boolean source = false;
    for (Element element : entry.children()) {
        switch (element.nodeName()) {
        case "id":
            this.id = Integer.parseInt(element.text().split(":")[1]);
            break;
        case "title":
            if (source) {
                this.source_title = element.text();
            } else {
                this.title = element.text();
            }/*from  w  w  w. j  a  v a 2 s  .  c om*/
            break;
        case "summary":
            this.summary = element.text().split("<br")[0];
            break;
        case "published":
            this.published = element.text();
            break;
        case "updated":
            this.updated = element.text();
            break;
        case "author":
            this.author = element.text();
            break;
        case "link":
            if (source) {
                this.source_link = element.attr("href");
            } else {
                this.link = element.attr("href");
            }
            break;
        case "rights":
            this.copyright = element.text();
            break;
        case "source":
            source = true;
            break;
        default:
            Log.debug("Unknow TAG: " + element.nodeName());
            break;
        }
    }
}

From source file:com.aurel.track.exchange.docx.exporter.PreprocessImage.java

/**
 * Removes the HTML5 figure tag and saves the figcaption in the <img> tag's "alt" attribute for later use
 * @param htmlContent//from   w ww .j  av  a2s .  c o m
 * @return
 */
private Document removeFigureSaveFigcaption(String htmlContent) {
    Document doc = Jsoup.parseBodyFragment(htmlContent);
    //figure is a HTML5 tag not accepted by Tidy, so it should be replaced by the content <img>-tag, and the figcaption is saved in the "alt" attribute
    Elements figureElements = doc.select("figure");
    Element figcaptionNode = null;
    if (figureElements != null) {
        for (Iterator<Element> iterator = figureElements.iterator(); iterator.hasNext();) {
            Element figureElement = iterator.next();
            Elements figureChildren = figureElement.getAllElements();
            Node imageNode = null;
            if (figureChildren != null) {
                for (Element figureChild : figureChildren) {
                    if ("img".equals(figureChild.nodeName())) {
                        imageNode = figureChild;
                    } else {
                        if ("figcaption".equals(figureChild.nodeName())) {
                            figcaptionNode = figureChild;
                            //set "figcaption" text as value for "alt" attribute  
                            if (imageNode != null) {
                                imageNode.attr("alt", figcaptionNode.text());
                            }
                        }
                    }
                }
            }
            if (imageNode != null) {
                figureElement.replaceWith(imageNode);
            }
        }
    }
    return doc;
}

From source file:cn.wanghaomiao.xpath.core.XpathEvaluator.java

/**
 * //from ww  w  . j  av  a 2s.  c o  m
 *
 * @param e
 * @param node
 * @return
 */
public Element filter(Element e, Node node) throws NoSuchFunctionException, NoSuchAxisException {
    if (node.getTagName().equals("*") || node.getTagName().equals(e.nodeName())) {
        if (node.getPredicate() != null && StringUtils.isNotBlank(node.getPredicate().getValue())) {
            Predicate p = node.getPredicate();
            if (p.getOpEm() == null) {
                if (p.getValue().matches("\\d+") && getElIndex(e) == Integer.parseInt(p.getValue())) {
                    return e;
                } else if (p.getValue().endsWith("()")
                        && (Boolean) callFilterFunc(p.getValue().substring(0, p.getValue().length() - 2), e)) {
                    return e;
                } else if (p.getValue().startsWith("@")
                        && e.hasAttr(StringUtils.substringAfter(p.getValue(), "@"))) {
                    return e;
                }
                //todo p.value ~= contains(./@href,'renren.com')
            } else {
                if (p.getLeft().matches("[^/]+\\(\\)")) {
                    Object filterRes = p.getOpEm().excute(
                            callFilterFunc(p.getLeft().substring(0, p.getLeft().length() - 2), e).toString(),
                            p.getRight());
                    if (filterRes instanceof Boolean && (Boolean) filterRes) {
                        return e;
                    } else if (filterRes instanceof Integer
                            && e.siblingIndex() == Integer.parseInt(filterRes.toString())) {
                        return e;
                    }
                } else if (p.getLeft().startsWith("@")) {
                    String lValue = e.attr(p.getLeft().substring(1));
                    Object filterRes = p.getOpEm().excute(lValue, p.getRight());
                    if ((Boolean) filterRes) {
                        return e;
                    }
                } else {
                    // ???xpath?
                    List<Element> eltmp = new LinkedList<Element>();
                    eltmp.add(e);
                    List<JXNode> rstmp = evaluate(p.getLeft(), new Elements(eltmp));
                    if ((Boolean) p.getOpEm().excute(StringUtils.join(rstmp, ""), p.getRight())) {
                        return e;
                    }
                }
            }
        } else {
            return e;
        }
    }
    return null;
}

From source file:com.spd.ukraine.lucenewebsearch1.web.IndexingController.java

/**
 * Method used to perform recursive creation indexing for a given web page 
 * in search database./*from  w  w w. j a  va  2  s . c o  m*/
 *
 * @param webPage webPage.url is entered url
 * webPage.title is set
 * @param html Jsoup.Document of entered url
 * @param recursionNumber used to stop recursion at exceeding 
 * MAX_RECURSION_SEARCH_NUMBER
 */
private void indexElements(WebPage webPage, Document html, final int recursionNumber)
        throws IOException, ParseException {
    String title = html.title();
    if (referencedTitles.contains(title.trim())) {
        return;
    }
    referencedTitles.add(title.trim());
    webPage.setTitle(title);
    if (containsPage(webPage)) {
        System.out.println(webPage.getUrl() + " is already indexed");
        return;
    }
    Element prevElement = null;
    Elements elements = html.body().getAllElements(); //.getElementsByTag("a");
    addDoc(webPage, html.text());
    //        for (Element element : elements) {
    ////                System.out.println(element.nodeName() + " element.text() " 
    ////                        + element.text() + " url " 
    ////                        + element.absUrl("href"));
    //            if (element.nodeName().equalsIgnoreCase("body")) {
    //                addDoc(webPage, element.text());
    //                break;
    ////                continue;
    //            }
    //            if (null == prevElement) {
    //                prevElement = element;
    ////            } else if (prevElementContainsElementText(prevElement, element)) {
    ////                continue;
    //            }
    ////            if (null !== webPagesService.findWebPage(element.absUrl("href")))
    //            if (element.text().trim().isEmpty()) {
    //                continue;
    //            }
    ////            StringTokenizer str = new StringTokenizer(element.text());
    ////            str.
    //            addDoc(webPage, element.text());
    //        }
    if (recursionNumber > MAX_RECURSION_SEARCH_NUMBER || referencedSites.size() > MAX_NUMBER_SITES_INDEXED) {
        //            System.out.println(recursionNumber + " " 
        //                    + referencedSites.contains(webPage.getUrl()));
        return;
    }
    elements.parallelStream()
            .filter((Element e) -> e.nodeName().equalsIgnoreCase("a") && null != e.absUrl(HREF)
                    && !e.absUrl(HREF).trim().isEmpty() && !referencedSites.contains(e.absUrl(HREF))
                    && !referencedSites.contains(removeSharpEtc(e.absUrl(HREF))))
            .forEach((Element element) -> {
                WebPage webPage1 = new WebPage(element.absUrl(HREF));
                String url1 = webPage1.getUrl();
                //                    System.out.println(recursionNumber + " recursion for '" 
                //                            + url1 + "'");
                try {
                    Document htmlR = Jsoup.connect(url1).get();
                    indexElements(webPage1, htmlR, recursionNumber + 1);
                } catch (IOException | ParseException e) {
                    System.out.println("Exception " + e.getMessage());
                }
                referencedSites.add(url1);
            });
    //        for (Element element : elements) {
    //            if (!element.nodeName().equalsIgnoreCase("a")) {
    //                continue;
    //            }
    //            WebPage webPage1 = new WebPage(element.absUrl("href"));
    //            if (null == webPage1.getUrl() 
    //                    || webPage1.getUrl().isEmpty()
    //                    || referencedSites.contains(webPage1.getUrl())) {
    //                continue;
    //            }
    //            System.out.println(recursionNumber + "recursion for " 
    //                    + element.absUrl("href"));
    //            try {
    //                Document htmlR = Jsoup.connect(webPage1.getUrl()).get();
    //                webPage1.setTitle(htmlR.title());
    //                indexElements(webPage1, htmlR, recursionNumber + 1);
    //            } catch (IOException e) {
    //                System.out.println("IOException " + e.getMessage());
    //            }
    //            referencedSites.add(webPage1.getUrl());
    //        }
}

From source file:mml.handler.post.MMLPostHTMLHandler.java

/**
 * May happen but should not// w  w  w .j  a  v a 2s.  co  m
 * @param elem an element that is not a span, p or div
 */
private void parseOtherElement(Element elem) throws JSONException {
    List<Node> children = elem.childNodes();
    int offset = sb.length();
    String name = elem.attr("class");
    if (name == null || name.length() == 0)
        name = elem.nodeName();
    Range r = new Range(name, offset, 0);
    stil.add(r);
    for (Node child : children) {
        if (child instanceof Element)
            parseOtherElement((Element) child);
        else if (child instanceof TextNode)
            sb.append(((TextNode) child).getWholeText());
    }
    this.stil.updateLen(r, sb.length() - offset);
    prevWasMilestone = false;
}

From source file:mml.handler.post.MMLPostHTMLHandler.java

/**
 * Parse the body of the HTML fragment/*  w w  w. ja v a 2  s . co m*/
 * @param body should be contents of the target div in the editor
 * @throws JSONException 
 */
protected void parseBody(Element body) throws MMLSaveException {
    try {
        this.speller = new AeseSpeller(this.langCode);
        this.sb = new StringBuilder();
        String style = ScratchVersionSet.getDefaultStyleName(this.docid);
        stil = new STILDocument(style);
        pages = new STILDocument(style);
        if (body.nodeName().toLowerCase().equals("div"))
            parseDiv(body);
        else {
            List<Node> children = body.childNodes();
            for (Node child : children) {
                if (child instanceof Element) {
                    String nName = child.nodeName().toLowerCase();
                    if (nName.equals("div"))
                        parseDiv((Element) child);
                    else if (nName.equals("p"))
                        parsePara((Element) child, "p");
                    else if (nName.equals("span"))
                        parseSpan((Element) child);
                    else if (nName.matches("(h|H)\\d"))
                        parsePara((Element) child, nName);
                    else if (nName.equals("pre"))
                        parsePre((Element) child);
                    else
                        parseOtherElement((Element) child);

                }
                // else it is insignificant white space
            }
        }
        this.speller.cleanup();
    } catch (Exception e) {
        if (this.speller != null)
            this.speller.cleanup();
        throw new MMLSaveException(e);
    }
}

From source file:org.asqatasun.contentadapter.css.CSSJsoupPhlocContentAdapterImpl.java

/**
 * Retrieve css content and adapt it for each inline resource
 *///from w w w.j  av a 2 s.co  m
private void adaptInlineCSS() {
    Set<Long> relatedCssIdSet = new HashSet<>();

    for (Element el : inlineCssElements) {
        String attributeValue = el.attr("style");
        if (StringUtils.isNotBlank(attributeValue)) {
            Resource cssResource = new CSSResourceImpl(el.nodeName() + "{" + attributeValue + "}", 0,
                    new InlineRsrc());
            StylesheetContent cssContent = getStylesheetFromInlineResource(cssResource.getResource());
            adaptContent(cssContent, cssResource, getCurrentResourcePath(el.baseUri()), null);
            relatedCssIdSet.add(getContentDataService().saveOrUpdate(cssContent).getId());
        }
    }
    getContentDataService().saveContentRelationShip(getSSP(), relatedCssIdSet);
}

From source file:org.asqatasun.processing.ProcessRemarkServiceImpl.java

@Override
public void addSourceCodeRemarkOnElement(TestSolution processResult, Element element, String messageCode,
        Collection<EvidenceElement> evidenceElementList) {
    SourceCodeRemark remark = processRemarkDataService.getSourceCodeRemark(processResult, messageCode);

    if (element != null) {
        remark.setLineNumber(searchElementLineNumber(element));
        remark.setTarget(element.nodeName());
        remark.setSnippet(getSnippetFromElement(element));
    } else {/*www  .j  a  v a 2 s .co  m*/
        remark.setLineNumber(-1);
    }
    if (CollectionUtils.isNotEmpty(evidenceElementList)) {
        for (EvidenceElement ee : evidenceElementList) {
            remark.addElement(ee);
            ee.setProcessRemark(remark);
        }
    }
    remarkSet.add(remark);
}