Example usage for org.jsoup.nodes Element nodeName

List of usage examples for org.jsoup.nodes Element nodeName


On this page you can find example usages of org.jsoup.nodes.Element.nodeName().


    public String nodeName() 

Source Link


From source file:akori.AKORI.java

/**
 * Entry point: fetches a page with jsoup, opens the same URL in Firefox through WebDriver,
 * saves a screenshot, overlays point counts read from "et.txt" on the screenshot and writes it
 * to "example.png", then outlines elements located by WebDriver and writes the image again to
 * PATH + NAME.
 *
 * NOTE(review): this excerpt is structurally incomplete. Several closing braces and the bodies
 * of some {@code if} statements are not present, so it does not compile as shown. Confirm
 * against the upstream file before relying on any control-flow detail below.
 *
 * @param args command-line arguments; not read in the visible code
 * @throws IOException declared by the method; the visible code does page, file and image I/O
 * @throws InterruptedException declared by the method; no interruptible call is visible here
 */
public static void main(String[] args) throws IOException, InterruptedException {
    System.out.println("esto es AKORI");

    // URL, PATH and NAME are assigned without a declaration here, so they are declared elsewhere.
    URL = "http://www.mbauchile.cl";
    PATH = "E:\\NetBeansProjects\\AKORI\\";
    NAME = "mbauchile.png";
    // Extract the DOM tree.
    // NOTE(review): per the jsoup documentation a timeout of 0 means "no timeout" - confirm
    // that an unbounded wait is intended.

    Document doc = Jsoup.connect(URL).timeout(0).get();

    // The Firefox driver supports JavaScript.
    WebDriver driver = new FirefoxDriver();
    // The browser window size is used as the bounds of the point grid below.
    int xmax = driver.manage().window().getSize().width;
    int ymax = driver.manage().window().getSize().height;

    // Go to the URL page
    driver.get(URL);

    // Take a screenshot and copy it to PATH + NAME, then load it back as an image.
    File screen = ((TakesScreenshot) driver).getScreenshotAs(OutputType.FILE);
    FileUtils.copyFile(screen, new File(PATH + NAME));

    BufferedImage img = ImageIO.read(new File(PATH + NAME));
    //Graphics2D graph = img.createGraphics();

    // NOTE(review): img1 is allocated but never used in the visible code; graph1 is created from
    // img, so every drawing call below paints directly onto the screenshot.
    BufferedImage img1 = new BufferedImage(xmax, ymax, BufferedImage.TYPE_INT_ARGB);
    Graphics2D graph1 = img.createGraphics();
    // Indexed as matrix[y][x].
    double[][] matrix = new double[ymax][xmax];
    // NOTE(review): no close() call for this reader is visible in the excerpt.
    BufferedReader in = new BufferedReader(new FileReader("et.txt"));
    String linea;
    double max = 0;
    graph1.drawImage(img, 0, 0, null);
    // Maps "x,y" to the number of times that point occurred.
    HashMap<String, Integer> lista = new HashMap<String, Integer>();
    // NOTE(review): count is never read or updated in the visible code.
    int count = 0;
    // Read at most 10000 comma-separated lines; columns 0, 2 and 4 are parsed as x, y and time.
    for (int i = 0; (linea = in.readLine()) != null && i < 10000; ++i) {
        String[] datos = linea.split(",");
        int x = (int) Double.parseDouble(datos[0]);
        int y = (int) Double.parseDouble(datos[2]);
        long time = Double.valueOf(datos[4]).longValue();
        // NOTE(review): the next three ifs have no visible body and the last put() has no
        // visible else. As written the ifs nest into one another and the final put() always
        // resets the count to 1. Presumably skip statements and an `else` were lost - confirm.
        if (x >= xmax || y >= ymax)
        if (time < 691215)
        if (time > 705648)
        if (lista.containsKey(x + "," + y))
            lista.put(x + "," + y, lista.get(x + "," + y) + 1);
            lista.put(x + "," + y, 1);
    Iterator iter = lista.entrySet().iterator();
    Map.Entry e;
    // Finds the largest count, but max is reset to 0 right after, so this result is discarded.
    for (String key : lista.keySet()) {
        Integer i = lista.get(key);
        if (max < i)
            max = i;
    max = 0;
    // Add each point's count to the matrix and track the largest value seen.
    while (iter.hasNext()) {
        e = (Map.Entry) iter.next();
        String xy = (String) e.getKey();
        String[] datos = xy.split(",");
        int x = Integer.parseInt(datos[0]);
        int y = Integer.parseInt(datos[1]);
        matrix[y][x] += (int) e.getValue();
        double aux;
        // normalMatrix is defined outside this excerpt; only its return value is used here.
        if ((aux = normalMatrix(matrix, y, x, ((int) e.getValue()) * 4)) > max) {
            max = aux;
        if (matrix[y][x] > max)
            max = matrix[y][x];
    int A, R, G, B, n;
    // Paint one pixel per non-zero cell; n is the cell value as a percentage of max.
    for (int i = 0; i < xmax; ++i) {
        for (int j = 0; j < ymax; ++j) {
            if (matrix[j][i] != 0) {
                n = (int) Math.round(matrix[j][i] * 100 / max);
                // (255 * n) / 100 is integer division, so Math.round has no effect here.
                R = Math.round((255 * n) / 100);
                G = Math.round((255 * (100 - n)) / 100);
                B = 0;
                A = Math.round((255 * n) / 100);
                // Clamp R and G to the valid 0..255 range.
                if (R > 255)
                    R = 255;
                if (R < 0)
                    R = 0;
                if (G > 255)
                    G = 255;
                if (G < 0)
                    G = 0;
                // Low-intensity cells are drawn fully transparent.
                if (R < 50)
                    A = 0;
                graph1.setColor(new Color(R, G, B, A));
                graph1.fillOval(i, j, 1, 1);

    ImageIO.write(img, "png", new File("example.png"));

    // Extract elements
    Elements e1 = doc.body().getAllElements();
    int i = 1;
    // NOTE(review): nothing is added to tags in the visible code, so the indexOf check below is
    // always true; i is also never incremented. Possibly lost lines - confirm.
    ArrayList<String> tags = new ArrayList<String>();
    for (Element temp : e1) {

        if (tags.indexOf(temp.tagName()) == -1) {

            // Locate every rendered element with this tag name in the live browser page.
            List<WebElement> query = driver.findElements(By.tagName(temp.tagName()));
            for (WebElement temp1 : query) {
                Point po = temp1.getLocation();
                Dimension d = temp1.getSize();
                // NOTE(review): this if has no visible body; as written it guards only the first
                // println. Presumably a skip statement was lost - confirm.
                if (d.width <= 0 || d.height <= 0 || po.x < 0 || po.y < 0)
                System.out.println(i + " " + temp.nodeName());
                System.out.println("  x: " + po.x + " y: " + po.y);
                System.out.println("  width: " + d.width + " height: " + d.height);
                graph1.draw(new Rectangle(po.x, po.y, d.width, d.height));

    // Overwrites the screenshot file with the annotated image.
    ImageIO.write(img, "png", new File(PATH + NAME));



From source file:Main.java

public static Element findFirstChildElement(Element element, String name) {
    List<Element> children = element.children();
    for (Element child : children) {
        if (child.nodeName().equalsIgnoreCase(name)) {
            return child;
        }/*from  ww w  .  j a  v  a  2s . co m*/

    return null;

From source file:feedzilla.Feed.java

private void parser(Element entry) {
    boolean source = false;
    for (Element element : entry.children()) {
        switch (element.nodeName()) {
        case "id":
            this.id = Integer.parseInt(element.text().split(":")[1]);
        case "title":
            if (source) {
                this.source_title = element.text();
            } else {
                this.title = element.text();
            }/*from  w  w  w. j  a  v a 2 s  .  c om*/
        case "summary":
            this.summary = element.text().split("<br")[0];
        case "published":
            this.published = element.text();
        case "updated":
            this.updated = element.text();
        case "author":
            this.author = element.text();
        case "link":
            if (source) {
                this.source_link = element.attr("href");
            } else {
                this.link = element.attr("href");
        case "rights":
            this.copyright = element.text();
        case "source":
            source = true;
            Log.debug("Unknow TAG: " + element.nodeName());

From source file:com.aurel.track.exchange.docx.exporter.PreprocessImage.java

/**
 * Removes the HTML5 figure tag and saves the figcaption in the {@code <img>} tag's "alt"
 * attribute for later use.
 *
 * @param htmlContent the HTML fragment to process
 * @return the parsed document
 */
private Document removeFigureSaveFigcaption(String htmlContent) {
    Document doc = Jsoup.parseBodyFragment(htmlContent);
    //figure is a HTML5 tag not accepted by Tidy, so it should be replaced by the content <img>-tag, and the figcaption is saved in the "alt" attribute
    Elements figureElements = doc.select("figure");
    Element figcaptionNode = null;
    if (figureElements != null) {
        for (Iterator<Element> iterator = figureElements.iterator(); iterator.hasNext();) {
            Element figureElement = iterator.next();
            Elements figureChildren = figureElement.getAllElements();
            Node imageNode = null;
            if (figureChildren != null) {
                for (Element figureChild : figureChildren) {
                    if ("img".equals(figureChild.nodeName())) {
                        imageNode = figureChild;
                    } else {
                        if ("figcaption".equals(figureChild.nodeName())) {
                            figcaptionNode = figureChild;
                            //set "figcaption" text as value for "alt" attribute  
                            if (imageNode != null) {
                                imageNode.attr("alt", figcaptionNode.text());
            if (imageNode != null) {
    return doc;

From source file:cn.wanghaomiao.xpath.core.XpathEvaluator.java

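/**
 * Checks an element against an XPath node's tag name and optional predicate.
 *
 * @param e    the candidate element
 * @param node the XPath node carrying the tag name and predicate to test
 * @return {@code e} when it passes the filter, otherwise {@code null}
 */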
public Element filter(Element e, Node node) throws NoSuchFunctionException, NoSuchAxisException {
    // Returns e when it passes the tag-name test and, if present, the predicate test; returns
    // null otherwise.
    // NOTE(review): this excerpt is structurally incomplete. Several closing braces and the tail
    // of one call expression are not present. Confirm against the upstream file.

    // Tag test: "*" accepts any element, otherwise the names must be equal.
    if (node.getTagName().equals("*") || node.getTagName().equals(e.nodeName())) {
        if (node.getPredicate() != null && StringUtils.isNotBlank(node.getPredicate().getValue())) {
            Predicate p = node.getPredicate();
            // No operator: the predicate value is a bare index, a function call or an attribute.
            if (p.getOpEm() == null) {
                // All-digit value: compared against getElIndex(e).
                if (p.getValue().matches("\\d+") && getElIndex(e) == Integer.parseInt(p.getValue())) {
                    return e;
                // Value ending in "()": the name without "()" is passed to callFilterFunc and the
                // result is cast to Boolean.
                } else if (p.getValue().endsWith("()")
                        && (Boolean) callFilterFunc(p.getValue().substring(0, p.getValue().length() - 2), e)) {
                    return e;
                // Value starting with "@": passes when the element has that attribute.
                } else if (p.getValue().startsWith("@")
                        && e.hasAttr(StringUtils.substringAfter(p.getValue(), "@"))) {
                    return e;
                //todo p.value ~= contains(./@href,'renren.com')
            } else {
                // Operator present: evaluate the left operand, then apply the operator.
                if (p.getLeft().matches("[^/]+\\(\\)")) {
                    // NOTE(review): the remaining argument(s) and closing ");" of this call are
                    // not present in the excerpt.
                    Object filterRes = p.getOpEm().excute(
                            callFilterFunc(p.getLeft().substring(0, p.getLeft().length() - 2), e).toString(),
                    if (filterRes instanceof Boolean && (Boolean) filterRes) {
                        return e;
                    // An Integer result is compared against the element's sibling index.
                    } else if (filterRes instanceof Integer
                            && e.siblingIndex() == Integer.parseInt(filterRes.toString())) {
                        return e;
                } else if (p.getLeft().startsWith("@")) {
                    // Left operand is an attribute name; its value is the operator's left input.
                    String lValue = e.attr(p.getLeft().substring(1));
                    Object filterRes = p.getOpEm().excute(lValue, p.getRight());
                    // NOTE(review): unchecked cast; the sibling branch above checks instanceof
                    // first.
                    if ((Boolean) filterRes) {
                        return e;
                } else {
                    // Original comment was mis-encoded; presumably "left operand is an xpath".
                    // NOTE(review): eltmp is still empty when passed to evaluate() in the visible
                    // code - confirm whether a line adding e to it was lost.
                    List<Element> eltmp = new LinkedList<Element>();
                    List<JXNode> rstmp = evaluate(p.getLeft(), new Elements(eltmp));
                    if ((Boolean) p.getOpEm().excute(StringUtils.join(rstmp, ""), p.getRight())) {
                        return e;
        } else {
            // No predicate, or a blank one: the tag match alone is sufficient.
            return e;
    return null;

From source file:com.spd.ukraine.lucenewebsearch1.web.IndexingController.java

/**
 * Recursively indexes the given web page in the search database.
 *
 * @param webPage the page to index; {@code webPage.url} is the entered URL and
 *                {@code webPage.title} is set
 * @param html the jsoup {@code Document} of the entered URL
 * @param recursionNumber current recursion depth, used to stop the recursion once it exceeds
 *                        {@code MAX_RECURSION_SEARCH_NUMBER}
 */
private void indexElements(WebPage webPage, Document html, final int recursionNumber)
        throws IOException, ParseException {
    // Indexes the text of the given page, then fetches each not-yet-referenced link on it and
    // recurses with recursionNumber + 1.
    // NOTE(review): this excerpt is structurally incomplete. The bodies and closing braces of
    // the guards below, and the expression that `.filter(...)` is chained onto, are not present.
    // Confirm against the upstream file.
    String title = html.title();
    // Guard on titles already recorded in referencedTitles (body not visible).
    if (referencedTitles.contains(title.trim())) {
    // Guard on pages already indexed (closing brace not visible).
    if (containsPage(webPage)) {
        System.out.println(webPage.getUrl() + " is already indexed");
    // NOTE(review): prevElement is only referenced by the commented-out code below.
    Element prevElement = null;
    Elements elements = html.body().getAllElements(); //.getElementsByTag("a");
    // Index the whole document text in one call.
    addDoc(webPage, html.text());
    // Earlier per-element indexing, left commented out:
    //        for (Element element : elements) {
    ////                System.out.println(element.nodeName() + " element.text() " 
    ////                        + element.text() + " url " 
    ////                        + element.absUrl("href"));
    //            if (element.nodeName().equalsIgnoreCase("body")) {
    //                addDoc(webPage, element.text());
    //                break;
    ////                continue;
    //            }
    //            if (null == prevElement) {
    //                prevElement = element;
    ////            } else if (prevElementContainsElementText(prevElement, element)) {
    ////                continue;
    //            }
    ////            if (null !== webPagesService.findWebPage(element.absUrl("href")))
    //            if (element.text().trim().isEmpty()) {
    //                continue;
    //            }
    ////            StringTokenizer str = new StringTokenizer(element.text());
    ////            str.
    //            addDoc(webPage, element.text());
    //        }
    // Recursion limits: depth and total number of referenced sites (body not visible).
    if (recursionNumber > MAX_RECURSION_SEARCH_NUMBER || referencedSites.size() > MAX_NUMBER_SITES_INDEXED) {
        //            System.out.println(recursionNumber + " " 
        //                    + referencedSites.contains(webPage.getUrl()));
            // Keep only <a> elements with a non-empty absolute href that is not in
            // referencedSites, either as-is or after removeSharpEtc().
            .filter((Element e) -> e.nodeName().equalsIgnoreCase("a") && null != e.absUrl(HREF)
                    && !e.absUrl(HREF).trim().isEmpty() && !referencedSites.contains(e.absUrl(HREF))
                    && !referencedSites.contains(removeSharpEtc(e.absUrl(HREF))))
            .forEach((Element element) -> {
                WebPage webPage1 = new WebPage(element.absUrl(HREF));
                String url1 = webPage1.getUrl();
                //                    System.out.println(recursionNumber + " recursion for '" 
                //                            + url1 + "'");
                // Fetch the linked page and index it one level deeper; a failure is printed and
                // the remaining links are still processed.
                try {
                    Document htmlR = Jsoup.connect(url1).get();
                    indexElements(webPage1, htmlR, recursionNumber + 1);
                } catch (IOException | ParseException e) {
                    System.out.println("Exception " + e.getMessage());
    // Earlier loop-based version of the link traversal, left commented out:
    //        for (Element element : elements) {
    //            if (!element.nodeName().equalsIgnoreCase("a")) {
    //                continue;
    //            }
    //            WebPage webPage1 = new WebPage(element.absUrl("href"));
    //            if (null == webPage1.getUrl() 
    //                    || webPage1.getUrl().isEmpty()
    //                    || referencedSites.contains(webPage1.getUrl())) {
    //                continue;
    //            }
    //            System.out.println(recursionNumber + "recursion for " 
    //                    + element.absUrl("href"));
    //            try {
    //                Document htmlR = Jsoup.connect(webPage1.getUrl()).get();
    //                webPage1.setTitle(htmlR.title());
    //                indexElements(webPage1, htmlR, recursionNumber + 1);
    //            } catch (IOException e) {
    //                System.out.println("IOException " + e.getMessage());
    //            }
    //            referencedSites.add(webPage1.getUrl());
    //        }

From source file:mml.handler.post.MMLPostHTMLHandler.java

/**
 * Parses an element that is not a span, p or div. May happen but should not.
 *
 * @param elem an element that is not a span, p or div
 */
private void parseOtherElement(Element elem) throws JSONException {
    List<Node> children = elem.childNodes();
    int offset = sb.length();
    String name = elem.attr("class");
    if (name == null || name.length() == 0)
        name = elem.nodeName();
    Range r = new Range(name, offset, 0);
    for (Node child : children) {
        if (child instanceof Element)
            parseOtherElement((Element) child);
        else if (child instanceof TextNode)
            sb.append(((TextNode) child).getWholeText());
    this.stil.updateLen(r, sb.length() - offset);
    prevWasMilestone = false;

From source file:mml.handler.post.MMLPostHTMLHandler.java

/**
 * Parse the body of the HTML fragment.
 *
 * @param body should be contents of the target div in the editor
 * @throws MMLSaveException if any exception is raised while parsing
 */
protected void parseBody(Element body) throws MMLSaveException {
    // Resets the parser state (speller, text buffer, STIL documents), then dispatches each child
    // element of body to the handler for its tag. Any exception is rethrown as MMLSaveException.
    // NOTE(review): this excerpt is structurally incomplete. The statement for the "body is a
    // div" branch, an `else` before parseOtherElement, the statement guarded by the speller null
    // check, and several closing braces are not present. Confirm against the upstream file.
    try {
        this.speller = new AeseSpeller(this.langCode);
        this.sb = new StringBuilder();
        String style = ScratchVersionSet.getDefaultStyleName(this.docid);
        stil = new STILDocument(style);
        pages = new STILDocument(style);
        // Body is itself a div (statement for this branch not visible).
        if (body.nodeName().toLowerCase().equals("div"))
        else {
            List<Node> children = body.childNodes();
            for (Node child : children) {
                if (child instanceof Element) {
                    // Tag names are lower-cased before comparison.
                    String nName = child.nodeName().toLowerCase();
                    if (nName.equals("div"))
                        parseDiv((Element) child);
                    else if (nName.equals("p"))
                        parsePara((Element) child, "p");
                    else if (nName.equals("span"))
                        parseSpan((Element) child);
                    // Headings h0..h9 are parsed as paragraphs named after the tag. The "H"
                    // alternative cannot match because nName is already lower case.
                    else if (nName.matches("(h|H)\\d"))
                        parsePara((Element) child, nName);
                    else if (nName.equals("pre"))
                        parsePre((Element) child);
                        parseOtherElement((Element) child);

                // else it is insignificant white space
    } catch (Exception e) {
        // NOTE(review): as shown, the throw is the body of this if, so the exception would be
        // dropped when speller is null. Presumably a cleanup statement was lost - confirm.
        if (this.speller != null)
        throw new MMLSaveException(e);

From source file:org.asqatasun.contentadapter.css.CSSJsoupPhlocContentAdapterImpl.java

/**
 * Retrieve css content and adapt it for each inline resource.
 */
private void adaptInlineCSS() {
    Set<Long> relatedCssIdSet = new HashSet<>();

    for (Element el : inlineCssElements) {
        String attributeValue = el.attr("style");
        if (StringUtils.isNotBlank(attributeValue)) {
            Resource cssResource = new CSSResourceImpl(el.nodeName() + "{" + attributeValue + "}", 0,
                    new InlineRsrc());
            StylesheetContent cssContent = getStylesheetFromInlineResource(cssResource.getResource());
            adaptContent(cssContent, cssResource, getCurrentResourcePath(el.baseUri()), null);
    getContentDataService().saveContentRelationShip(getSSP(), relatedCssIdSet);

From source file:org.asqatasun.processing.ProcessRemarkServiceImpl.java

public void addSourceCodeRemarkOnElement(TestSolution processResult, Element element, String messageCode,
        Collection<EvidenceElement> evidenceElementList) {
    SourceCodeRemark remark = processRemarkDataService.getSourceCodeRemark(processResult, messageCode);

    if (element != null) {
    } else {/*www  .j  a  v a 2 s .co  m*/
    if (CollectionUtils.isNotEmpty(evidenceElementList)) {
        for (EvidenceElement ee : evidenceElementList) {