Example usage for org.jsoup.nodes.Element.ownText()

List of usage examples for org.jsoup.nodes.Element.ownText()

Introduction

This page collects usage examples for the ownText() method of org.jsoup.nodes.Element.

Prototype

public String ownText() 

Source Link

Document

Gets the text owned by this element only; does not get the combined text of all children.

Usage

From source file:Main.java

/**
 * Parses a small HTML snippet and prints the own text of the first element whose
 * {@code class} attribute is {@code "artist"}.
 *
 * <p>The matched anchor contains direct text plus a nested {@code <span>}; only the
 * anchor's own text is printed, not the text of the nested span.
 *
 * @param args unused
 * @throws Exception declared for convenience; nothing in the body throws a checked exception
 */
public static void main(String[] args) throws Exception {
    final String html = "<html><head/><body><a href=\"#\" class=\"artist\">Soulive<span class=\"create-play\">Play</span></a></body></html>";
    final Document parsed = Jsoup.parse(html);
    final Element artistLink = parsed.getElementsByAttributeValue("class", "artist").first();
    final String directText = artistLink.ownText();
    System.out.println(directText);
}

From source file:Main.java

/**
 * Renders {@code root} and all of its descendants as an indented outline.
 *
 * <p>Each element produces one line of the form {@code <padding><tagName>:<ownText>},
 * where the padding is {@code indentation} spaces. Children are rendered recursively with
 * one more space of padding, and every child's outline is followed by an extra newline.
 *
 * @param root        element to render
 * @param indentation number of leading spaces for {@code root}'s line; values below 1 add none
 * @return the outline text for {@code root} and its subtree
 */
public static String printNode(Element root, int indentation) {
    final StringBuilder outline = new StringBuilder();
    for (int pad = indentation; pad > 0; pad--) {
        outline.append(' ');
    }
    outline.append(root.tagName()).append(":").append(root.ownText()).append("\n");

    final int childIndentation = indentation + 1;
    for (Element child : root.children()) {
        outline.append(printNode(child, childIndentation)).append("\n");
    }
    return outline.toString();
}

From source file:Main.java

/**
 * Builds an indented, line-per-element outline of {@code root} and its descendants.
 *
 * <p>The line for an element is {@code indentation} spaces, the tag name, a colon and the
 * element's own text. Children follow, each rendered one level deeper and each followed by
 * an additional newline.
 *
 * @param root        element whose subtree is rendered
 * @param indentation number of leading spaces for this element; also used as the initial
 *                    buffer capacity
 * @return the rendered outline
 */
private static String printNode(Element root, int indentation) {
    // Initial capacity covers the leading padding; the builder grows as needed afterwards.
    final StringBuilder text = new StringBuilder(indentation);
    int written = 0;
    while (written < indentation) {
        text.append(' ');
        written++;
    }
    text.append(root.tagName()).append(':').append(root.ownText()).append('\n');

    for (Element child : root.children()) {
        final String childOutline = printNode(child, indentation + 1);
        text.append(childOutline).append('\n');
    }
    return text.toString();
}

From source file:com.nuance.expertassistant.ContentExtractor.java

/** Deepest heading level ({@code h1}..{@code h4}) that opens its own section. */
private static final int MAX_HEADING_LEVEL = 4;

/**
 * Prints the document as nested {@code <section>} / {@code <para>} markup.
 *
 * <p>An outer section titled with the document title wraps everything. Every element of the
 * document is then visited in selection order:
 * <ul>
 *   <li>elements whose own text is missing, empty or whitespace-only are skipped;</li>
 *   <li>elements whose tag name contains the letter {@code a} are skipped;</li>
 *   <li>{@code h1}..{@code h4} elements close any open section of the same or a deeper
 *       level and open a new section titled with the heading text;</li>
 *   <li>any other element is printed as a {@code <para>} holding its own text.</li>
 * </ul>
 * Sections still open at the end are closed before the outer section is closed.
 *
 * @param doc parsed document to convert
 */
public static void extract(Document doc) {

    print("<section id =\"{}\" title =\"" + stripNonValidXMLCharacters(doc.title()) + "\">");

    final ArrayList<String> openHeaderList = new ArrayList<String>();

    for (final Element element : doc.select("*")) {
        final String ownText = element.ownText();

        // Compare by content, not by reference: whitespace-only text must be skipped too,
        // otherwise an empty <para> would be emitted for it.
        if (ownText == null || ownText.trim().isEmpty()) {
            continue;
        }

        // NOTE(review): this is a substring match, so it skips not only <a> but every tag
        // whose name contains 'a' (span, table, ...). Kept as-is; confirm the intent.
        if (element.tagName().contains("a")) {
            continue;
        }

        final int headingLevel = headingLevelOf(element);
        if (headingLevel > 0) {
            // A new heading ends every open section at its own level or deeper.
            closeOpenSections(openHeaderList, headingLevel);
            print("<section id =\"{}\" title =\"" + stripNonValidXMLCharacters(element.text()) + "\">");
            openHeaderList.add("h" + headingLevel);
        } else {
            print("<para>");
            print(stripNonValidXMLCharacters(ownText));
            print("</para>");
        }
    }

    closeOpenSections(openHeaderList, 1);

    print("</section>");

}

/**
 * Returns the heading level (1-4) of an element whose tag name contains {@code h1}..{@code h4}
 * and whose text is non-empty, or 0 when the element is not such a heading.
 */
private static int headingLevelOf(Element element) {
    final String tagName = element.tagName();
    for (int level = 1; level <= MAX_HEADING_LEVEL; level++) {
        if (tagName.contains("h" + level)) {
            final String text = element.text();
            if (text != null && !text.isEmpty()) {
                return level;
            }
        }
    }
    return 0;
}

/**
 * Closes every open heading section from {@code fromLevel} down to the deepest level,
 * printing one closing tag per section that was actually open.
 */
private static void closeOpenSections(ArrayList<String> openHeaderList, int fromLevel) {
    for (int level = fromLevel; level <= MAX_HEADING_LEVEL; level++) {
        // remove(Object) reports whether the marker was present, i.e. whether a section was open.
        if (openHeaderList.remove("h" + level)) {
            print("</section>");
        }
    }
}

From source file:me.vertretungsplan.parser.UntisCommonParser.java

/**
 * Returns the element that actually carries a cell's content.
 *
 * <p>When the cell has no own text and exactly one direct {@code <span>} child, that span
 * is returned; in every other case the cell itself is returned.
 *
 * @param cell cell to inspect
 * @return the single direct span child, or {@code cell} itself
 */
private static Element getContentElement(Element cell) {
    final boolean hasOwnText = !cell.ownText().isEmpty();
    if (hasOwnText || cell.select("> span").size() != 1) {
        return cell;
    }
    return cell.select("> span").first();
}

From source file:me.vertretungsplan.parser.UntisCommonParser.java

/**
 * Fills the room fields of a substitution from a cell.
 *
 * <p>If the cell content contains {@code <s>} elements, their text becomes the previous
 * room and the remaining own text (if any) becomes the current room with a leading
 * {@code ?} removed. Without {@code <s>} elements the whole cell text is the room.
 *
 * @param subst substitution to update
 * @param cell  cell holding the room information
 */
static void handleRoom(Substitution subst, Element cell) {
    final Element content = getContentElement(cell);
    if (content.select("s").isEmpty()) {
        subst.setRoom(content.text());
        return;
    }

    subst.setPreviousRoom(content.select("s").text());
    final String currentRoom = content.ownText();
    if (!currentRoom.isEmpty()) {
        final String withoutQuestionMark = currentRoom.replaceFirst("^\\?", "");
        // NOTE(review): the second pattern is empty, so this call changes nothing;
        // presumably a separator character was lost here - confirm against upstream.
        subst.setRoom(withoutQuestionMark.replaceFirst("", ""));
    }
}

From source file:me.vertretungsplan.parser.UntisCommonParser.java

/**
 * Fills the subject fields of a substitution from a cell.
 *
 * <p>If the cell content contains {@code <s>} elements, their text becomes the previous
 * subject and the remaining own text (if any) becomes the current subject with a leading
 * {@code ?} removed. Without {@code <s>} elements the whole cell text is the subject.
 *
 * @param subst substitution to update
 * @param cell  cell holding the subject information
 */
static void handleSubject(Substitution subst, Element cell) {
    final Element content = getContentElement(cell);
    if (content.select("s").isEmpty()) {
        subst.setSubject(content.text());
        return;
    }

    subst.setPreviousSubject(content.select("s").text());
    final String currentSubject = content.ownText();
    if (!currentSubject.isEmpty()) {
        final String withoutQuestionMark = currentSubject.replaceFirst("^\\?", "");
        // NOTE(review): the second pattern is empty, so this call changes nothing;
        // presumably a separator character was lost here - confirm against upstream.
        subst.setSubject(withoutQuestionMark.replaceFirst("", ""));
    }
}

From source file:me.vertretungsplan.parser.UntisCommonParser.java

/**
 * Fills the teacher fields of a substitution from a cell.
 *
 * <p>If the cell content contains {@code <s>} elements, their text is split into the
 * previous teachers and the remaining own text (if any) is split into the current teachers
 * after a leading {@code ?} is removed. Without {@code <s>} elements the whole cell text is
 * split into the current teachers.
 *
 * @param subst substitution to update
 * @param cell  cell holding the teacher information
 * @param data  configuration passed through to {@code splitTeachers}
 */
static void handleTeacher(Substitution subst, Element cell, JSONObject data) {
    final Element content = getContentElement(cell);
    if (content.select("s").isEmpty()) {
        subst.setTeachers(splitTeachers(content.text(), data));
        return;
    }

    subst.setPreviousTeachers(splitTeachers(content.select("s").text(), data));
    final String currentTeachers = content.ownText();
    if (!currentTeachers.isEmpty()) {
        final String withoutQuestionMark = currentTeachers.replaceFirst("^\\?", "");
        // NOTE(review): the second pattern is empty, so this call changes nothing;
        // presumably a separator character was lost here - confirm against upstream.
        subst.setTeachers(splitTeachers(withoutQuestionMark.replaceFirst("", ""), data));
    }
}

From source file:hello.Scraper.java

/**
 * Converts a scraped element into a {@link DumpEntry}.
 *
 * <p>The first 19 characters of the element's own text are parsed as a GMT timestamp in
 * {@code yyyy-MM-dd HH:mm:ss} form. The id and reference come from the first {@code <a>}
 * descendant (its own text and {@code href}); when there is none, the id is
 * {@code "private data"} and the reference is {@code null}. The status is the own text of
 * the first {@code <span>} descendant.
 *
 * @param payload element to convert; its own text must be at least 19 characters long and
 *                it must contain at least one {@code <span>}
 * @return the entry built from the element
 * @throws ParseException if the leading 19 characters are not a valid timestamp
 */
@Transformer(inputChannel = "channel3", outputChannel = "channel4")
public DumpEntry convert(Element payload) throws ParseException {
    final String dateStr = payload.ownText().substring(0, 19);

    // HH is the 24-hour clock. The text carries no AM/PM marker, so the 12-hour pattern
    // (hh) would read "12:xx:xx" as just after midnight instead of midday.
    final DateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    format.setTimeZone(TimeZone.getTimeZone("GMT"));

    final Date timestamp = format.parse(dateStr);

    final Elements list = payload.select("a");
    final String id;
    final String ref;
    if (list.size() > 0) {
        final Element a = list.get(0);
        id = a.ownText();
        ref = a.attr("href");
    } else {
        // No link present: the entry is not publicly addressable.
        id = "private data";
        ref = null;
    }

    final Element span = payload.select("span").get(0);
    final String status = span.ownText();

    return new DumpEntry(timestamp, id, ref, status);
}

From source file:org.apache.marmotta.ldclient.provider.phpbb.PHPBBPostProvider.java

/**
 * Return a mapping table mapping from RDF properties to XPath Value Mappers. Each entry in the
 * map is evaluated/*from ww  w .  ja  v  a  2  s.c o m*/
 * in turn; in case the XPath expression yields a result, the property is added for the
 * processed resource.
 * 
 * @return
 * @param requestUrl
 */
@Override
protected Map<String, JSoupMapper> getMappings(String resource, String requestUrl) {
    URI uri = null;
    try {
        uri = new URI(requestUrl);
        Map<String, String> params = new HashMap<String, String>();
        for (NameValuePair p : URLEncodedUtils.parse(uri, "UTF-8")) {
            params.put(p.getName(), p.getValue());
        }

        if (params.containsKey("p")) {
            // mappings for a reply that has directly been addressed using the ?p=... parameter
            // to viewtopic.php, e.g. http://www.carving-ski.de/phpBB/viewtopic.php?p=119208
            Map<String, JSoupMapper> commentMappings = new HashMap<String, JSoupMapper>();
            commentMappings.put(Namespaces.NS_DC + "title", new CssTextLiteralMapper(
                    String.format("div#pagecontent table:has(a[name=p%s]) td.gensmall div", params.get("p"))) {
                @Override
                public List<Value> map(String resourceUri, Element elem, ValueFactory factory) {
                    final String val = elem.ownText().replaceFirst("^\\s*:", "").replaceAll("&nbsp;", " ")
                            .trim();
                    if (datatype != null)
                        return Collections.singletonList((Value) factory.createLiteral(val,
                                factory.createURI(Namespaces.NS_XSD + datatype)));
                    else
                        return Collections.singletonList((Value) factory.createLiteral(val));
                }

                @Override
                public Elements select(Element htmlDoc) {
                    final Element first = super.select(htmlDoc).first();
                    return first != null ? new Elements(first) : new Elements();
                }
            });
            commentMappings.put(Namespaces.NS_DC + "creator", new CssTextLiteralMapper(
                    String.format("div#pagecontent table:has(a[name=p%s]) .postauthor", params.get("p"))));
            commentMappings.put(Namespaces.NS_DC + "description", new CssTextLiteralMapper(
                    String.format("div#pagecontent table:has(a[name=p%s]) div.postbody", params.get("p"))));
            commentMappings.put(Namespaces.NS_DC + "date", new PHPBBDateMapper(
                    String.format("div#pagecontent td.gensmall:has(a[name=p%s]) div", params.get("p"))) {
                @Override
                public Elements select(Element htmlDoc) {
                    final Elements sel = super.select(htmlDoc);
                    if (sel.size() > 0) {
                        final Element e = sel.get(1);
                        if (e != null)
                            return new Elements(e);
                    }
                    return new Elements();
                }
            });

            return commentMappings;
        } else
            throw new RuntimeException(
                    "the requested resource does not seem to identify a PHPBB Post (p=... parameter missing)");

    } catch (URISyntaxException e) {
        throw new RuntimeException(
                "the requested resource does not seem to identify a PHPBB Post (URI syntax error)");
    }

}