Example usage of org.jsoup.nodes.Document.title()

List of usage examples for org.jsoup.nodes.Document.title()

Introduction

On this page you can find example usages of org.jsoup.nodes.Document.title().

Prototype

public String title() 

Source Link

Document

Get the string contents of the document's title element.

Usage

From source file:org.brunocvcunha.taskerbox.impl.crawler.SniptAction.java

/**
 * Walks every anchor found under {@code .grid-block} in the given document and dispatches
 * the ones that {@code canAct} accepts.
 *
 * <p>The id of an entry is the anchor's {@code href} with the {@code http://snipt.org/}
 * prefix removed; the title passed to {@code spreadAction} is the id followed by the
 * anchor text.
 *
 * @param entry the parsed page to scan
 */
@Override
public void action(final Document entry) {
    log.debug("Validating " + entry.title());

    for (Element link : entry.select(".grid-block").select("a")) {
        final String snippetId = link.attr("href").replace("http://snipt.org/", "");

        if (!canAct(snippetId)) {
            continue;
        }

        final String label = snippetId + " - " + link.text();

        // Record the id first, then dispatch, persist the acted-on state and pause.
        addAct(snippetId);
        spreadAction(snippetId, label);
        serializeAlreadyAct();
        sleep(FETCH_INTERVAL);
    }
}

From source file:org.brunocvcunha.taskerbox.impl.crawler.PastebinAction.java

/**
 * Walks every anchor found under {@code .maintable} in the given document and dispatches
 * the ones that {@code canAct} accepts.
 *
 * <p>The id of an entry is the anchor's {@code href} without its first character. Anchors
 * whose id starts with {@code archive} are skipped, as are anchors that carry no
 * {@code href} at all.
 *
 * @param entry the parsed page to scan
 */
@Override
public void action(final Document entry) {

    log.debug("Validating " + entry.title());

    for (Element el : entry.select(".maintable").select("a")) {
        final String href = el.attr("href");
        if (href.isEmpty()) {
            // attr() yields "" for a missing attribute; substring(1) on it would throw
            // StringIndexOutOfBoundsException and abort the whole scan.
            continue;
        }

        final String id = href.substring(1);
        if (id.startsWith("archive")) {
            continue;
        }

        final String title = id + " - " + el.text();

        if (canAct(id)) {
            addAct(id);

            spreadAction(id, title);
            serializeAlreadyAct();
            sleep(FETCH_INTERVAL);
        }

    }

}

From source file:com.johan.vertretungsplan.parser.UntisMonitorParser.java

/**
 * Logs in, loads every URL listed under the school's {@code urls} configuration entry and
 * builds a {@code Vertretungsplan} from the loaded documents whose title contains
 * {@code "Untis"}.
 *
 * <p>Days are keyed by {@code getDatum()}; when two documents yield the same key the later
 * one is merged into the earlier one. Documents without {@code "Untis"} in their title are
 * ignored.
 *
 * @return the plan holding one entry per distinct day, in first-seen order
 * @throws IOException   declared by this method; presumably raised while logging in or loading
 * @throws JSONException declared by this method; presumably raised by the JSON accessors
 */
public Vertretungsplan getVertretungsplan() throws IOException, JSONException {
    new LoginHandler(schule).handleLogin(executor, cookieStore, username, password);

    final JSONArray urlEntries = schule.getData().getJSONArray("urls");
    final String charset = schule.getData().getString("encoding");
    final List<Document> pages = new ArrayList<Document>();

    for (int idx = 0; idx < urlEntries.length(); idx++) {
        final JSONObject urlEntry = urlEntries.getJSONObject(idx);
        loadUrl(urlEntry.getString("url"), charset, urlEntry.getBoolean("following"), pages);
    }

    // LinkedHashMap keeps the days in the order they were first encountered.
    final LinkedHashMap<String, VertretungsplanTag> daysByDate = new LinkedHashMap<String, VertretungsplanTag>();
    for (Document page : pages) {
        if (!page.title().contains("Untis")) {
            // Error case in the original ("Fehler"): the page is skipped without any handling.
            continue;
        }

        final VertretungsplanTag day = parseMonitorVertretungsplanTag(page, schule.getData());
        if (daysByDate.containsKey(day.getDatum())) {
            final VertretungsplanTag existing = daysByDate.get(day.getDatum());
            existing.merge(day);
            daysByDate.put(day.getDatum(), existing);
        } else {
            daysByDate.put(day.getDatum(), day);
        }
    }

    final Vertretungsplan plan = new Vertretungsplan();
    plan.setTage(new ArrayList<VertretungsplanTag>(daysByDate.values()));
    return plan;
}

From source file:org.brunocvcunha.taskerbox.impl.crawler.CodepadAction.java

/**
 * Walks every {@code .section} element of the given document and dispatches the ones that
 * {@code canAct} accepts.
 *
 * <p>The id is the {@code href} of the section's anchor with the
 * {@code http://codepad.org/} prefix removed. The title is the id followed by the first
 * 32 characters of the section's {@code pre} text with line breaks replaced by spaces.
 *
 * @param entry the parsed page to scan
 */
@Override
public void action(final Document entry) {
    log.debug("Validating " + entry.title());

    for (Element section : entry.select(".section")) {
        final String pasteId = section.select("a").attr("href").replace("http://codepad.org/", "");

        // Flatten the code to a single line and cap it at 32 characters for the title.
        final String flattened = section.select("pre").text().replaceAll("\r?\n", " ");
        final String preview = flattened.length() > 32 ? flattened.substring(0, 32) : flattened;

        if (!canAct(pasteId)) {
            continue;
        }

        final String label = pasteId + " - " + preview;

        addAct(pasteId);
        spreadAction(pasteId, label);
        serializeAlreadyAct();
        sleep(FETCH_INTERVAL);
    }
}

From source file:org.brunocvcunha.taskerbox.impl.crawler.PastieAction.java

/**
 * Walks every {@code .pastePreview} element of the given document and dispatches the ones
 * that {@code canAct} accepts.
 *
 * <p>The id is the {@code href} of the preview's anchor with the
 * {@code http://pastie.org/pastes/} prefix removed. The title is the id followed by the
 * first 32 characters of the preview's {@code pre} text with line breaks replaced by spaces.
 *
 * @param entry the parsed page to scan
 */
@Override
public void action(final Document entry) {
    log.debug("Validating " + entry.title());

    for (Element preview : entry.select(".pastePreview")) {
        final String pasteId = preview.select("a").attr("href").replace("http://pastie.org/pastes/", "");

        // Flatten the code to a single line and cap it at 32 characters for the title.
        final String flattened = preview.select("pre").text().replaceAll("\r?\n", " ");
        final String snippet = flattened.length() > 32 ? flattened.substring(0, 32) : flattened;

        if (!canAct(pasteId)) {
            continue;
        }

        final String label = pasteId + " - " + snippet;

        addAct(pasteId);
        spreadAction(pasteId, label);
        serializeAlreadyAct();
        sleep(FETCH_INTERVAL);
    }
}

From source file:cn.edu.hfut.dmic.webcollector.example.TutorialCrawler.java

/**
 * Prints the URL and title of the visited page, stores the page in the {@code tb_content}
 * table when a {@code jdbcTemplate} is available, and returns the links to follow next.
 *
 * @param page the page that was just fetched
 * @return the links collected from the page's document using {@code regexRule}
 */
@Override
public Links visitAndGetNextLinks(Page page) {
    final Document document = page.getDoc();
    final String pageTitle = document.title();
    System.out.println("URL:" + page.getUrl() + "  :" + pageTitle);

    // Persisting is optional: it only happens when a JdbcTemplate has been set.
    if (jdbcTemplate != null) {
        final int affectedRows = jdbcTemplate.update(
                "insert into tb_content (title,url,html) value(?,?,?)",
                pageTitle, page.getUrl(), page.getHtml());
        if (affectedRows == 1) {
            System.out.println("mysql??");
        }
    }

    // Gather the follow-up links from the document, filtered by regexRule.
    final Links followUps = new Links();
    followUps.addAllFromDocument(document, regexRule);
    return followUps;
}

From source file:com.qubole.rubix.hadoop1.Hadoop1ClusterManager.java

/**
 * Extracts worker node names from an HTML node list.
 *
 * <p>For every element with class {@code name}, the own text of its first
 * {@code a[href]} descendant is collected when it is non-empty. The result is sorted in
 * natural string order.
 *
 * @param dfsnodelist the HTML markup to parse
 * @return the sorted node names; empty when the markup contains none
 */
private List<String> extractNodes(String dfsnodelist) {
    Document doc = Jsoup.parse(dfsnodelist);
    List<String> workers = new ArrayList<String>();

    Elements links = doc.select(".name");
    for (int i = 0; i < links.size(); i++) {
        // select() returns an empty Elements rather than null, so an emptiness check suffices.
        Elements nodes = links.get(i).select("a[href]");
        if (nodes.isEmpty()) {
            continue;
        }

        String node = nodes.get(0).ownText();
        if (!node.isEmpty()) {
            workers.add(node);
        }
    }

    Collections.sort(workers);
    return workers;
}

From source file:fi.helsinki.opintoni.service.usefullink.UsefulLinkService.java

/**
 * Fetches {@code searchPageTitleDto.searchUrl} with a GET request (Accept: text/html,
 * User-Agent: Mozilla) and, when the response status is {@code 200 OK}, stores the title
 * of the returned HTML document in {@code searchPageTitleDto.searchResult}.
 *
 * <p>Any exception raised while fetching or parsing is swallowed, in which case the DTO
 * is returned as it was passed in.
 *
 * @param searchPageTitleDto carries the URL to fetch and receives the page title
 * @return the same DTO instance
 * @throws NotFoundException declared in the signature; no statement in this method throws it
 */
public SearchPageTitleDto searchPageTitle(SearchPageTitleDto searchPageTitleDto) throws NotFoundException {
    try {
        HttpHeaders requestHeaders = new HttpHeaders();
        requestHeaders.setAccept(Lists.newArrayList(MediaType.TEXT_HTML));
        requestHeaders.add("User-Agent", "Mozilla");
        HttpEntity<String> request = new HttpEntity<>("parameters", requestHeaders);

        ResponseEntity<String> response = linkUrlLoaderRestTemplate.exchange(
                searchPageTitleDto.searchUrl, HttpMethod.GET, request, String.class);
        if (!response.getStatusCode().equals(HttpStatus.OK)) {
            return searchPageTitleDto;
        }

        searchPageTitleDto.searchResult = Jsoup.parse(response.getBody()).title();
    } catch (Exception ignored) {
        // NOTE(review): failures are dropped silently, so callers cannot tell an
        // unreachable page from one without a title - consider logging here.
    }
    return searchPageTitleDto;
}

From source file:org.manalith.ircbot.plugin.uriinfo.UriInfoPlugin.java

/**
 * Builds a one-line description of the resource behind {@code uri}.
 *
 * <p>The resource is fetched with the configured {@code USER_AGENT}. When it parses as a
 * document with a non-blank title, the result is the whitespace-normalised title,
 * optionally preceded by a site-specific title. When no title can be obtained, an HTML
 * response yields a bare marker string; otherwise the content type is reported if
 * {@code enablePrintContentType} is set.
 *
 * @param uri the address to inspect
 * @return the description, or {@code null} when the request fails or nothing is to be reported
 */
private String getInfo(String uri) {
    Response response;

    try {
        // Send an explicit User-Agent with the request.
        response = Jsoup.connect(uri).userAgent(USER_AGENT).execute();
    } catch (UnsupportedMimeTypeException e) {
        // Content that cannot be parsed as a document: report its MIME type if enabled.
        return enablePrintContentType ? "[?? ?] " + e.getMimeType() : null;
    } catch (IOException e) {
        logger.warn(e.getMessage(), e);
        return null;
    }

    // May be null when the server sent no Content-Type header.
    String contentType = response.contentType();
    String result = null;

    try {
        Document document = response.parse();
        String title = document.title();

        // A blank title is routed to the same fallback as a parse failure.
        if (StringUtils.isBlank(title))
            throw new IOException();

        // Collapse every run of whitespace into a single space.
        title = title.trim().replaceAll("(\\s){1,}", " ");

        String stitle = getSiteSpecificTitle(uri, document);
        if (stitle == null)
            result = "[?? ] " + title;
        else
            result = "[?? ] " + stitle + " | " + title;
    } catch (IOException e) {
        // No usable title. Guard against a missing Content-Type header, which would
        // otherwise raise a NullPointerException here.
        if (contentType != null && contentType.startsWith("text/html"))
            result = "[?? ]";
    }

    if (result == null && enablePrintContentType) {
        result = "[?? ?] " + contentType;
    }

    return result;
}

From source file:app.data.parse.WebPageUtil.java

/**
 * Fetches the page behind {@code url} and extracts its final URL, title and description.
 *
 * <p>Title lookup order: {@code og:title}, {@code twitter:title}, then the document title.
 * Description lookup order: {@code og:description}, {@code twitter:description},
 * {@code meta[name=description]}, then the page's paragraphs. The description is passed
 * through {@code ellipsis} with a limit of 140 and {@code "..."}.
 *
 * @param url          the address to inspect
 * @param urlInfoCache optional cache keyed by the (rewritten) input URL; may be {@code null}
 * @return the cached entry when present, otherwise a freshly built one
 * @throws IOException declared by this method; presumably raised by {@code requestUrl}
 */
public static WebPageInfo parse(String url, Cache<String, WebPageInfo> urlInfoCache) throws IOException {
    String original = url;

    // toutiao.io post links are rewritten to their "/k/" form before lookup.
    // TODO(fixme, carried over): http://toutiao.io/shares/640539/url
    if (original.startsWith("https://toutiao.io/posts/")) {
        original = original.replace("/posts/", "/k/");
    }

    if (urlInfoCache != null) {
        WebPageInfo cached = urlInfoCache.getIfPresent(original);
        if (cached != null) {
            return cached;
        }
    }

    WebPageInfo info = new WebPageInfo();
    info.url = original;

    Document doc = requestUrl(info.url);
    info.url = doc.baseUri(); // or doc.location()

    // gold.xitu.io entry pages link to the shared article; follow that link instead.
    if (info.url.startsWith("http://gold.xitu.io/entry/")) {
        Elements origin = doc.select("div[class=ellipsis]");
        Elements originLink = origin.select("a[class=share-link]");
        info.url = originLink.attr("href");

        doc = requestUrl(info.url);
        info.url = doc.baseUri(); // or doc.location()
    }

    info.url = smartUri(info.url);

    String metaTitle = firstMetaContent(doc,
            "meta[property=og:title]",
            "meta[property=twitter:title]");
    info.title = StringUtils.isEmpty(metaTitle) ? doc.title() : metaTitle;

    String description = firstMetaContent(doc,
            "meta[property=og:description]",
            "meta[property=twitter:description]",
            "meta[name=description]");
    if (StringUtils.isEmpty(description)) {
        description = paragraphDescription(doc);
    }
    info.description = ellipsis(description, 140, "...");

    if (urlInfoCache != null) {
        urlInfoCache.put(original, info);
    }
    return info;
}

/** Returns the first non-empty {@code content} attribute among the given selectors, or {@code ""}. */
private static String firstMetaContent(Document doc, String... selectors) {
    for (String selector : selectors) {
        // select() never returns null and attr() yields "" when nothing matches.
        String content = doc.select(selector).attr("content");
        if (!StringUtils.isEmpty(content)) {
            return content;
        }
    }
    return "";
}

/** Returns the first paragraph text of at least 20 characters, else the last paragraph's text, else {@code ""}. */
private static String paragraphDescription(Document doc) {
    String text = "";
    for (Element paragraph : doc.body().select("p")) {
        text = paragraph.text();
        if (text != null && text.length() >= 20) {
            break;
        }
    }
    return text;
}