Example usage for org.jsoup.nodes Document select

List of usage examples for org.jsoup.nodes Document select

Introduction

On this page you can find example usages of the select method of org.jsoup.nodes.Document.

Prototype

public Elements select(String cssQuery) 

Source Link

Document

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:io.seldon.importer.articles.dynamicextractors.FirstElementAttrUppercaseValueDynamicExtractor.java

/**
 * Reads an attribute from the first element matching a CSS selector and returns it upper-cased.
 *
 * <p>{@code extractor_args} must contain the CSS selector at index 0 followed by at least one
 * attribute name. The attribute names are tried in order and the first non-blank value wins.
 *
 * @param attributeDetail supplies {@code extractor_args} (selector followed by attribute names)
 * @param url not used by this extractor
 * @param articleDoc parsed document to query
 * @return the upper-cased attribute value; the upper-cased value of the last attribute tried
 *         (possibly empty) when every attribute is blank; or {@code null} when fewer than two
 *         arguments are configured, the selector is blank, or no element matches
 * @throws Exception declared by the overridden method
 */
@Override
public String extract(AttributeDetail attributeDetail, String url, Document articleDoc) throws Exception {

    if ((attributeDetail.extractor_args == null) || (attributeDetail.extractor_args.size() < 2)) {
        return null;
    }

    String cssSelector = attributeDetail.extractor_args.get(0);
    // Validate the selector BEFORE querying: jsoup rejects an empty query with an exception,
    // whereas a blank selector is meant to produce "no value".
    if (StringUtils.isBlank(cssSelector)) {
        return null;
    }

    Element element = articleDoc.select(cssSelector).first();
    if (element == null) {
        return null;
    }

    String attrib_value = null;
    boolean isSelectorArg = true;
    for (String value_name : attributeDetail.extractor_args) {
        if (isSelectorArg) { // skip the first one, it is the cssSelector
            isSelectorArg = false;
            continue;
        }
        String candidate = element.attr(value_name);
        if (candidate != null) {
            attrib_value = candidate;
            if (StringUtils.isNotBlank(attrib_value)) {
                break;
            }
        }
    }

    // Locale.ROOT keeps the result independent of the JVM's default locale.
    return (attrib_value != null) ? attrib_value.toUpperCase(java.util.Locale.ROOT) : null;
}

From source file:com.example.android.expandingcells.ExpandingCells.java

/**
 * Asynchronously downloads the daily horoscope feed and, on success, writes the text of its
 * first paragraph into the second entry of {@code NewsList} and refreshes the adapter.
 *
 * <p>JSON failures are logged and otherwise ignored, so the list keeps its previous content.
 */
public void getHoroscope() {

    AsyncHttpClient client = new AsyncHttpClient();
    client.get(
            "http://pipes.yahoo.com/pipes/pipe.run?_id=_omfgXdL3BGGadhGdrq02Q&_render=json&sign=Virgo&url=http%3A%2F%2Fwww.astrology.com%2Fhoroscopes%2Fdaily-horoscope.rss",
            new JsonHttpResponseHandler() {
                @Override
                public void onSuccess(JSONObject response) {
                    try {
                        JSONArray horoscopeJsonResults = response.getJSONObject("value").getJSONArray("items");

                        // Pass the index value based on the sun sign, here 0->Aries, 1->Taurus ... etc
                        String horoscopeFullString = horoscopeJsonResults.getJSONObject(0)
                                .getString("description");

                        Document doc = Jsoup.parse(horoscopeFullString);
                        Element p = doc.select("p").first();
                        // first() yields null when the description contains no <p>; fall back to
                        // the whole document text instead of failing with a NullPointerException.
                        horoscopeText = (p != null) ? p.text() : doc.text();
                        Horo_Image = "drawable://" + R.drawable.mb_horoscope;
                        // TODO: replace the hard-coded findZodiacSign arguments with the birth date
                        Horo = "Daily Horoscope for " + CommonLib.findZodiacSign("9", "6") + ": \n"
                                + horoscopeText;
                        Log.d("DEBUG", Horo);
                        NewsList.get(1).setNews(Horo_Image, "Horoscope", Horo);
                        adapter.notifyDataSetChanged();
                    } catch (JSONException e) {
                        e.printStackTrace();
                        Log.d("DEBUG", "pipes");
                    }
                }
            });
}

From source file:net.niyonkuru.koodroid.html.SubscribersHandler.java

/**
 * Builds one insert or update operation per subscriber listed under {@code div#banSelector}.
 *
 * <p>Starting at the first {@code li} that contains a {@code div}, every following sibling
 * element is treated as a subscriber entry. The entry text is expected to be the subscriber
 * name followed by a space and a ten-digit phone number. An entry without an {@code onClick}
 * attribute is taken to be the default subscriber and is also written to the settings table.
 *
 * @param doc parsed page containing the subscriber list
 * @param resolver used to look up existing subscribers and to store the default subscriber
 * @return the operations to apply, one per subscriber; never empty
 * @throws HandlerException when no subscriber is found, a phone number does not have ten
 *         digits, or an {@code onClick} value does not contain a quoted URL
 */
@Override
public ArrayList<ContentProviderOperation> parse(Document doc, ContentResolver resolver)
        throws HandlerException {
    final ArrayList<ContentProviderOperation> batch = new ArrayList<ContentProviderOperation>();

    Element subscriberLi = doc.select("div#banSelector li:has(div)").first();
    while (subscriberLi != null) {
        String text = subscriberLi.text();

        /* this assumes the name and phone number are separated by a space */
        int separator = text.lastIndexOf(' ') + 1;

        String subscriberId = text.substring(separator).replaceAll("\\D", "");
        if (subscriberId.length() != 10)
            throw new HandlerException(getString(R.string.parser_error_unexpected_input));

        final ContentProviderOperation.Builder builder;

        final Uri subscriberUri = Subscribers.buildSubscriberUri(subscriberId);
        if (subscriberExists(subscriberUri, resolver)) {
            builder = ContentProviderOperation.newUpdate(subscriberUri);
            builder.withValue(Subscribers.UPDATED, System.currentTimeMillis());
        } else {
            builder = ContentProviderOperation.newInsert(Subscribers.CONTENT_URI);
        }
        builder.withValue(Subscribers.SUBSCRIBER_ID, subscriberId);

        /* capitalize each word of the name part and re-join with single spaces */
        StringBuilder fullName = new StringBuilder();
        for (String name : text.substring(0, separator).split("\\s")) {
            fullName.append(ParserUtils.capitalize(name)).append(' ');
        }
        builder.withValue(Subscribers.SUBSCRIBER_FULL_NAME, fullName.toString().trim());

        if (subscriberLi.hasAttr("onClick")) {
            String switchUrl = subscriberLi.attr("onClick");

            /* extract only the url: from the first '/' up to the last single quote */
            int urlStart = switchUrl.indexOf('/');
            int urlEnd = switchUrl.lastIndexOf('\'');
            if (urlStart < 0 || urlEnd < urlStart)
                /* report malformed markup like every other parse failure in this method */
                throw new HandlerException(getString(R.string.parser_error_unexpected_input));

            builder.withValue(Subscribers.SUBSCRIBER_SWITCHER, switchUrl.substring(urlStart, urlEnd));
        } else { /* this is the default subscriber as it doesn't have a switcher url */
            ContentValues cv = new ContentValues(1);
            cv.put(Settings.SUBSCRIBER, subscriberId);

            resolver.insert(Settings.CONTENT_URI, cv);
        }
        builder.withValue(Subscribers.SUBSCRIBER_EMAIL, mParent);

        batch.add(builder.build());

        subscriberLi = subscriberLi.nextElementSibling();
    }
    if (batch.size() == 0)
        throw new HandlerException(getString(R.string.parser_error_unexpected_input));

    /* attach the subscriber count and locale to crash reports; failure to do so is harmless */
    JSONObject metadata = new JSONObject();
    try {
        metadata.put("subscribers", batch.size());
        metadata.put("language", getString(R.string.locale));
    } catch (JSONException ignored) {
    }
    Crittercism.setMetadata(metadata);
    Crittercism.setUsername(mParent);

    return batch;
}

From source file:io.seldon.importer.articles.dynamicextractors.AllElementsTextListValueDynamicExtractor.java

/**
 * Collects the text of every element matching a CSS selector into a comma-separated list.
 *
 * <p>Each element's text is stripped and lower-cased; elements with blank text are skipped.
 *
 * @param attributeDetail supplies {@code extractor_args}; index 0 is the CSS selector
 * @param url not used by this extractor
 * @param articleDoc parsed document to query
 * @return the comma-separated texts (empty when nothing matches or all texts are blank), or
 *         {@code null} when no arguments are configured or the selector is blank
 * @throws Exception declared by the overridden method
 */
@Override
public String extract(AttributeDetail attributeDetail, String url, Document articleDoc) throws Exception {

    if ((attributeDetail.extractor_args == null) || (attributeDetail.extractor_args.size() < 1)) {
        return null;
    }

    String cssSelector = attributeDetail.extractor_args.get(0);
    // Validate the selector BEFORE querying: jsoup rejects an empty query with an exception,
    // whereas a blank selector is meant to produce "no value".
    if (StringUtils.isBlank(cssSelector)) {
        return null;
    }

    Elements elements = articleDoc.select(cssSelector);
    StringBuilder sb = new StringBuilder();
    for (Element e : elements) {
        String eText = StringUtils.strip(e.text());
        if (StringUtils.isBlank(eText)) {
            continue;
        }
        if (sb.length() > 0) {
            // Appended entries are never blank, so a non-empty buffer means "not the first".
            sb.append(",");
        }
        // Locale.ROOT keeps the result independent of the JVM's default locale.
        sb.append(eText.toLowerCase(java.util.Locale.ROOT));
    }

    return sb.toString();
}

From source file:com.aestasit.markdown.slidery.converters.TextTemplateConverter.java

/**
 * Applies the configured post-processing steps to the slides document.
 *
 * <p>All {@code aside} elements are removed unless notes are included, and syntax highlighting
 * is rendered when the {@code renderSyntaxHighlighting} option equals {@code "true"}.
 *
 * @param slidesDocument document to modify in place
 * @param config conversion settings consulted for both steps
 */
protected void transformDocument(final Document slidesDocument, final Configuration config) {
    final boolean keepNotes = config.notesIncluded();
    if (!keepNotes) {
        for (final Element aside : slidesDocument.select("aside")) {
            aside.remove();
        }
    }

    final boolean highlightSyntax = "true".equals(config.getOption("renderSyntaxHighlighting"));
    if (highlightSyntax) {
        renderSyntaxHighlightingHtml(slidesDocument, config);
    }
}

From source file:coding.cowboys.scrapers.DvcMagicResalesScraper.java

/**
 * Scrapes the DVC Magic Resales listing page and returns every active listing.
 *
 * <p>Only rows with the {@code stat-Active} class inside {@code table#listALL} are read. The
 * first five cells are taken as resort, use year, points, price per point and total price.
 *
 * @return the active listings; empty when the page cannot be fetched or lists none
 */
public List<ResortWrapper> findResorts() {
    List<ResortWrapper> wrappers = new ArrayList<ResortWrapper>();
    Document doc = null;
    try {
        doc = Jsoup.connect(SiteUrls.DVC_MAGIC_RESALES).timeout(60000).get();
    } catch (IOException e) {
        // Best-effort scrape: report the failure and fall through to the empty result.
        e.printStackTrace();
    }
    if (doc == null) {
        System.out.println("DVC Magic Resales returned null");
        return wrappers;
    }

    for (Element element : doc.select("table#listALL")) {
        for (Element row : element.select("tr")) {
            if (!row.hasClass("stat-Active")) {
                continue;
            }
            Elements data = row.select("td");
            if (data.size() < 5) {
                // Not a complete listing row; skip it rather than abort the whole
                // scrape with an IndexOutOfBoundsException.
                continue;
            }
            String resortCell = data.get(0).text();

            ResortWrapper wrapper = new ResortWrapper();
            wrapper.setResort(getResortFromText(resortCell));
            wrapper.setUseYear(data.get(1).text());
            wrapper.setPoints(data.get(2).text());
            wrapper.setPricePerPoint(data.get(3).text());
            wrapper.setTotalPrice(data.get(4).text());
            // The point summary is whatever the first cell holds besides the resort name.
            wrapper.setPointSummary(resortCell.replace(wrapper.getResort(), ""));
            wrapper.setUrl("http://www.dvcmagicresales.com/dvcmr/resales-all-listings/");
            wrappers.add(wrapper);
        }
    }
    return wrappers;
}

From source file:com.aurel.track.exchange.docx.exporter.PreprocessImage.java

/**
 * Gets the image captions in a map keyed by itemID_attachmentID
 * The key is saved also in the <img> tag's "alt" attribute for later use from word
 * @param doc/*ww w .j  a  v  a 2s  .  co  m*/
 * @param personID
 * @param imageCaptionsMap
 * @return
 */
private String getImageCaptions(Document doc, Integer personID,
        Map<String, ImageOrTableCaption> imageCaptionsMap) {
    Elements imgElements = doc.select("img");
    if (imgElements != null) {
        for (Iterator<Element> iterator = imgElements.iterator(); iterator.hasNext();) {
            Element imageElement = iterator.next();
            String sourceAttribute = imageElement.attr("src");
            String style = imageElement.attr("style");
            //remove the width and height attributes from html img to avoid java.lang.OutOfMemoryError: Java heap space
            imageElement.removeAttr("width");
            imageElement.removeAttr("height");
            ALIGN align = null;
            if (style != null) {
                if (style.contains("float:left")) {
                    align = ALIGN.LEFT;
                } else {
                    if (style.contains("float:right")) {
                        align = ALIGN.RIGHT;
                    }
                }
            }
            String altAttribute = imageElement.attr("alt");
            Map<String, String> map = getTemporaryFilePathMap(sourceAttribute, personID);
            if (map != null) {
                imageElement.attr("src", map.get("temporaryFilePath"));
                //save imageCaption into the map and now use the "alt" attribute for storing the merged key
                //which will be transformed  in nonvisualdrawingprops.getDescr() by XHTMLImporterImpl to set the caption on the ms word side
                String imageCaption = null;
                if (altAttribute != null && !"".equals(altAttribute)) {
                    //probably from previously removed figcaption but it may also be explicitly set
                    imageCaption = altAttribute;
                } else {
                    imageCaption = map.get("description");
                }
                globalCounter++;
                counterWithinChapter++;
                imageElement.attr("alt", String.valueOf(globalCounter));
                if (imageCaption == null) {
                    //add anyway to the map even as empty string because this marks the image to be added to the List of figures 
                    imageCaption = "";
                }
                imageCaptionsMap.put(String.valueOf(globalCounter),
                        new ImageOrTableCaption(chapterNo, counterWithinChapter, imageCaption, align));
            }
        }
    }
    return doc.body().html();
}

From source file:accountgen.controller.Controller.java

/**
 * Fills the person with the values scraped from the non-label list items below {@code .extra}.
 *
 * <p>Fields are read by fixed position, so the page must provide at least 19 such items.
 * Item 7 is parsed as {@code month/year} with a four-digit year. Item 13 is parsed as
 * {@code year brand... model}, separated by single spaces.
 *
 * @param doc parsed profile page
 * @param p person to populate
 */
private void getListTags(Document doc, Person p) {
    Elements li = doc.select(".extra").select("li:not(.lab)");
    p.setUsername(li.get(2).text());
    p.setPassword(li.get(3).text());
    p.setMmn(li.get(4).text());
    p.setMastercard(li.get(6).text());
    p.setSsn("");

    // Expiry is the first day of the given month at midnight, instead of carrying over the
    // current time-of-day as the deprecated Date setters did.
    String[] expiryParts = li.get(7).text().split("/");
    int expiryYear = Integer.parseInt(expiryParts[1]);
    int expiryMonth = Integer.parseInt(expiryParts[0]);
    p.setExpires(new java.util.GregorianCalendar(expiryYear, expiryMonth - 1, 1).getTime());

    p.setCvv2(li.get(8).text());
    p.setFavoritecolor(li.get(9).text());
    p.setOccupation(li.get(10).text());
    p.setCompany(li.get(11).text());
    p.setWebsite(li.get(12).text());

    // Vehicle text: first token is the year, last token the model, everything between the brand.
    String[] vehicleTokens = li.get(13).text().split(" ");
    Vehicle v = new Vehicle();
    v.setModel(vehicleTokens[vehicleTokens.length - 1].trim());
    v.setYear(Integer.parseInt(vehicleTokens[0].trim()));
    // Join the middle tokens rather than deleting year/model by substring replacement, which
    // also stripped those character sequences out of the brand itself.
    StringBuilder brand = new StringBuilder();
    for (int i = 1; i < vehicleTokens.length - 1; i++) {
        if (brand.length() > 0) {
            brand.append(' ');
        }
        brand.append(vehicleTokens[i]);
    }
    v.setBrand(brand.toString().trim());
    p.setVehicle(v);

    p.setUpsnr(li.get(14).text());
    p.setBloodtype(li.get(15).text());
    // Weight and height: the first word after the opening parenthesis.
    p.setWeight(li.get(16).text().split("\\(")[1].split(" ")[0]);
    p.setHeight(li.get(17).text().split("\\(")[1].split(" ")[0]);
    p.setGuid(li.get(18).text());
}

From source file:com.amastigote.xdu.query.module.WaterAndElectricity.java

/**
 * Loads the page for the given host suffix and stores the value of its hidden
 * {@code __VIEWSTATE} input in {@code VIEWSTATE}.
 *
 * @param host_suffix passed to {@code getPage} to select the page
 * @throws IOException when the page cannot be loaded or has no {@code __VIEWSTATE} input
 */
private void getPageAttributes(String host_suffix) throws IOException {
    Document document = getPage("", host_suffix);
    Elements elements_VIEWSTATE = document.select("input[type=\"hidden\"][name=\"__VIEWSTATE\"]");
    if (elements_VIEWSTATE.isEmpty()) {
        // Report an unexpected page through the declared exception instead of letting
        // get(0) fail with an unchecked IndexOutOfBoundsException.
        throw new IOException("No hidden __VIEWSTATE input found in page");
    }
    VIEWSTATE = elements_VIEWSTATE.get(0).attr("value");
}

From source file:com.aestheticsw.jobkeywords.service.termextractor.impl.indeed.IndeedClient.java

/**
 * Fetches an Indeed job page and returns its job-details markup.
 *
 * <p>The result is the HTML of the elements matching {@code #job_header > b > font}, a line
 * break, and the HTML of the elements matching {@code #job_summary}. The URL is expected to be
 * one that Indeed returned for a job summary.
 *
 * <p>Parsing relies on the JSoup library, which can consume malformed HTML and XML with
 * invalid syntax. JSoup can't be tested easily.
 *
 * <p>TODO convert from JSoup to HtmlCleaner
 *
 * @param url address of the job-details page
 * @return header markup and summary markup separated by a newline
 * @throws RuntimeException when the page cannot be fetched
 */
public String getIndeedJobDetails(String url) {
    log.debug("Indeed job-details query: " + url);

    final Document page;
    try {
        page = Jsoup.connect(url).get();
    } catch (IOException e) {
        throw new RuntimeException("Indeed-search or JSoup response parser failed, URL: " + url, e);
    }

    final Elements header = page.select("#job_header > b > font");
    final Elements summary = page.select("#job_summary");
    return header.toString() + "\n" + summary.toString();
}