Example usage for org.jsoup.nodes Document select

List of usage examples for org.jsoup.nodes Document select

Introduction

On this page you can find example usages of the org.jsoup.nodes Document select method.

Prototype

public Elements select(String cssQuery) 

Source Link

Document

Find elements that match the Selector CSS query, with this element as the starting context.

Usage

From source file:com.msds.km.service.Impl.DrivingLicenseRecognitionServcieiImpl.java

/**
 * html???/*from   www .  j  ava 2s  .  c o m*/
 * 
 * @param html
 *            ??xml?java
 * @return 
 * @throws Exception
 */
private DrivingLicense parseDrivingLicense(String html) throws Exception {
    if (html.isEmpty()) {
        logger.info("");
        return null;
    }
    Document document = Jsoup.parse(html);
    if (document == null) {
        logger.info("html");
        return null;
    }

    Elements fieldsets = document.select("div[class=left result] fieldset");
    if (fieldsets.size() != 1) {
        logger.info("?");
        return null;
    }
    Element regResult = fieldsets.first();
    String result = regResult.html().trim();

    // 
    String removedStr = "<legend></legend>";
    if (result.startsWith(removedStr)) {
        result = result.substring(removedStr.length());
    }

    // ??xml
    result = StringEscapeUtils.unescapeXml(result);

    // 
    result = "<drivingLicense>" + result + "</drivingLicense>";
    return XMLUtils.toObject(result, DrivingLicense.class);
}

From source file:gov.medicaid.screening.dao.impl.AccreditedBirthCentersLicenseDAOBean.java

/**
 * Retrieves all results from the source site.
 * /*from   w w w  .jav a  2 s. c  o  m*/
 * @return the birth centers matched
 * @throws URISyntaxException
 *             if the URL could not be correctly constructed
 * @throws IOException
 *             for any I/O related errors
 * @throws ServiceException
 *             for any other errors encountered
 */
private List<AccreditedBirthCenter> getAllResults() throws URISyntaxException, IOException, ServiceException {
    DefaultHttpClient client = new DefaultHttpClient();
    client.setRedirectStrategy(new LaxRedirectStrategy());

    HttpGet getFrontPage = new HttpGet(new URIBuilder(getSearchURL()).build());
    HttpResponse response = client.execute(getFrontPage);

    verifyAndAuditCall(getSearchURL(), response);

    Document page = Jsoup.parse(EntityUtils.toString(response.getEntity()));

    List<AccreditedBirthCenter> allCenters = new ArrayList<AccreditedBirthCenter>();
    Elements rows = page.select("table#wp-table-reloaded-id-1-no-1 tbody tr");
    for (Element row : rows) {
        AccreditedBirthCenter center = parseCenter(row.children());
        if (center != null) {
            allCenters.add(center);
        }
    }
    return allCenters;
}

From source file:cd.go.contrib.elasticagents.dockerswarm.elasticagent.executors.AgentStatusReportExecutorTest.java

@Test
public void shouldPrintMessageWhenLogIsNotAvailable() throws Exception {
    // Arrange: one known service whose log stream is empty, and no job identifier.
    final Service stubbedService = mockedService("elastic-agent-id", "abcd-xyz");
    when(client.listServices()).thenReturn(Arrays.asList(stubbedService));
    when(client.serviceLogs("abcd-xyz", stdout(), stderr())).thenReturn(new StubbedLogStream(""));
    when(statusReportRequest.getElasticAgentId()).thenReturn("elastic-agent-id");
    when(statusReportRequest.getJobIdentifier()).thenReturn(null);

    // Act
    GoPluginApiResponse pluginResponse = executor.execute();

    // Assert: the call succeeds and the rendered view carries the placeholder text.
    assertThat(pluginResponse.responseCode(), is(200));

    final Map<String, String> body = GSON.fromJson(pluginResponse.responseBody(),
            new TypeToken<Map<String, String>>() {
            }.getType());
    assertTrue(body.containsKey("view"));

    final Document view = Jsoup.parse(body.get("view"));
    final String logsText = view.select(".service-logs").text();
    assertThat(logsText, is("Logs not available for this agent."));
}

From source file:accountgen.controller.Controller.java

/**
 * Copies the e-mail from the first {@code .email} element of {@code doc} into {@code p}.
 * Only the text before the first space is kept.
 */
private void setEmail(Document doc, Person p) {
    String emailText = doc.select(".email").first().text();
    String firstToken = emailText.split(" ")[0];
    p.setEmail(firstToken);
}

From source file:accountgen.controller.Controller.java

/**
 * Copies the text of the first {@code .tel} element of {@code doc} into {@code p} as its phone.
 */
private void setPhone(Document doc, Person p) {
    String phoneText = doc.select(".tel").first().text();
    p.setPhone(phoneText);
}

From source file:com.webcrawler.manager.impl.ImageManagerImpl.java

/**
 * Downloads the page at {@code url} on the executor service and returns one
 * {@link ImageDTO} per {@code img} element that has a {@code src} attribute.
 *
 * <p>Fetching is best-effort: if the page cannot be loaded or queried, the
 * stack trace is printed and an empty list is returned.
 *
 * @param url
 *            the page to scan; must not be {@code null} or empty
 * @return the images found, possibly empty
 * @throws IllegalArgumentException
 *             if {@code url} is {@code null} or empty
 * @throws InterruptedException
 *             if the calling thread is interrupted while waiting for the result
 * @throws ExecutionException
 *             if the submitted task fails
 * @throws IOException
 *             declared by the signature; not raised directly by this body
 */
@Override
public List<ImageDTO> getImageData(final String url)
        throws IOException, IllegalArgumentException, InterruptedException, ExecutionException {

    if (url == null || url.isEmpty()) {
        throw new IllegalArgumentException("Set URL first");
    }

    Callable<List<ImageDTO>> callable = new Callable<List<ImageDTO>>() {

        @Override
        public List<ImageDTO> call() throws Exception {
            System.out.println("Retrieving image data from url " + url);

            List<ImageDTO> images = new ArrayList<ImageDTO>();
            Elements media;
            try {
                Document document = Jsoup.connect(url).get();
                // Select only <img> elements so the count reported below is the
                // number of images, not of every element carrying a src attribute.
                media = document.select("img[src]");
            } catch (Exception e) {
                // Deliberate best-effort: report the failure and return no images.
                e.printStackTrace();
                return images;
            }

            System.out.println("# of images: " + media.size());

            for (Element image : media) {
                // "abs:src" resolves the source against the page's base URI.
                String absoluteSrc = image.attr("abs:src");
                ImageDTO dto = new ImageDTO();
                dto.setUrlAddress(absoluteSrc);
                dto.setFileName(getFileName(absoluteSrc));
                images.add(dto);
            }

            return images;
        }
    };

    Future<List<ImageDTO>> result = executorService.submit(callable);

    return result.get();

}

From source file:accountgen.controller.Controller.java

/**
 * Sets the gender of {@code p} from the {@code alt} attribute of the first {@code img}
 * inside the first {@code .content} element of the first {@code .bcs} element.
 */
private void setGender(Document doc, Person p) {
    Element block = doc.select(".bcs").first();
    Element content = block.select(".content").first();
    Element icon = content.select("img").first();
    p.setGender(icon.attr("alt"));
}

From source file:com.mycompany.grabberrasskazov.threads.ThreadForPageSave.java

/**
 * Downloads the story page at {@code pageUrl} and stores it through {@code mainBean},
 * unless a story with the same old id already exists.
 *
 * <p>The old id is {@code pageUrl} with {@code GlobalVars.mainSite} removed. From the
 * page the method extracts the story name, the writer (link, name, contact link),
 * the categories linked via {@code ras.shtml?kat} and the justified text paragraph.
 *
 * <p>An {@link IOException} while fetching is printed and swallowed; runtime
 * exceptions caused by missing page elements (for example {@code get(0)} on an
 * empty selection) propagate to the caller.
 *
 * @param pageUrl
 *            absolute URL of the story page
 */
public void indexStory(String pageUrl) {
    try {
        String oldId = pageUrl.replace(GlobalVars.mainSite, "");
        if (!mainBean.storyExists(oldId)) {
            Stories r = new Stories();

            Document doc = Jsoup.connect(pageUrl)
                    .userAgent("Opera/9.80 (X11; Linux x86_64) " + "Presto/2.12.388 Version/12.16").get();

            // Story name: second cell of the table row holding the label.
            Elements nameBlockElements = doc.select("b:containsOwn(?)");
            Element nameBlock = nameBlockElements.get(0);
            nameBlock = nameBlock.parent().parent();
            nameBlockElements = nameBlock.select("td:eq(1)");
            nameBlock = nameBlockElements.get(0);
            String storyName = nameBlock.text();
            r.setStoryName(storyName);

            // Start of processing writer
            Elements writerBlockElements = doc.select("b:containsOwn(?:)");
            Element writerBlock = writerBlockElements.get(0);
            writerBlock = writerBlock.parent().parent();
            writerBlockElements = writerBlock.select("td:eq(1)");
            writerBlock = writerBlockElements.get(0);

            Elements writerLink = writerBlock.select("a:eq(0)");
            String writersUrl = writerLink.attr("href");
            String writersName = writerLink.text();
            String writersContacts = writerBlock.select("a:eq(1)").attr("href");

            StoryWriters storyWriter = new StoryWriters();
            storyWriter.setOldId(writersUrl);
            storyWriter.setWriterEmail(writersContacts);
            storyWriter.setWriterName(writersName);
            storyWriter = mainBean.saveWriter(storyWriter);

            Set<StoriesToWritersRelations> storiesToWritersRelationses = new HashSet<StoriesToWritersRelations>();
            StoriesToWritersRelations storiesToWritersRelations = new StoriesToWritersRelations();
            storiesToWritersRelations.setStories(r);
            storiesToWritersRelations.setStoryWriters(storyWriter);
            // The relation must be part of the set stored on the story; previously
            // it was built but never added, so the story ended up with no writer.
            storiesToWritersRelationses.add(storiesToWritersRelations);
            r.setStoriesToWritersRelationses(storiesToWritersRelationses);
            // End of processing writer

            Set<StoriesToCategoriessRelations> catsRelationses = new HashSet<>();
            Elements katsInfo = doc.select("a[href*=ras.shtml?kat]");
            for (Element kat : katsInfo) {
                String katId = kat.attr("href");
                StoryCategories cat = mainBean.getCat(katId);

                StoriesToCategoriessRelations catsRelations = new StoriesToCategoriessRelations();
                catsRelations.setStoryCategories(cat);
                catsRelations.setStories(r);

                catsRelationses.add(catsRelations);
            }
            r.setStoriesToCategoriessRelationses(catsRelationses);

            // Story text: inner HTML of the first justified paragraph, with double
            // quotes replaced by single quotes.
            Elements textBlocks = doc.select("p[align=justify]");
            Element textBlock = textBlocks.get(0);
            String textStr = textBlock.html();
            r.setStoryText(textStr.replace("\"", "'"));

            r.setOldId(oldId);

            mainBean.saveStory(r);
        }

    } catch (IOException ex) {
        ex.printStackTrace();
    }

}

From source file:accountgen.controller.Controller.java

/**
 * Splits the text of the first {@code #geo} element on {@code ", "} and stores the
 * first part as GEOX and the second part as GEOY of {@code p}.
 *
 * <p>Fails with a runtime exception, before {@code p} is modified, when the element
 * is missing or its text does not contain both parts.
 */
private void setGEO(Document doc, Person p) {
    // Query and split once instead of repeating the same lookup per coordinate.
    String[] coordinates = doc.select("#geo").first().text().split(", ");
    String geoX = coordinates[0];
    String geoY = coordinates[1];
    p.setGEOX(geoX);
    p.setGEOY(geoY);
}

From source file:com.johan.vertretungsplan.parser.UntisInfoParser.java

/**
 * Logs in, reads the available weeks from the navigation bar and parses the
 * substitution plan pages of every week into a {@link Vertretungsplan}.
 *
 * <p>When the {@code single_classes} option is set, one page per entry of
 * {@code getAllClasses()} is fetched (numbered from 1, zero-padded to five digits)
 * and the days are merged via {@code writeTagByDatum}; otherwise the single
 * overview page {@code 00000} is fetched and its days are appended.
 *
 * @return the plan containing all parsed days
 * @throws IOException
 *             if a page cannot be fetched
 * @throws JSONException
 *             if a required configuration value is missing
 */
@Override
public Vertretungsplan getVertretungsplan() throws IOException, JSONException {
    new LoginHandler(schule).handleLogin(executor, cookieStore, username, password);

    Document navbarDoc = Jsoup.parse(getNavbarDoc().replace("&nbsp;", ""));
    Element select = navbarDoc.select("select[name=week]").first();

    Vertretungsplan v = new Vertretungsplan();
    List<VertretungsplanTag> tage = new ArrayList<VertretungsplanTag>();

    // Everything from "Stand:" onwards is kept; empty when the marker is absent.
    String info = navbarDoc.select(".description").text();
    int standIndex = info.indexOf("Stand:");
    String stand = standIndex >= 0 ? info.substring(standIndex) : "";

    for (Element option : select.children()) {
        String week = option.attr("value");
        String letter = data.optString("letter", "w");
        if (data.optBoolean("single_classes", false)) {
            int classNumber = 1;
            for (String klasse : getAllClasses()) {
                String pageName = letter + String.format("%05d", classNumber) + ".htm";
                parseSubstitutionDays(fetchPlanPage(week, letter, pageName), stand, tage, true);
                classNumber++;
            }
        } else {
            parseSubstitutionDays(fetchPlanPage(week, letter, letter + "00000.htm"), stand, tage, false);
        }
        v.setTage(tage);
    }
    return v;
}

/**
 * Builds the URL of a plan page (week/letter order depends on the
 * {@code w_after_number} option), downloads it and parses it with jsoup.
 */
private Document fetchPlanPage(String week, String letter, String pageName) throws IOException, JSONException {
    String url;
    if (data.optBoolean("w_after_number", false)) {
        url = baseUrl + "/" + week + "/" + letter + "/" + pageName;
    } else {
        url = baseUrl + "/" + letter + "/" + week + "/" + pageName;
    }
    return Jsoup.parse(httpGet(url, schule.getData().getString("encoding")));
}

/**
 * Parses every day heading of one plan page and stores the resulting day in {@code tage}.
 *
 * @param mergeByDatum
 *            {@code true} to store via {@code writeTagByDatum}, {@code false} to append
 */
private void parseSubstitutionDays(Document doc, String stand, List<VertretungsplanTag> tage,
        boolean mergeByDatum) throws IOException, JSONException {
    Elements days = doc.select("#vertretung > p > b, #vertretung > b");
    for (Element day : days) {
        VertretungsplanTag tag = getTagByDatum(tage, day.text());
        tag.setStand(stand);
        tag.setDatum(day.text());

        // Locate the element following the heading; its position depends on
        // whether the heading is wrapped in a paragraph.
        Element next;
        if (day.parent().tagName().equals("p")) {
            next = day.parent().nextElementSibling().nextElementSibling();
        } else {
            next = day.parent().select("p").first().nextElementSibling();
        }

        if (next.className().equals("subst")) {
            // Substitution table
            if (next.text().contains("Vertretungen sind nicht freigegeben")) {
                // Plan not released for this day: skip it without storing the day.
                continue;
            }
            parseVertretungsplanTable(next, data, tag);
        } else {
            // Messages first, then the substitution table two siblings further on.
            parseNachrichten(next, data, tag);
            next = next.nextElementSibling().nextElementSibling();
            parseVertretungsplanTable(next, data, tag);
        }

        if (mergeByDatum) {
            writeTagByDatum(tage, tag);
        } else {
            tage.add(tag);
        }
    }
}