Example usage for org.jsoup.nodes Document getElementsByTag

List of usage examples for org.jsoup.nodes Document getElementsByTag

Introduction

On this page you can find usage examples for the getElementsByTag method of org.jsoup.nodes.Document.

Prototype

public Elements getElementsByTag(String tagName) 

Source Link

Document

Finds elements, including and recursively under this element, with the specified tag name.

Usage

From source file:com.osw.enderecamento.util.EnderecamentoParseUtil.java

/**
 * extract info about a address/*from w w w.  j  a  va  2 s  . c o  m*/
 * 
 * @param html
 * @return address list
 */
public List<Enderecamento> parseEnderecamento(String html) {

    Document docHtml = Jsoup.parse(html);
    Elements tagTr = docHtml.getElementsByTag("tr");
    List<Enderecamento> enderecamentos = new LinkedList<Enderecamento>();

    for (Element element : tagTr) {
        if (element.getElementsByTag("td").size() > 0) {
            Elements tahTd = element.getElementsByTag("td");
            Enderecamento enderecamento = new Enderecamento();
            enderecamento.setLogradouro(tahTd.get(0).text());
            enderecamento.setBairro(tahTd.get(1).text());
            enderecamento.setCidade(tahTd.get(2).text());
            enderecamento.setCep(tahTd.get(3).text());
            enderecamentos.add(enderecamento);
        }
    }
    return enderecamentos;

}

From source file:org.sonatype.nexus.proxy.maven.routing.internal.scrape.NginxIndexScraper.java

/**
 * Builds the "parent directory" link element for a page.
 *
 * <p>A fixed {@code ../} anchor is parsed as a body fragment, using the page URL
 * as base URI, and the first {@code <a>} element of the result is returned.
 *
 * @param page the page whose URL serves as base URI for the parsed fragment
 * @return the first anchor element of the parsed fragment
 */
@Override
protected Element getParentDirectoryElement(final Page page) {
    final String parentLinkHtml = "<a href=\"../\">../</a>";
    final Document fragment = Jsoup.parseBodyFragment(parentLinkHtml, page.getUrl());
    return fragment.getElementsByTag("a").first();
}

From source file:popo.defcon.MsgMeCDC.java

/**
 * Reads the notice page and hands one SMS text per new notice to {@code sendSMS}.
 *
 * <p>The rows of the second {@code <tbody>} are walked after dropping the first
 * row. For each row the text of the last cell is taken as the notice time. As
 * soon as a row is not newer than {@code oldtime}, {@code oldtime} is set to the
 * last-cell text of the first remaining row and the method returns.
 *
 * <p>The SMS body is the title (cell 1), the notice time (cell 4) and as much of
 * the description (cell 3, text after the "Form Description Files" marker) as
 * fits into 150 characters together with the Twilio trial prefix.
 *
 * <p>NOTE(review): when every row is newer than {@code oldtime} the loop ends
 * without updating {@code oldtime}. Confirm it is refreshed elsewhere, otherwise
 * the same notices would be sent again on the next call.
 */
void Parse() {
    String input = readPage();
    if (input == null) {
        System.out.println("Error connecting to Internet");
        return;
    }

    Logger logger = Logger.getLogger(MsgMeCDC.class.getName());
    Document cdc = Jsoup.parse(input);
    Elements notices = cdc.getElementsByTag("tbody");
    // The notices live in the second <tbody>; without it there is nothing to read.
    if (notices.size() < 2) {
        logger.log(Level.WARNING, "Notice table not found in page");
        return;
    }

    Elements alerts = notices.get(1).getElementsByTag("tr");
    if (alerts.isEmpty()) {
        return;
    }
    // Drop the first row (presumably the table header - TODO confirm against the page).
    alerts.remove(0);
    System.out.println("Current Old Time = " + oldtime);

    for (Element node : alerts) {
        Elements content = node.getElementsByTag("td");
        // Cells 1, 3 and 4 are read below; skip rows that do not have them.
        if (content.size() < 5) {
            continue;
        }

        String time = content.last().text();
        if (convertTime(time).compareTo(convertTime(oldtime)) <= 0) {
            // Remember the first remaining row's time (assumes rows are newest-first - TODO confirm).
            MsgMeCDC.oldtime = alerts.get(0).getElementsByTag("td").last().text();
            return;
        }

        String noticeTimeMessage = "Current notice time :" + convertTime(time);
        System.out.println(noticeTimeMessage);
        logger.log(Level.INFO, noticeTimeMessage);

        String smsTitle = content.get(1).text();
        String smsNoticeTime = content.get(4).text();
        String preSMStext = content.get(3).text();

        // The description proper starts one character after this marker. If the
        // marker is absent, use the text from its beginning instead of skipping
        // an arbitrary prefix.
        String marker = "Placement/ Internship Form Description Files";
        int markerAt = preSMStext.indexOf(marker);
        int start = markerAt < 0 ? 0 : Math.min(markerAt + marker.length() + 1, preSMStext.length());

        // Characters left for the description once title, time, two newlines and
        // the Twilio trial prefix are accounted for; never negative.
        int twilio = "Sent from your Twilio trial account - ".length();
        int budget = Math.max(0, 150 - (smsTitle.length() + smsNoticeTime.length() + twilio + 2));
        // Clamp to the text length so short descriptions do not throw.
        int stop = Math.min(preSMStext.length(), start + budget);
        String smsContent = preSMStext.substring(start, stop);

        String sms = smsTitle + '\n' + smsNoticeTime + '\n' + smsContent;
        System.out.println(sms);
        sendSMS(sms);
        logger.log(Level.INFO, "SMS sent: " + sms);
        logger.log(Level.INFO, "Length of SMS is " + (sms.length() + twilio));
        System.out.println("\nLength of SMS is " + (sms.length() + twilio));
        System.out.println("");
    }
}

From source file:com.crosstreelabs.cognitio.gumshoe.format.HtmlFormatHandler.java

/**
 * Collects the links of an HTML visit into {@code visit.discoveredLinks}.
 *
 * <p>The content stream is parsed with the visit's charset (UTF-8 when blank),
 * using the result location as base URI. For every {@code <a>} element the
 * absolute and the raw {@code href} are stripped of their fragment identifier.
 * Anchors are skipped when either value is empty, or when the raw href contains
 * {@code javascript:}, {@code mailto:} or {@code @}. The content stream is reset
 * after a successful pass.
 *
 * @param visit the visit whose content is scanned and whose link list is filled
 * @throws RuntimeException wrapping any parse or I/O failure
 */
@Override
public void processLinks(final Visit visit) {
    try {
        String charset = StringUtils.defaultIfBlank(visit.contentCharset, "UTF-8");
        Document doc = Jsoup.parse(visit.contentStream, charset, visit.result.location);

        for (Element e : doc.getElementsByTag("a")) {
            String url = stripURLFragmentIdentifier(e.attr("abs:href"));
            // Lower-case with Locale.ROOT: under a Turkish default locale "MAILTO:"
            // would become "maılto:" (dotless i) and slip past the filter below.
            String uri = stripURLFragmentIdentifier(e.attr("href").toLowerCase(java.util.Locale.ROOT));

            boolean unusable = uri.isEmpty() || url.isEmpty();
            boolean notNavigable = uri.contains("javascript:") || uri.contains("mailto:") || uri.contains("@");
            if (unusable || notNavigable) {
                continue;
            }

            visit.discoveredLinks.add(URL.parse(url).toString()); // TODO Need to add the link text as the title
        }
        visit.contentStream.reset();
    } catch (GalimatiasParseException | IOException ex) {
        throw new RuntimeException(ex);
    }
}

From source file:com.jaspersoft.android.jaspermobile.data.repository.report.page.HtmlPageCreator.java

/**
 * Exports the requested report page as HTML and injects the rendered
 * {@code rest-report.js} script into its {@code <head>}.
 *
 * <p>The asset is compiled as a Mustache template and executed with the keys
 * {@code jasperPrintName}, {@code pageIndex} and {@code isPro}. The result is
 * wrapped in a {@code <script>} element and appended to every {@code <head>}
 * element of the exported document. Both streams are closed before returning.
 *
 * <p>NOTE(review): the page index is derived with {@code Integer.parseInt(range)},
 * so a multi-page range string would raise {@link NumberFormatException}. Confirm
 * callers only pass a single page number.
 *
 * @return the page holding the modified HTML as UTF-8 bytes and the export's final flag
 * @throws Exception if the export, the asset lookup or the HTML parsing fails
 */
@Override
@NonNull
public ReportPage create() throws Exception {
    String range = pageRequest.getRange();
    PageRange pageRange = PageRange.parse(range);

    ReportExportOptions options = ReportExportOptions.builder()
            .withFormat(ReportFormat.valueOf(pageRequest.getFormat())).withPageRange(pageRange).build();

    ReportExport export = execution.export(options);
    ReportExportOutput output = export.download();

    // One explicit charset for reading the script, parsing the export and
    // encoding the result, instead of relying on the platform default.
    final Charset utf8 = Charset.forName("UTF-8");

    InputStream reportExport = output.getStream();
    try {
        // Opened inside the outer try so the export stream is still closed
        // when the asset cannot be opened.
        InputStream customScript = context.getAssets().open("rest-report.js");
        try {
            Reader reader = new InputStreamReader(customScript, utf8);
            Template template = Mustache.compiler().compile(reader);

            // The template receives a zero-based index.
            int pageIndex = Integer.parseInt(range) - 1;
            Map<String, Object> data = new HashMap<>();
            data.put("jasperPrintName", execution.getExecutionId());
            data.put("pageIndex", pageIndex);
            data.put("isPro", server.isProEdition());
            String customJs = template.execute(data);

            DataNode indexHeadScript = DataNode.createFromEncoded("<script>" + customJs + "</script>", "");

            Document document = Jsoup.parse(reportExport, utf8.name(), "");
            Elements head = document.getElementsByTag("head");
            head.append(indexHeadScript.toString());

            byte[] exportContent = document.toString().getBytes(utf8);
            return new ReportPage(exportContent, output.isFinal());
        } finally {
            IOUtils.closeQuietly(customScript);
        }
    } finally {
        IOUtils.closeQuietly(reportExport);
    }
}

From source file:web.analyzer.utils.Utils.java

/**
 * Heuristically decides whether a document contains a login form.
 *
 * <p>Each {@code <form>} element is serialised, lower-cased and has its single
 * quotes replaced by double quotes. A form counts as a login form when that
 * markup contains exactly one {@code type="password"} and, in addition, exactly
 * one {@code type="text"} or exactly one {@code type="email"}.
 *
 * @param doc the parsed document to inspect
 * @return {@code true} if at least one form matches, {@code false} otherwise
 */
public boolean hasLoginForm(Document doc) {
    // Compiled once per call rather than once per form.
    final Pattern textInput = Pattern.compile("type=\"text\"");
    final Pattern emailInput = Pattern.compile("type=\"email\"");
    final Pattern passwordInput = Pattern.compile("type=\"password\"");

    for (Element formElement : doc.getElementsByTag("form")) {
        // Locale.ROOT keeps the lower-casing independent of the default locale
        // (a Turkish locale would turn "EMAIL" into "emaıl").
        String markup = formElement.toString().toLowerCase(java.util.Locale.ROOT).replace("'", "\"");

        int textCount = countOccurrences(textInput, markup);
        int emailCount = countOccurrences(emailInput, markup);
        int passwordCount = countOccurrences(passwordInput, markup);

        if ((textCount == 1 || emailCount == 1) && passwordCount == 1) {
            return true;
        }
    }

    return false;
}

/** Counts the non-overlapping matches of {@code pattern} in {@code text}. */
private static int countOccurrences(Pattern pattern, String text) {
    Matcher matcher = pattern.matcher(text);
    int count = 0;
    while (matcher.find()) {
        count++;
    }
    return count;
}

From source file:mailbox.CreationViaEmail.java

/**
 * Rewrites {@code cid:} references in an HTML mail body to attachment URLs.
 *
 * <p>Every element carrying a {@code src} or {@code href} attribute is examined.
 * When the trimmed value starts with {@code cid:} (in any letter case) and the
 * remainder is a key of {@code attachments}, the attribute is replaced by the
 * URL of {@code AttachmentApp.getFile} for that attachment's id. All other
 * attributes are left untouched.
 *
 * @param html        the HTML to rewrite
 * @param attachments attachments keyed by content id (the part after {@code cid:})
 * @return the inner HTML of the first {@code <body>} element, or the whole
 *         document's HTML when there is no body element
 */
private static String replaceCidWithAttachments(String html, Map<String, Attachment> attachments) {
    final String cidScheme = "cid:";
    Document doc = Jsoup.parse(html);
    String[] attrNames = { "src", "href" };

    for (String attrName : attrNames) {
        for (Element tag : doc.select("*[" + attrName + "]")) {
            String uriString = tag.attr(attrName).trim();

            // Locale.ROOT: under a Turkish default locale "CID:" would lower-case
            // to "cıd:" and the reference would be left unresolved.
            if (!uriString.toLowerCase(java.util.Locale.ROOT).startsWith(cidScheme)) {
                continue;
            }

            String cid = uriString.substring(cidScheme.length());

            // Single lookup; an unknown content id leaves the attribute as it is.
            Attachment attachment = attachments.get(cid);
            if (attachment == null) {
                continue;
            }

            Long id = attachment.id;
            tag.attr(attrName, controllers.routes.AttachmentApp.getFile(id).url());
        }
    }

    Elements bodies = doc.getElementsByTag("body");
    return bodies.isEmpty() ? doc.html() : bodies.get(0).html();
}

From source file:com.mycompany.crawlertest.GrabManager.java

/**
 * Crawls from {@code start}, then prints every collected URL and, for pages that
 * have both a heading and a full-screen capable iframe, the heading text with
 * the iframe markup.
 *
 * <p>After submitting the start URL the method loops on {@code checkPageGrabs()}
 * until it returns {@code false}, then reports the size of {@code masterList}
 * and the elapsed seconds. Each entry of {@code Uttils.URLS} is fetched with
 * jsoup. Entries that do not start with {@code http} are prefixed with the
 * protocol and host of {@code start}. A failure on one URL is printed and does
 * not stop the remaining ones.
 *
 * @param start the URL the crawl begins at; also defines the site base
 * @throws InterruptedException declared for the crawl phase
 * @throws IOException          declared for the crawl phase
 */
public void go(URL start) throws InterruptedException, IOException {
    // stay within same site
    urlBase = start.toString().replaceAll("(.*//.*/).*", "$1");
    StopWatch stopWatch = new StopWatch();

    stopWatch.start();

    submitNewURL(start, 0);

    while (checkPageGrabs()) {
        // intentionally empty: keep polling until checkPageGrabs() reports completion
    }
    stopWatch.stop();

    System.out.println("Found " + masterList.size() + " urls");
    System.out.println("in " + stopWatch.getTime() / 1000 + " seconds");

    for (String url : Uttils.URLS) {
        System.out.println(url);

        // Only a leading "http" marks an absolute URL. A relative URL that merely
        // embeds "http" (for example in a query string) still needs the site prefix.
        String absoluteUrl = url.startsWith("http")
                ? url
                : start.getProtocol() + "://" + start.getHost() + url;

        try {
            Document document = Jsoup.parse(new URL(absoluteUrl), TIMEOUT);

            // Look the headings up once and reuse the result.
            Elements headings = document.getElementsByTag("h1");
            Elements videoTag = document.getElementsByTag("iframe");

            if (!headings.isEmpty() && !videoTag.isEmpty() && videoTag.get(0).hasAttr("allowfullscreen")) {
                String tag = videoTag.get(0).toString();
                System.out.println(headings.get(0).text() + " ___  " + tag);
            }
        } catch (Exception ex) {
            // Best effort: one unreachable or malformed URL must not end the report.
            System.out.println("ERROR : " + ex.getMessage());
        }
    }
}

From source file:io.gravitee.management.service.impl.EmailServiceImpl.java

/**
 * Turns the images of an HTML mail body into inline resources of the message.
 *
 * <p>The {@code src} of every {@code <img>} that has one is prefixed with
 * {@code cid:}. The rewritten HTML is set as the message text, and each original
 * {@code src} value is added as an inline resource, loaded from
 * {@code templatesPath} under that same value as content id.
 *
 * @param mailMessage the message helper that receives the text and the inline resources
 * @param htmlText    the HTML body to process
 * @return the rewritten HTML
 * @throws Exception if setting the text or adding a resource fails
 */
private String addResourcesInMessage(final MimeMessageHelper mailMessage, final String htmlText)
        throws Exception {
    final Document document = Jsoup.parse(htmlText);
    final List<String> resources = new ArrayList<>();

    // Rewrite each image source in place and remember the original value.
    document.getElementsByTag("img").forEach(image -> {
        if (image.hasAttr("src")) {
            final String source = image.attr("src");
            image.attr("src", "cid:" + source);
            resources.add(source);
        }
    });

    final String html = document.html();
    mailMessage.setText(html, true);

    for (final String res : resources) {
        final File resourceFile = new File(templatesPath, res);
        final String contentType = MimetypesFileTypeMap.getDefaultFileTypeMap().getContentType(res);
        mailMessage.addInline(res, new FileSystemResource(resourceFile), contentType);
    }

    return html;
}

From source file:com.aestasit.markdown.slidery.converters.TextTemplateConverter.java

/**
 * Selects the slide elements of a slides document.
 *
 * @param slidesDocument the parsed slides document
 * @return all {@code <section>} elements of the document
 */
protected Elements getSlideCollection(Document slidesDocument) {
    final String slideTag = "section";
    return slidesDocument.getElementsByTag(slideTag);
}