Example usage for org.jsoup.nodes Document location

Introduction

On this page you can find example usages of the org.jsoup.nodes Document location method.

Prototype

String location()

To view the source code for org.jsoup.nodes Document location.

Click Source Link

Usage

From source file:index.IndexManager.java

/**
 * Builds a Solr input document from a parsed HTML page.
 *
 * <p>The Solr document receives the page location as {@code id}, the current time in
 * milliseconds (as a string) as {@code time}, the page title, every distinct absolute link
 * ({@code link}) and media source ({@code media}), the formatted text of a fixed list of
 * heading/emphasis tags (one field per tag, named after the tag), and the page text split
 * into chunks stored as {@code 1_text}, {@code 2_text}, ...
 *
 * @param document the parsed page to index
 * @return a triple of the Solr document, the distinct absolute link URLs and the distinct
 *         absolute media URLs found on the page
 */
public static Triple<SolrInputDocument, Collection<String>, Collection<String>> index(Document document) {
    final SolrInputDocument solrDoc = new SolrInputDocument();
    solrDoc.setField("id", document.location());
    solrDoc.setField("time", String.valueOf(System.currentTimeMillis()));
    solrDoc.setField("title", document.title());

    // "abs:" makes jsoup resolve the attribute against the document's base URI.
    final Set<String> links = document.select("a[href]").stream()
            .map(anchor -> anchor.attr("abs:href"))
            .collect(Collectors.toSet());
    final Set<String> media = document.select("[src]").stream()
            .map(element -> element.attr("abs:src"))
            .collect(Collectors.toSet());

    for (String link : links) {
        solrDoc.addField("link", link);
    }
    for (String source : media) {
        solrDoc.addField("media", source);
    }

    // Each of these tags is indexed into a field that carries the tag's own name.
    final String[] highlightedTags = { "h1", "h2", "h3", "strong", "em", "b", "u", "i" };
    for (final String tag : highlightedTags) {
        formatText(document.getElementsByTag(tag).stream()).forEach(value -> solrDoc.addField(tag, value));
    }

    // Chunk fields are numbered from 1: "1_text", "2_text", ...
    int chunkNumber = 0;
    for (String chunk : chunkToLength(document.text())) {
        chunkNumber++;
        solrDoc.addField(chunkNumber + "_text", chunk);
    }

    return Triple.of(solrDoc, links, media);
}

From source file:com.ndemyanovskyi.backend.site.Site.java

/**
 * Logs a warning when the fetched document ended up at a different location than the URL
 * that was requested.
 *
 * <p>A redirect is only reported; it is not treated as an error.
 *
 * @param doc the fetched document whose final location is compared against {@code url}
 * @param url the URL that was originally requested
 * @throws IOException declared for callers; the current implementation does not throw it
 */
private static void checkRedirected(Document doc, String url) throws IOException {
    final String finalUrl = doc.location();
    if (!finalUrl.equals(url)) {
        // Both URLs are quoted so that the boundaries of each value are unambiguous in the log.
        LOG.log(Level.WARNING, "Connection is redirected: original url = '" + url + "'; final url = '"
                + finalUrl + "'.");
    }
}

From source file:com.techcavern.wavetact.eventListeners.FunMsgListener.java

/**
 * Reacts to a channel message with the "fun message" and automatic URL title features.
 *
 * <p>The channel property {@code funmsg} is read up front; the rest of the work is handed to
 * {@code Registry.threadPool}. For every space-separated word of the message the task:
 * <ul>
 *   <li>kicks the sender (or sends a mock "kicks" action when the bot cannot kick) if the word
 *       contains "yolo" after normalisation and {@code funmsg} is enabled, then stops;</li>
 *   <li>otherwise, if the word is a valid URL and {@code funmsg} or {@code autourl} is enabled,
 *       fetches the page and either kicks for a specific site or announces the page title.</li>
 * </ul>
 *
 * @param event the received channel message
 * @throws Exception declared by the overridden listener method
 */
@Override
public void onMessage(MessageEvent event) throws Exception {
    boolean funmsg = false;
    Record rec = DatabaseUtils.getChannelProperty(IRCUtils.getNetworkNameByNetwork(event.getBot()),
            event.getChannel().getName(), "funmsg");
    // Enabled only when the property exists and its value is "true" (case-insensitive).
    if (rec != null && rec.getValue(Channelproperty.CHANNELPROPERTY.VALUE).equalsIgnoreCase("true"))
        funmsg = true;
    // Effectively-final copy so the local class below can capture the flag.
    final boolean funmsg2 = funmsg;
    class process implements Runnable {
        public void run() {
            String commandchar = IRCUtils.getCommandChar(event.getBot(), event.getChannel());
            if (commandchar == null) {
                return;
            }
            // Only handle senders with a permission level above -2, and skip bot commands
            // (messages that start with the command character).
            if (PermUtils.getPermLevel(event.getBot(), event.getUser().getNick(), event.getChannel()) > -2
                    && !event.getMessage().startsWith(commandchar)) {
                String[] message = StringUtils.split(event.getMessage(), " ");
                for (String arg : message) {
                    try {
                        arg = Colors.removeFormattingAndColors(arg);
                        // Collapse runs of "o" and runs of "0" into a single "o" so that
                        // variants such as "yoooolo" or "y0l0" are matched as well.
                        if (arg.toLowerCase().replaceAll("o+", "o").replaceAll("0+", "o").contains("yolo")
                                && funmsg2) {
                            if (IRCUtils.checkIfCanKick(event.getChannel(), event.getBot(), event.getUser())) {
                                IRCUtils.sendKick(event.getBot().getUserBot(), event.getUser(), event.getBot(),
                                        event.getChannel(), "YOLO");
                            } else {
                                // Cannot kick: send an action that imitates a kick instead.
                                IRCUtils.sendAction(event.getUser(), event.getBot(), event.getChannel(),
                                        "kicks " + IRCUtils.noPing(event.getUser().getNick()) + " (YOLO)", "");
                            }
                            // The remaining words of the message are not processed.
                            return;
                        }
                        // Note: both channel properties are looked up again for every word.
                        Record autourlRecord = DatabaseUtils.getChannelProperty(
                                IRCUtils.getNetworkNameByNetwork(event.getBot()), event.getChannel().getName(),
                                "autourl");
                        boolean autourl = autourlRecord != null
                                && autourlRecord.getValue(CHANNELPROPERTY.VALUE).equalsIgnoreCase("true");
                        Record ignorehttpRecord = DatabaseUtils.getChannelProperty(
                                IRCUtils.getNetworkNameByNetwork(event.getBot()), event.getChannel().getName(),
                                "ignorehttp");
                        boolean ignorehttp = ignorehttpRecord != null
                                && ignorehttpRecord.getValue(CHANNELPROPERTY.VALUE).equalsIgnoreCase("true");
                        // With "ignorehttp" enabled, words without a scheme get "http://" prepended
                        // before they are validated as a URL.
                        if (ignorehttp && !arg.startsWith("https://") && !arg.startsWith("http://")) {
                            arg = "http://" + arg;
                        }
                        if ((funmsg2 || autourl) && Registry.urlValidator.isValid(arg)) {
                            try {
                                Document doc = Jsoup.connect(arg).userAgent(Registry.USER_AGENT).get();
                                // doc.location() is checked rather than arg, i.e. the location
                                // of the document that was actually fetched.
                                if (doc.location().contains("stop-irc-bullying.eu") && funmsg2) {
                                    if (IRCUtils.checkIfCanKick(event.getChannel(), event.getBot(),
                                            event.getUser())) {
                                        IRCUtils.sendKick(event.getBot().getUserBot(), event.getUser(),
                                                event.getBot(), event.getChannel(),
                                                "?  \\ ()/  ? [https://goo.gl/Tkb9dh]");
                                    } else {
                                        IRCUtils.sendAction(event.getUser(), event.getBot(), event.getChannel(),
                                                "kicks " + IRCUtils.noPing(event.getUser().getNick())
                                                        + " (?  \\ ()/  ?) [https://goo.gl/Tkb9dh]",
                                                "");
                                    }
                                    /**
                                     * My apologies to those using this site responsibly. But in my experience, this site has been linked numerous times for entertainment purposes.
                                     * In fact, I have yet to notice a time when it is linked for its intended purpose. And if you are using this site for its intended purpose, please think of
                                     * a better way of expressing how you feel. Linking a generic site rarely solves any problems. Instead explain to the person how and why they offended you. If
                                     * they ignore you, then you ignore them.
                                     */
                                } else if (autourl) {
                                    // Announce the page title, prefixed with the sender's nick.
                                    String title = doc.title();
                                    if (!title.isEmpty()) {
                                        IRCUtils.sendMessage(event.getBot(), event.getChannel(),
                                                "[" + IRCUtils.noPing(event.getUser().getNick()) + "] " + title,
                                                "");
                                    }
                                }
                            } catch (Exception e) {
                                // A failed fetch is only printed; the loop moves on to the next word.
                                e.printStackTrace();
                            }
                        }
                    } catch (Exception e) {
                        // Any other failure for this word is printed and the next word is processed.
                        e.printStackTrace();
                    }
                }
            }
        }
    }
    // Run on the shared pool so the listener itself returns right away.
    Registry.threadPool.execute(new process());
}

From source file:me.rkfg.xmpp.bot.plugins.CoolStoryPlugin.java

/**
 * Fetches a story from the given website and converts it to plain text.
 *
 * <p>The page is downloaded, the first element matching the site's CSS query is taken as the
 * story, nested {@code div}s are dropped, images are replaced by their {@code src} value and
 * line-break markers are inserted after {@code br} and before {@code p} elements before all
 * markup is stripped. When {@code CONFIG_REROLL_LONG_STORIES} is set, a story exceeding
 * {@code CONFIG_MAX_STORY_LENGTH} characters or {@code CONFIG_MAX_STORY_LINES} lines is fetched
 * again for as long as the roll counter has not exceeded {@code CONFIG_MAX_ROLLS}.
 *
 * @param website the site to fetch from, providing the URL and the CSS query of the story
 * @return the story text, or {@code ERROR_COULD_NOT_PARSE} when no element matches the query
 * @throws IOException if the page cannot be fetched
 */
private String fetchStory(Website website) throws IOException {
    int attempts = 0;

    while (true) {
        attempts++;

        final Document page = Jsoup.connect(website.getUrlString()).userAgent(DEFAULT_UA).get();
        page.outputSettings(new Document.OutputSettings().prettyPrint(false));
        logger.info("Fetched a story from {}", page.location());

        final Element storyElement = page.select(website.getCssQuery()).first();
        if (storyElement == null) {
            return ERROR_COULD_NOT_PARSE;
        }

        storyElement.select("div").remove();
        for (Element image : storyElement.select("img")) {
            image.replaceWith(new TextNode(image.attr("src"), ""));
        }
        // A literal backslash + 'n' is inserted as a marker here and turned into a real
        // newline below, once the element has been serialised to HTML.
        storyElement.select("br").after("\\n");
        storyElement.select("p").before("\\n\\n");
        final String markup = storyElement.html().replaceAll("\\\\n", "\n");

        final String text = Jsoup
                .clean(markup, "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false))
                .trim();
        final int textLength = text.length();
        final int textLines = countLines(text);
        final boolean tooLong = textLength > CONFIG_MAX_STORY_LENGTH || textLines > CONFIG_MAX_STORY_LINES;

        // Accept the story unless re-rolling is enabled, it is too long, and rolls remain.
        //noinspection ConstantConditions
        if (!CONFIG_REROLL_LONG_STORIES || !tooLong || attempts > CONFIG_MAX_ROLLS) {
            return text;
        }
    }
}

From source file:org.ow2.proactive_grid_cloud_portal.cli.cmd.sched.PackageDownloader.java

/**
 * Browses a web directory and all its subdirectories and returns a set containing the URLs of
 * their contents, each expressed relative to the directory of the first (non-recursive) call.
 *
 * <p>Every link of the listing page is resolved against {@code dirUrl} using its own
 * {@code href}; the cumulative prefix is only used to build the returned relative paths. Each
 * subdirectory is expanded while the page's links are walked, so the result set is never
 * iterated while it is being modified.
 *
 * @param dirUrl the URL of the directory listing page to browse
 * @param cummulativeRelativeUrl a string used for keeping track of a directory structure in the
 *        recursive context. MUST BE empty ("") in the first call.
 * @return the relative URLs of all entries found in the directory and, recursively, in its
 *         subdirectories
 * @throws IOException if a listing page cannot be fetched
 * @throws URISyntaxException declared for callers; not thrown by the code in this method
 */
private Set<String> listWebDirectoryContent(URL dirUrl, String cummulativeRelativeUrl)
        throws IOException, URISyntaxException {
    Set<String> result = Collections.newSetFromMap(new ConcurrentHashMap<String, Boolean>());

    //Load the directory listing page and extract all the links it contains.
    Document doc = Jsoup.connect(dirUrl.toString()).get();
    logger.info("Listing directories from: " + doc.location());
    for (Element file : doc.select("a[href]")) {
        String href = file.attr("href");
        // skip sort urls in Apache Tomcat
        if (href.startsWith("?")) {
            continue;
        }
        // skip parent directory url
        if (isRelativeParentDirectoryUrl(href)) {
            continue;
        }

        String relativeURL = cummulativeRelativeUrl + href;
        // add() returns false for a link that is already listed, so a directory that is
        // linked more than once on the page is only expanded once.
        boolean newlyListed = result.add(relativeURL);
        if (newlyListed && !isFileURL(relativeURL)) {
            // Resolve the raw href against this directory. The cumulative path must not be
            // used here: dirUrl already points inside the tree, so the prefix would be
            // applied a second time.
            URL absoluteUrl = new URL(dirUrl, href);
            result.addAll(listWebDirectoryContent(absoluteUrl, relativeURL));
        }
    }
    return result;
}