Example usage for org.jsoup.nodes Document getElementsByTag

List of usage examples for org.jsoup.nodes Document getElementsByTag

Introduction

On this page you can find example usages of org.jsoup.nodes Document getElementsByTag.

Prototype

public Elements getElementsByTag(String tagName) 

Source Link

Document

Finds elements, including and recursively under this element, with the specified tag name.

Usage

From source file:org.arb.extractor.DomTreeWalker.java

/**
 * Extract localizable resource from a code unit.
 * /*from  ww w .jav a 2  s.  co m*/
 * @param codeUnit AbstractCodeUnit instance that has all information related to a source file.
 */
@Override
public void extractResource(AbstractCodeUnit codeUnit) {
    Document doc = codeUnit.getDomDocument();
    Elements elements = doc.getElementsByTag("html");
    for (int i = 0; i < elements.size(); ++i) {
        extractResourceOnElement(elements.get(i), codeUnit);
    }
}

From source file:org.asqatasun.processing.ProcessRemarkServiceImplTest.java

/**
 * Test of getSnippetFromElement method, of class ProcessRemarkServiceImpl.
 *
 * <p>Each scenario parses a raw HTML fragment with Jsoup, takes the first element with a given
 * tag name, asks the service for its snippet, unescapes the result with
 * {@code StringEscapeUtils.unescapeHtml4} and compares it with a hand-written expected string.
 */
public void testGetSnippetFromElement() {
    ProcessRemarkServiceImpl instance = new ProcessRemarkServiceImpl(null, null, null);

    //--------------------------------------------------------------------//
    //-----------------------Test1----------------------------------------//
    //--------------------------------------------------------------------//
    // <label> wrapping a <span> and a self-closed <input>.
    String rawHtml = "<label> <span>Rechercher:</span> "
            + "<input type=\"text\" onkeyup=\"return CatchEnter(event);\" "
            + "class=\"text\" id=\"searchfield\" " + "name=\"search&qudsqqqssqdsqdsqdo\" /></label>";
    Document document = Jsoup.parse(rawHtml);
    Element element = document.getElementsByTag("label").iterator().next();
    String snippet = StringEscapeUtils.unescapeHtml4(instance.getSnippetFromElement(element));
    String expectedSnippet = "<label> <span>Rechercher:</span> "
            + "<input type=\"text\" onkeyup=\"return CatchEnter(event);\" "
            + "class=\"text\" id=\"searchfield\" " + "name=\"search&amp;qudsqqqssqdsqdsqdo\" />[...]</label>";
    assertEquals(expectedSnippet, snippet);

    //--------------------------------------------------------------------//
    //-----------------------Test2----------------------------------------//
    //--------------------------------------------------------------------//
    // <label> wrapping a <span> and a <p> that has text content.
    rawHtml = "<label> <span>New Rechercher:</span> "
            + "<p title=\"some title here\" onkeyup=\"return CatchEnter(event);\" "
            + " id=\"searchfield\" class=\"myclass other-class1 other-class2\" > " + "anything</p></label>";
    document = Jsoup.parse(rawHtml);
    element = document.getElementsByTag("label").iterator().next();
    snippet = StringEscapeUtils.unescapeHtml4(instance.getSnippetFromElement(element));
    expectedSnippet = "<label> <span>New Rechercher:</span> "
            + "<p title=\"some title here\" onkeyup=\"return CatchEnter(event);\""
            + " id=\"searchfield\" class=\"myclass other-class1 other-class2\">" + "[...]</p>[...]</label>";
    assertEquals(expectedSnippet, snippet);

    //--------------------------------------------------------------------//
    //-----------------------Test3----------------------------------------//
    //--------------------------------------------------------------------//
    // Empty <iframe> with a long src URL: the expected snippet is the trimmed input itself.
    rawHtml = "<iframe align=\"left\" width=\"315px\" " + "scrolling=\"no\" height=\"160px\" frameborder=\"0\" "
            + "id=\"link-meteo\" src=\"http://www.anyUrl.com/module/onelocationsearch?ShowSearch=true&amp;StartDate=2012-06-01&amp;Days=2&amp;location=bruxelles&amp;url=http://meteo1.lavenir.net&amp;cssfile=http://lavenir.net/extra/weather/styles.css\">"
            + "</iframe> ";
    document = Jsoup.parse(rawHtml);
    element = document.getElementsByTag("iframe").iterator().next();
    snippet = StringEscapeUtils.unescapeHtml4(instance.getSnippetFromElement(element));
    expectedSnippet = rawHtml.trim();
    assertEquals(expectedSnippet, snippet);

    //--------------------------------------------------------------------//
    //-----------------------Test4----------------------------------------//
    //--------------------------------------------------------------------//
    // <center> with several nested scripts, a <div>, <ins> elements and an <iframe>;
    // the expected snippet stops after the second <script>.
    rawHtml = " <center>  <script type=\"text/javascript\">    if (articledetail == false) initAdhese('IMU.SUPER.WIDE');     </script> "
            + "<script src=\"http://anyUrl.com/ad3/sl_ave_home_-IMU.SUPER.WIDE/lafr/rn92/pv1/brFirefox;Firefox17;Linux;screenundefined/in;prx;;gmbl;/?t=1381234838205\" type=\"text/javascript\"></script> "
            + " <div class=\"adhese_300x250\">  <script src=\"http://1.adhesecdn.be/pool/lib/68641.js?t=1371729603000\"></script> "
            + "<script src=\"http://anyUrl.com/pagead/show_ads.js\" type=\"text/javascript\"></script>"
            + "<ins style=\"display:inline-table;border:none;height:250px;margin:0;padding:0;position:relative;visibility:visible;width:300px\">"
            + "<ins style=\"display:block;border:none;height:250px;margin:0;padding:0;position:relative;visibility:visible;width:300px\" id=\"aswift_1_anchor\">"
            + "<iframe width=\"300\" scrolling=\"no\" height=\"250\" frameborder=\"0\" style=\"left:0;position:absolute;top:0;\" name=\"aswift_1\" id=\"aswift_1\" onload=\"var i=this.id,s=window.google_iframe_oncopy,H=s&amp;&amp;s.handlers,h=H&amp;&amp;H[i],w=this.contentWindow,d;try{d=w.document}catch(e){}if(h&amp;&amp;d&amp;&amp;(!d.body||!d.body.firstChild)){if(h.call){setTimeout(h,0)}else if(h.match){w.location.replace(h)}}\" allowtransparency=\"true\" hspace=\"0\" vspace=\"0\" marginheight=\"0\" marginwidth=\"0\"></iframe>"
            + "</ins>" + "</ins>" + "</div> " + "</center> ";
    document = Jsoup.parse(rawHtml);
    element = document.getElementsByTag("center").iterator().next();
    snippet = StringEscapeUtils.unescapeHtml4(instance.getSnippetFromElement(element));
    expectedSnippet = "<center> <script type=\"text/javascript\"> if (articledetail == false) initAdhese('IMU.SUPER.WIDE'); </script> "
            + "<script src=\"http://anyUrl.com/ad3/sl_ave_home_-IMU.SUPER.WIDE/lafr/rn92/pv1/brFirefox;Firefox17;Linux;screenundefined/in;prx;;gmbl;/?t=1381234838205\" type=\"text/javascript\">[...]</script>"
            + "[...]</center>";
    assertEquals(expectedSnippet, snippet);
}

From source file:org.b3log.solo.plugin.list.ListHandler.java

/**
 * Prepends a heading list (table of contents) to the content of the article carried by the event.
 *
 * <p>Every {@code h1}-{@code h5} heading in the article gets an empty anchor {@code span}
 * inserted before it, and a matching link is added to a {@code ul} list that is placed in
 * front of the article body together with the plugin's stylesheet link.
 *
 * @param event the event whose data holds the article under {@code Article.ARTICLE}
 * @throws EventException declared by the handler contract
 */
@Override
public void action(final Event<JSONObject> event) throws EventException {
    final JSONObject eventData = event.getData();
    final JSONObject article = eventData.optJSONObject(Article.ARTICLE);
    final String originalContent = article.optString(Article.ARTICLE_CONTENT);

    final Document doc = Jsoup.parse(originalContent, StringUtils.EMPTY, Parser.htmlParser());
    doc.outputSettings().prettyPrint(false);

    final StringBuilder toc = new StringBuilder();
    toc.append("<link rel=\"stylesheet\" type=\"text/css\" href=\"" + Latkes.getStaticServePath()
            + "/plugins/list/style.css\" />");

    final Elements headings = doc.select("h1, h2, h3, h4, h5");
    toc.append("<ul class='b3-solo-list'>");

    // The running index makes every anchor id unique, even for repeated heading levels.
    int index = 0;
    for (final Element heading : headings) {
        final String level = heading.tagName().toLowerCase();
        final String anchorId = "b3_solo_" + level + "_" + index;
        final String label = heading.text();

        // Drop an empty anchor right before the heading so the list entry can link to it.
        heading.before("<span id='" + anchorId + "'></span>");

        toc.append("<li class='b3-solo-list-");
        toc.append(level);
        toc.append("'><a href='#");
        toc.append(anchorId);
        toc.append("'>");
        toc.append(label);
        toc.append("</a></li>");

        index++;
    }
    toc.append("</ul>");

    final Element body = doc.getElementsByTag("body").get(0);
    final String rendered = toc.toString() + body.html();

    article.put(Article.ARTICLE_CONTENT, rendered);
}

From source file:org.b3log.symphony.util.Markdowns.java

/**
 * Gets the safe HTML content of the specified content.
 *
 * @param content the specified content/*from w  w  w. j  ava 2s. c o  m*/
 * @param baseURI the specified base URI, the relative path value of href will starts with this URL
 * @return safe HTML content
 */
public static String clean(final String content, final String baseURI) {
    final Document.OutputSettings outputSettings = new Document.OutputSettings();
    outputSettings.prettyPrint(false);

    final String tmp = Jsoup.clean(content, baseURI,
            Whitelist.relaxed().addAttributes(":all", "id", "target", "class")
                    .addTags("span", "hr", "kbd", "samp", "tt", "del", "s", "strike", "u")
                    .addAttributes("iframe", "src", "width", "height", "border", "marginwidth", "marginheight")
                    .addAttributes("audio", "controls", "src")
                    .addAttributes("video", "controls", "src", "width", "height")
                    .addAttributes("source", "src", "media", "type")
                    .addAttributes("object", "width", "height", "data", "type")
                    .addAttributes("param", "name", "value")
                    .addAttributes("input", "type", "disabled", "checked").addAttributes("embed", "src", "type",
                            "width", "height", "wmode", "allowNetworking"),
            outputSettings);
    final Document doc = Jsoup.parse(tmp, baseURI, Parser.htmlParser());

    final Elements ps = doc.getElementsByTag("p");
    for (final Element p : ps) {
        p.removeAttr("style");
    }

    final Elements iframes = doc.getElementsByTag("iframe");
    for (final Element iframe : iframes) {
        final String src = StringUtils.deleteWhitespace(iframe.attr("src"));
        if (StringUtils.startsWithIgnoreCase(src, "javascript")
                || StringUtils.startsWithIgnoreCase(src, "data:")) {
            iframe.remove();
        }
    }

    final Elements objs = doc.getElementsByTag("object");
    for (final Element obj : objs) {
        final String data = StringUtils.deleteWhitespace(obj.attr("data"));
        if (StringUtils.startsWithIgnoreCase(data, "data:")
                || StringUtils.startsWithIgnoreCase(data, "javascript")) {
            obj.remove();

            continue;
        }

        final String type = StringUtils.deleteWhitespace(obj.attr("type"));
        if (StringUtils.containsIgnoreCase(type, "script")) {
            obj.remove();
        }
    }

    final Elements embeds = doc.getElementsByTag("embed");
    for (final Element embed : embeds) {
        final String data = StringUtils.deleteWhitespace(embed.attr("src"));
        if (StringUtils.startsWithIgnoreCase(data, "data:")
                || StringUtils.startsWithIgnoreCase(data, "javascript")) {
            embed.remove();

            continue;
        }
    }

    final Elements as = doc.getElementsByTag("a");
    for (final Element a : as) {
        a.attr("rel", "nofollow");

        final String href = a.attr("href");
        if (href.startsWith(Latkes.getServePath())) {
            continue;
        }

        a.attr("target", "_blank");
    }

    final Elements audios = doc.getElementsByTag("audio");
    for (final Element audio : audios) {
        audio.attr("preload", "none");
    }

    final Elements videos = doc.getElementsByTag("video");
    for (final Element video : videos) {
        video.attr("preload", "none");
    }

    String ret = doc.body().html();
    ret = ret.replaceAll("(</?br\\s*/?>\\s*)+", "<br>"); // patch for Jsoup issue

    return ret;
}

From source file:org.crazyt.xgogdownloader.Main.java

/**
 * Command-line entry point.
 *
 * <p>Sets up the default configuration paths, creates the working directories, registers the
 * command-line and config-file options, parses both sources, copies the parsed values into
 * {@code Config} and finally dispatches to the requested {@code Downloader} action
 * (login, update check, repair, search, download, list, orphan check or status check).
 *
 * @param args the command-line arguments; when empty, the help text is printed
 * @throws RuntimeException if a working directory cannot be created, option parsing fails,
 *         {@code --platform} or {@code --language} is out of range, or the downloader
 *         cannot be initialized for login
 */
public static void main(String[] args) {
    Util util = new Util();
    Config.sVersionString = VERSION_STRING + VERSION_NUMBER;
    Config.sConfigDirectory = "xgogdownloader";
    Config.sCookiePath = "cookies.txt";
    Config.sConfigFilePath = "config.cfg";
    Config.sXMLDirectory = "xgogdownloader/xml";
    // Create xgogdownloader directories
    File path = Factory.newFile(Config.sXMLDirectory);
    if (!path.exists()) {
        if (!path.mkdirs()) {
            System.out.print("Failed to create directory: ");
            System.out.print(path);
            throw new RuntimeException("Failed to create directory. ");
        }
    }
    path = Factory.newFile(Config.sConfigDirectory);
    if (!path.exists()) {
        if (!path.mkdirs()) {
            System.out.print("Failed to create directory: ");
            System.out.print(path);
            throw new RuntimeException("Failed to create directory. ");
        }
    }
    // Create help text for --platform option
    String platform_text = "Select which installers are downloaded\n";
    int platform_sum = 0;
    for (int i = 0; i < GlobalConstants.PLATFORMS.size(); ++i) {
        platform_text += GlobalConstants.PLATFORMS.get(i).platformId + " = "
                + GlobalConstants.PLATFORMS.get(i).platformString + "\n";
        // Sum the platform ids (not the language ids) so that "All" and the
        // range check on --platform below use the right upper bound.
        platform_sum += GlobalConstants.PLATFORMS.get(i).platformId;
    }
    platform_text += platform_sum + " = All";

    // Create help text for --language option
    String language_text = "Select which language installers are downloaded\n";
    int language_sum = 0;
    for (int i = 0; i < GlobalConstants.LANGUAGES.size(); ++i) {
        language_text += GlobalConstants.LANGUAGES.get(i).languageId + " = "
                + GlobalConstants.LANGUAGES.get(i).languageString + "\n";
        language_sum += GlobalConstants.LANGUAGES.get(i).languageId;
    }
    // The last operand is parenthesized so the two ids are added numerically
    // instead of being appended to the string one after the other.
    language_text += "Add the values to download multiple languages\nAll = " + language_sum + "\n"
            + "French + Polish = " + GlobalConstants.LANGUAGE_FR + "+" + GlobalConstants.LANGUAGE_PL + " = "
            + (GlobalConstants.LANGUAGE_FR + GlobalConstants.LANGUAGE_PL);

    // Create help text for --check-orphans
    String[] orphans_regex_default = new String[] { "zip", "exe", "bin", "dmg", "old" };
    // List<File> files = (List<File>) FileUtils.listFiles(dir, extensions,
    // true);
    String check_orphans_text = "Check for orphaned files (files found on local filesystem that are not found on GOG servers). Sets regular expression filter (Perl syntax) for files to check. If no argument is given then the regex defaults to '"
            + StringUtils.join(orphans_regex_default, ",") + "'";

    CommandOptions options_cli_all = new CommandOptions();
    CommandOptions options_cli_no_cfg = new CommandOptions();
    ConfigOptions options_cli_cfg = new ConfigOptions();
    ConfigOptions options_cfg_only = new ConfigOptions();
    ConfigOptions options_cfg_all = new ConfigOptions();
    try {
        // NOTE(review): several of these holders (e.g. bInsecure, bNoCover, bNoInstallers,
        // sGame) are never passed to addOption below, so they appear to keep their
        // defaults - confirm whether they should be bound to their options.
        OptionValue<Boolean> bInsecure = new OptionValue<>(false);
        OptionValue<Boolean> bNoColor = new OptionValue<>(false);
        OptionValue<Boolean> bNoUnicode = new OptionValue<>(false);
        OptionValue<Boolean> bNoDuplicateHandler = new OptionValue<>(false);
        OptionValue<Boolean> bNoCover = new OptionValue<>(false);
        OptionValue<Boolean> bNoInstallers = new OptionValue<>(false);
        OptionValue<Boolean> bNoExtras = new OptionValue<>(false);
        OptionValue<Boolean> bNoPatches = new OptionValue<>(false);
        OptionValue<Boolean> bNoLanguagePacks = new OptionValue<>(false);
        OptionValue<Boolean> bNoRemoteXML = new OptionValue<>(false);
        OptionValue<Boolean> bNoSubDirectories = new OptionValue<>(false);

        OptionValue<String> sGame = new OptionValue<>("free");
        OptionValue<String> sToken = new OptionValue<>("");
        OptionValue<String> sSecret = new OptionValue<>("");
        OptionValue<String> sSearch = new OptionValue<>("");
        OptionValue<Boolean> bList = new OptionValue<>(false);
        OptionValue<Boolean> bDownload = new OptionValue<>(false);
        OptionValue<Integer> iDownloadRate = new OptionValue<>(0);

        // //switch to OptionBuilder
        // Commandline options (no config file)
        options_cli_no_cfg.addOption("debug", "d", false, "Print debug messages");
        options_cli_no_cfg.addOption("help", "h", false, "Print help message");
        options_cli_no_cfg.addOption("version", false, "Print version information");
        options_cli_no_cfg.addOption("versionUpdate", false, "Updates this program to the current version.");
        options_cli_no_cfg.addOption("login", true, "Login");
        // config.bLogin false
        options_cli_no_cfg.addOption(bList, "list", false, "List games");
        // config.bList false
        options_cli_no_cfg.addOption(sSearch, "search", true, "search games by title");
        options_cli_no_cfg.addOption("listdetails", "list-details", true, "List games with detailed info"); // config.bListDetails
        // false
        options_cli_no_cfg.addOption(bDownload, "download", false, "Download");
        // config.bDownload false
        options_cli_no_cfg.addOption("repair", true,
                "Repair downloaded files\nUse --repair --download to redownload files when filesizes don't match (possibly different version). Redownload will delete the old file"); // config.bRepair
        // false
        options_cli_no_cfg.addOption("game", true,
                "Set regular expression filter\nfor download/list/repair (Perl syntax)\nAliases: \"all\", \"free\""); // config.sGameRegex
        // ""
        options_cli_no_cfg.addOption("createxml", "create-xml", true,
                "Create GOG XML for file\n\"automatic\" to enable automatic XML creation"); // config.sXMLFile
        // ""
        options_cli_no_cfg.addOption("updatecheck", "update-check", true, "Check for update notifications");
        // config.bUpdateCheck false

        options_cli_no_cfg.addOption("checkorphans", "check-orphans", true, check_orphans_text);
        // config.sOrphanRegex ""

        options_cli_no_cfg.addOption("status", true,
                "Show status of files\n\nOutput format:\nstatuscode gamename filename filesize filehash\n\nStatus codes:\nOK - File is OK\nND - File is not downloaded\nMD5 - MD5 mismatch, different version");// config.bCheckStatus
        // false
        options_cli_no_cfg.addOption("saveconfig", "save-config", true,
                "Create config file with current settings");
        // config.bSaveConfig false

        options_cli_no_cfg.addOption("resetconfig", "reset-config", true, "Reset config settings to default");
        // config.bResetConfig false

        options_cli_no_cfg.addOption("report", true, "Save report of downloaded/repaired files");
        // config.bReport false

        // Commandline options (config file)
        options_cli_cfg.addOption("directory", true, "Set download directory");
        // config.sDirectory ""

        options_cli_cfg.addOption(iDownloadRate, "limitRate", true,
                "Limit download rate to value in kB\n0 = unlimited");
        // config.iDownloadRate 0

        options_cli_cfg.addOption("xmlDirectory", true, "Set directory for GOG XML files");
        // config.sXMLDirectory ""

        options_cli_cfg.addOption("chunkSize", true, "Chunk size (in MB) when creating XML");
        // config.iChunkSize 10

        options_cli_cfg.addOption("platform", true, platform_text);
        // config.iInstallerType GlobalConstants.PLATFORM_WINDOWS

        options_cli_cfg.addOption("language", true, language_text);
        // config.iInstallerLanguage GlobalConstants.LANGUAGE_EN

        options_cli_cfg.addOption("noInstallers", true, "Don't download/list/repair installers");
        // bNoInstallers false

        options_cli_cfg.addOption("noExtras", true, "Don't download/list/repair extras");
        // bNoExtras false

        options_cli_cfg.addOption("noPatches", true, "Don't download/list/repair patches");
        // bNoPatches false

        options_cli_cfg.addOption("noLanguagePacks", true, "Don't download/list/repair language packs");
        // bNoLanguagePacks false

        options_cli_cfg.addOption("noCover", true, "Don't download cover images");
        // bNoCover false

        options_cli_cfg.addOption("noRemoteXml", true, "Don't use remote XML for repair");
        // bNoRemoteXML false

        options_cli_cfg.addOption(bNoUnicode, "noUnicode", true, "Don't use Unicode in the progress bar");
        // bNoUnicode false

        options_cli_cfg.addOption(bNoColor, "noColor", true, "Don't use coloring in the progress bar");
        // bNoColor false

        options_cli_cfg.addOption("noDuplicateHandling", true,
                "Don't use duplicate handler for installers\nDuplicate installers from different languages are handled separately");// bNoDuplicateHandler
        // false
        options_cli_cfg.addOption("noSubdirectories", true,
                "Don't create subdirectories for extras, patches and language packs");
        // bNoSubDirectories false

        options_cli_cfg.addOption("verbose", true, "Print lots of information");

        options_cli_cfg.addOption("insecure", true, "Don't verify authenticity of SSL certificates");
        // bInsecure false

        options_cli_cfg.addOption("timeout", true,
                "Set timeout for connection\nMaximum time in seconds that connection phase is allowed to take");
        // config.iTimeout 10
        options_cli_cfg.addOption("retries", true, "Set maximum number of retries on failed download");
        // config.iRetries 3

        // Options read from config file
        options_cfg_only.addOption(sToken, "token", true, "oauth token");
        // config.sToken ""

        options_cfg_only.addOption(sSecret, "secret", true, "oauth secret");
        // config.sSecret ""

        options_cli_all.addOptions(options_cli_no_cfg);
        options_cli_all.addOptions(options_cli_cfg);

        options_cfg_all.addOptions(options_cfg_only);
        options_cfg_all.addOptions(options_cli_cfg);

        options_cfg_all.parse(Config.sConfigFilePath);

        // boost.program_options.store(boost.program_options
        // .parse_command_line(argc, args, options_cli_all), vm);

        CommandLineParser parser = new GnuParser();
        // With no arguments at all, behave as if help had been requested.
        String[] args2;
        if (args.length == 0) {
            args2 = new String[] { "-help" };
        } else {
            args2 = args;
        }
        CommandLine cmd = parser.parse(options_cli_all, args2);
        options_cli_all.parseCmdLine(cmd);

        path = Factory.newFile(Config.sConfigDirectory);
        if (path.exists()) {
            Properties prop = new Properties();
            // try-with-resources closes the stream even when load() fails.
            try (FileInputStream fileInputStream = new FileInputStream(
                    Config.sConfigDirectory + File.separatorChar + Config.sConfigFilePath)) {
                prop.load(fileInputStream);
            } catch (FileNotFoundException e) {
                System.out.println("Could not open config file: " + Config.sConfigDirectory + File.separatorChar
                        + Config.sConfigFilePath + ", creating new one.");
                Factory.newFile(Config.sConfigDirectory + File.separatorChar + Config.sConfigFilePath)
                        .createNewFile();
            }
        }

        if (cmd.hasOption("help")) {
            System.out.println(Config.sVersionString);
            System.out.println("Options:");
            for (Option option : (Collection<Option>) options_cli_all.getOptions()) {
                // Continuation lines of multi-line descriptions are indented under the first one.
                System.out.println(String.format("%20s\t-\t%s", option.getOpt(),
                        option.getDescription().replace("\n", String.format("\n%20s\t \t", ""))));
            }
            return;
        }
        if (cmd.hasOption("version")) {
            System.out.print(Config.sVersionString);
            return;
        }
        if (cmd.hasOption("versionUpdate")) {
            String sub = "xgogdownloader-";

            try {
                HttpClient client = Factory.createHttpClient();
                HttpGet request = new HttpGet("https://drone.io/github.com/TheCrazyT/xgogdownloader/files");
                request.setHeader("User-Agent", Main.USER_AGENT);
                HttpResponse response_full = client.execute(request);
                int result = response_full.getStatusLine().getStatusCode();

                if (result != HttpStatus.SC_OK) {
                    System.err.println("Error " + result);
                }
                String response = EntityUtils.toString(response_full.getEntity());
                Document html = Jsoup.parse(response);

                // Scan every <div>: one that carries a "SHA..." span and a link to an
                // "xgogdownloader-*.zip" file is treated as the download entry.
                Iterator<org.jsoup.nodes.Element> iterator = html.getElementsByTag("div").iterator();
                while (iterator.hasNext()) {
                    org.jsoup.nodes.Element node = iterator.next();
                    String hash = "";
                    Elements spans = node.getElementsByTag("span");
                    Iterator<org.jsoup.nodes.Element> iterator2 = spans.iterator();
                    while (iterator2.hasNext()) {
                        org.jsoup.nodes.Element span = iterator2.next();
                        if (span.text().startsWith("SHA")) {
                            // Characters 4..43 of the span text are taken as the hash.
                            hash = span.text().substring(4, 44);
                            break;
                        }
                    }
                    if (!hash.isEmpty()) {
                        iterator2 = node.getElementsByTag("a").iterator();
                        while (iterator2.hasNext()) {
                            Element a = iterator2.next();
                            String url = a.attr("href");
                            if (a.text().startsWith(sub) && a.text().endsWith(".zip")) {
                                // TODO
                                System.out.println("... TODO ...");
                                System.out.println(url);
                                return;
                            }
                        }
                    }
                }

                return;
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }

        if (cmd.hasOption("chunkSize")) {
            // Convert chunk size from megabytes to bytes
            Config.iChunkSize <<= 20;
        }

        if (cmd.hasOption("limitRate")) {
            Config.iDownloadRate = iDownloadRate.getValue();
            // Convert download rate from kilobytes to bytes
            Config.iDownloadRate <<= 10;
        }
        if (cmd.hasOption("check-orphans")) {
            if (Config.sOrphanRegex.isEmpty()) {
                Config.sOrphanRegex = StringUtils.join(orphans_regex_default, "|");
            }
        }

        Config.bDownload = bDownload.getValue();
        Config.sToken = sToken.getValue();
        // The secret comes from its own option value, not from the token.
        Config.sSecret = sSecret.getValue();
        Config.sSearch = sSearch.getValue();
        Config.sGameRegex = sGame.getValue();
        Config.bList = bList.getValue();

        Config.bVerifyPeer = !bInsecure.getValue();
        Config.bColor = !bNoColor.getValue();
        Config.bUnicode = !bNoUnicode.getValue();
        Config.bDuplicateHandler = !bNoDuplicateHandler.getValue();
        Config.bCover = !bNoCover.getValue();
        Config.bInstallers = !bNoInstallers.getValue();
        Config.bExtras = !bNoExtras.getValue();
        Config.bPatches = !bNoPatches.getValue();
        Config.bLanguagePacks = !bNoLanguagePacks.getValue();
        Config.bRemoteXML = !bNoRemoteXML.getValue();
        Config.bSubDirectories = !bNoSubDirectories.getValue();
    } catch (RuntimeException e) {
        System.err.println("Error: " + e.getMessage());
        throw e;
    } catch (java.lang.Exception e) {
        System.err.println("Exception of unknown type!");
        throw new RuntimeException(e);
    }

    if (Config.iInstallerType < GlobalConstants.PLATFORMS.get(0).platformId
            || Config.iInstallerType > platform_sum) {
        System.out.println("Invalid value for --platform");
        throw new RuntimeException("Invalid value for --platform");
    }

    if (Config.iInstallerLanguage < GlobalConstants.LANGUAGES.get(0).languageId
            || Config.iInstallerLanguage > language_sum) {
        System.out.println("Invalid value for --language");
        throw new RuntimeException("Invalid value for --language");
    }

    // Make sure that xml directory doesn't have trailing slash
    // (content check instead of a reference comparison against "").
    if (Config.sXMLDirectory != null && !Config.sXMLDirectory.isEmpty()
            && Config.sXMLDirectory.endsWith("/")) {
        Config.sXMLDirectory = Config.sXMLDirectory.substring(0, Config.sXMLDirectory.length() - 1);
    }
    // Create GOG XML for a file
    if ((Config.sXMLFile != null) && !Config.sXMLFile.isEmpty() && !Config.sXMLFile.equals("automatic")) {
        util.createXML(Config.sXMLFile, Config.iChunkSize, Config.sXMLDirectory);
    }
    // Make sure that directory has trailing slash
    // if (Config.sDirectory != null && !Config.sDirectory.isEmpty()) {
    // if (Config.sDirectory.charAt(Config.sDirectory.length() - 1) != '/')
    // {
    // Config.sDirectory += "/";
    // }
    // }
    Downloader downloader = new Downloader();
    boolean result = downloader.init();

    if (Config.bLogin) {
        if (!result) {
            throw new RuntimeException("downloader.init failed");
        }
        return;
    } else if (Config.bSaveConfig) {
        // Saving the config is not ported yet: ofs is always null, so this branch
        // currently always reports a failure.
        // std.ofstream ofs = new
        // std.ofstream(config.sConfigFilePath.c_str());
        String ofs = null;
        if (ofs != null) {
            System.out.println("Saving config: " + Config.sConfigFilePath);
            /*
             * for (boost.program_options.variables_map.iterator it =
             * vm.begin(); it != vm.end(); ++it) { String option = it.first;
             * String option_value_string;
             * boost.program_options.variable_value option_value =
             * it.second;
             * 
             * try { if (option.equals(options_cfg_all.find(option,
             * false).long_name())) { if (!option_value.empty()) {
             * std.type_info type = option_value.value().type(); if (type ==
             * typeid(String)) { option_value_string =
             * option_value.<String>as(); }
             * 
             * } } } catch (java.lang.Exception e2) { continue; }
             * 
             * if (option_value_string!="") {
             * System.out.println(option+" = "+option_value_string); //ofs
             * << option.compareTo() < 0 < < " = " <<
             * option_value_string.compareTo() < 0 < < std.endl; } }
             * //ofs.close();
             */
        } else {
            System.out.println("Failed to create config: " + Config.sConfigFilePath);
            throw new RuntimeException("Failed to create config: " + Config.sConfigFilePath);
        }
    } else if (Config.bResetConfig) {
        // Resetting the config is not ported yet either; same always-null stub as above.
        String ofs = null;
        // std.ofstream ofs = new
        // std.ofstream(config.sConfigFilePath.c_str());
        if (ofs != null) {
            /*
             * if (config.sToken!="" && config.sSecret!="") { ofs
             * +="token = " +config.sToken+"\n"; ofs +="secret = "
             * +config.sSecret+"\n"; }
             */
            // ofs.close();
        } else {
            System.out.println("Failed to create config: " + Config.sConfigFilePath);
            throw new RuntimeException("Failed to create config: " + Config.sConfigFilePath);
        }
    } else if (Config.bUpdateCheck) {
        // Update check has priority over download and list
        downloader.updateCheck();
    } else if (Config.bRepair) {
        // Repair file
        downloader.repair();
    } else if ((Config.sSearch != null) && (!Config.sSearch.isEmpty())) {
        // search games
        downloader.searchGames(Config.sSearch);
    } else if (Config.bDownload) {
        // Download games
        downloader.download();
    } else if (Config.bListDetails || Config.bList) {
        // Detailed list of games/extras
        downloader.listGames();
    } else if (Config.sOrphanRegex != null) {
        // Check for orphaned files if regex for orphans is set
        downloader.checkOrphans();
    } else if (Config.bCheckStatus) {
        downloader.checkStatus();
    } else {
        // Show help message
        System.out.println(Config.sVersionString + "" + options_cli_all);
    }

    // Orphan check was called at the same time as download. Perform it
    // after download has finished
    if (Config.sOrphanRegex != null && Config.bDownload) {
        downloader.checkOrphans();
    }
}

From source file:org.ednovo.gooru.application.util.ResourceImageUtil.java

/**
 * Builds a metadata map (title, description, duration and, on request, candidate images) for a
 * resource URL.
 *
 * <p>URLs containing {@code VIMEO_VIDEO} or {@code YOUTUBE_VIDEO} are resolved through the
 * matching feed helper. When no feed is available, or the feed reports status 404, the page is
 * fetched with Jsoup instead: its title, the content of the first {@code META} element whose
 * {@code NAME} attribute equals {@code DESCRIPTION} (ignoring case) and, if requested, the
 * absolute {@code src} of its {@code IMG} elements are used. Image scanning stops once
 * {@code SUGGEST_IMAGE_MAX_SIZE} entries have been collected.
 *
 * @param url            resource location; when {@code null} nothing is fetched
 * @param resourceTitle  not read by this method
 * @param fetchThumbnail whether image URLs are collected and stored under {@code IMAGES}
 * @return map holding {@code TITLE}, {@code DESCRIPTION} and {@code DURATION}, plus
 *         {@code IMAGES} when {@code fetchThumbnail} is {@code true}
 */
public Map<String, Object> getResourceMetaData(String url, String resourceTitle, boolean fetchThumbnail) {
    Map<String, Object> metaData = new HashMap<String, Object>();

    // Known video hosts go through their feed helpers; Vimeo is tested before YouTube.
    ResourceMetadataCo resourceFeeds = null;
    if (url != null) {
        if (url.contains(VIMEO_VIDEO)) {
            resourceFeeds = getMetaDataFromVimeoVideo(url);
        } else if (url.contains(YOUTUBE_VIDEO)) {
            resourceFeeds = getYoutubeResourceFeeds(url, null);
        }
    }

    String title = "";
    String description = "";
    String videoDuration = "";
    Set<String> images = new LinkedHashSet<String>();

    boolean feedUsable = resourceFeeds != null && resourceFeeds.getUrlStatus() != 404;
    if (feedUsable) {
        title = resourceFeeds.getTitle();
        description = resourceFeeds.getDescription();
        videoDuration = resourceFeeds.getDuration().toString();
    } else {
        // No usable feed: fall back to scraping the page itself.
        Document page = null;
        try {
            boolean looksLikeWebUrl = url != null && (url.contains("http://") || url.contains("https://"));
            if (looksLikeWebUrl) {
                page = Jsoup.connect(url).timeout(6000).get();
            }
        } catch (Exception e) {
            // Best effort: a failed fetch leaves the defaults in place.
            e.printStackTrace();
        }

        if (page != null) {
            title = page.title();

            Elements metaTags = page.getElementsByTag(META);
            if (metaTags != null) {
                for (Element tag : metaTags) {
                    String name = tag.attr(NAME);
                    if (name != null && name.equalsIgnoreCase(DESCRIPTION)) {
                        description = tag.attr(CONTENT);
                        break;
                    }
                }
            }
            metaData.put(DESCRIPTION, description);

            if (fetchThumbnail) {
                Elements withSource = page.select("[src]");
                if (withSource != null) {
                    for (Element candidate : withSource) {
                        if (candidate.tagName().equals(IMG)) {
                            images.add(candidate.attr("abs:src"));
                        }
                        // Checked after every element, whether or not it was an image.
                        if (images.size() >= SUGGEST_IMAGE_MAX_SIZE) {
                            break;
                        }
                    }
                }
            }
        }
    }

    if (fetchThumbnail) {
        // The feed thumbnail is added even when the feed itself reported 404.
        if (resourceFeeds != null && resourceFeeds.getThumbnail() != null) {
            images.add(resourceFeeds.getThumbnail());
        }
        metaData.put(IMAGES, images);
    }

    metaData.put(TITLE, title);
    metaData.put(DESCRIPTION, description);
    metaData.put(DURATION, videoDuration);
    return metaData;
}

From source file:org.jboss.tools.tycho.sitegenerator.GenerateCompositeSite.java

/**
 * Loads a remote page and appends the URLs of its matching links to a list of child sites.
 *
 * <p>All {@code <a>} elements of the page are ordered by descending {@code href} (largest, i.e.
 * newest, first). Each accepted {@code href} is appended verbatim to the page URL and the
 * result is added to {@code childSitesList2}.
 *
 * @param collectChildrenFromRemoteURL2   URL of the page to load; also used as prefix of every
 *                                        added entry
 * @param collectChildrenFromRemoteRegex2 regular expression an {@code href} must match entirely;
 *                                        when {@code null} every link is accepted and the limit
 *                                        is not applied
 * @param collectChildrenFromRemoteLimit2 maximum number of matching links to add; a negative
 *                                        value means no limit
 * @param childSitesList2                 list receiving the resulting URLs
 * @throws MojoFailureException if the page cannot be loaded
 * @throws java.util.regex.PatternSyntaxException if the regular expression is invalid
 */
private void collectChildrenFromRemote(String collectChildrenFromRemoteURL2,
        String collectChildrenFromRemoteRegex2, int collectChildrenFromRemoteLimit2,
        List<String> childSitesList2) throws MojoFailureException {
    final Document doc;
    try {
        doc = Jsoup.connect(collectChildrenFromRemoteURL2).get();
    } catch (IOException ex) {
        throw new MojoFailureException(ex.getMessage(), ex);
    }

    Elements links = doc.getElementsByTag("a");

    // sort largest (newest) first
    Collections.sort(links, new Comparator<Element>() {
        @Override
        public int compare(Element e1, Element e2) {
            return e2.attr("href").compareTo(e1.attr("href"));
        }
    });

    // Compile the expression once; String.matches would recompile it for every link.
    final java.util.regex.Pattern hrefPattern = collectChildrenFromRemoteRegex2 == null ? null
            : java.util.regex.Pattern.compile(collectChildrenFromRemoteRegex2);
    final boolean unlimited = collectChildrenFromRemoteLimit2 < 0;

    int linksAdded = 0;
    for (Element link : links) {
        String linkHref = link.attr("href");
        if (hrefPattern != null) {
            if (!unlimited && linksAdded >= collectChildrenFromRemoteLimit2) {
                // Limit reached: no later link can be accepted any more.
                break;
            }
            if (!hrefPattern.matcher(linkHref).matches()) {
                continue;
            }
        }
        getLog().debug("Adding: " + linkHref);
        childSitesList2.add(collectChildrenFromRemoteURL2 + linkHref);
        linksAdded++;
    }
}

From source file:org.jboss.tools.windup.ui.internal.issues.IssueDetailsView.java

/**
 * Prepares a document for Prism syntax highlighting.
 *
 * <p>Every class name on each {@code <code>} element is replaced by the same name prefixed with
 * {@code language-}, an {@code html} document type node is inserted as the first child, and the
 * bundled {@code html/prism.css} and {@code html/prism.js} files are referenced through a
 * {@code <link>} appended to the head and a {@code <script>} appended to the body.
 *
 * <p>Any exception raised along the way is logged through {@code WindupUIPlugin.log}; changes
 * already applied to the document at that point are kept.
 *
 * @param doc document to modify in place
 */
public static void addPrism(Document doc) {
    try {
        Bundle bundle = WindupUIPlugin.getDefault().getBundle();

        // prismjs requires the "language-" prefix on the class names of code elements.
        for (Element codeElement : doc.getElementsByTag("code")) {
            Set<String> prefixedNames = Sets.newHashSet();
            for (String className : codeElement.classNames()) {
                prefixedNames.add("language-" + className);
            }
            codeElement.classNames(prefixedNames);
        }

        doc.insertChildren(0, Lists.newArrayList(new DocumentType("html", "", "", "")));

        // Stylesheet reference goes into the head.
        Element head = doc.head();
        Element stylesheet = doc.createElement("link");
        URL stylesheetUrl = FileLocator.find(bundle, new Path("html/prism.css"), null);
        stylesheet.attr("href", FileLocator.resolve(stylesheetUrl).getPath());
        stylesheet.attr("rel", "stylesheet");
        head.appendChild(stylesheet);

        // Script reference goes at the end of the body.
        Element body = doc.body();
        Element prismScript = doc.createElement("script");
        URL scriptUrl = FileLocator.find(bundle, new Path("html/prism.js"), null);
        prismScript.attr("src", FileLocator.resolve(scriptUrl).getPath());
        body.appendChild(prismScript);
    } catch (Exception e) {
        WindupUIPlugin.log(e);
    }
}

From source file:org.loklak.api.search.EventBriteCrawlerService.java

/**
 * Scrapes an Eventbrite event page and the profile page of its organizer.
 *
 * <p>The collected values are assembled into JSON objects ({@code event}, organizer, social
 * links and several empty placeholder arrays). Besides being returned, the individual parts are
 * written as {@code *.json} files into {@code <user.home>/Downloads/EventBriteInfo}; failures
 * while writing are printed and otherwise ignored.
 *
 * <p>If the event page cannot be fetched, the failure is printed and a result with an empty data
 * array is returned; no files are written in that case. If only the organizer profile page
 * cannot be fetched, the organizer website, description and feed/account links stay unset.
 *
 * @param url address of the Eventbrite event page
 * @return a {@code SusiThought} whose data array holds the event object, the organizer object
 *         and one object per placeholder array (microlocations, customForms, sessionTypes,
 *         sessions, sponsors, speakers, tracks), in that order
 */
public static SusiThought crawlEventBrite(String url) {
    Document htmlPage = null;
    try {
        htmlPage = Jsoup.connect(url).get();
    } catch (Exception e) {
        e.printStackTrace();
    }
    if (htmlPage == null) {
        // Without the page every lookup below would fail with a NullPointerException.
        SusiThought empty = new SusiThought();
        empty.setData(new JSONArray());
        return empty;
    }

    String eventID = htmlPage.getElementsByTag("body").attr("data-event-id");
    String eventName = htmlPage.getElementsByClass("listing-hero-body").text();
    String eventDescription = htmlPage.select("div.js-xd-read-more-toggle-view.read-more__toggle-view").text();

    // TODO Fetch Event Color
    String eventColor = null;

    String imageLink = htmlPage.getElementsByTag("picture").attr("content");
    String eventLocation = htmlPage.select("p.listing-map-card-street-address.text-default").text();

    String startingTime = eventbriteTruncateTime(eventbritePropertyContent(htmlPage, "event:start_time"));
    String endingTime = eventbriteTruncateTime(eventbritePropertyContent(htmlPage, "event:end_time"));

    String ticketURL = url + "#tickets";

    // TODO Tags to be modified to fit in the format of Open Event "topic"
    // The tags are collected but not yet part of the output; "topic" stays empty.
    Elements tagSpan = htmlPage.getElementsByAttributeValue("data-automation", "ListingsBreadcrumbs")
            .select("span");
    String[][] tags = new String[5][2];
    String topic = "";

    int spanIndex = 0;
    int k = 0;
    for (Element e : tagSpan) {
        if (k >= tags.length) {
            // More breadcrumb spans than the table can hold: stop instead of overflowing it.
            break;
        }
        if (spanIndex % 2 == 0) {
            tags[k][1] = "www.eventbrite.com"
                    + e.select("a.js-d-track-link.badge.badge--tag.l-mar-top-2").attr("href");
        } else {
            tags[k][0] = e.text();
            k++;
        }
        spanIndex++;
    }

    String closingDateTime = null;
    String schedulePublishedOn = null;
    String email = null;
    String privacy = "public"; // By Default
    String state = "completed"; // By Default
    // TODO og:type returns "events.event", which is not supported by Open Event Generator,
    // so the type is left empty.
    String eventType = "";

    JSONObject creator = new JSONObject();
    creator.put("email", "");
    creator.put("id", "1"); // By Default

    Float latitude = eventbriteParseCoordinate(eventbritePropertyContent(htmlPage, "event:location:latitude"));
    Float longitude = eventbriteParseCoordinate(
            eventbritePropertyContent(htmlPage, "event:location:longitude"));

    // The first four characters of the organizer link text are dropped
    // (presumably a "by "-style prefix - TODO confirm against the live markup).
    String organizerLinkText = htmlPage.select("a.js-d-scroll-to.listing-organizer-name.text-default").text();
    String organizerName = organizerLinkText.length() >= 5 ? organizerLinkText.substring(4) : "";

    String organizerLink = url + "#listing-organizer";
    String organizerProfileLink = htmlPage
            .getElementsByAttributeValue("class", "js-follow js-follow-target follow-me fx--fade-in is-hidden")
            .attr("href");
    String organizerContactInfo = url + "#lightbox_contact";

    String organizerWebsite = null;
    String organizerDescription = null;
    String organizerFacebookFeedLink = null;
    String organizerTwitterFeedLink = null;
    String organizerFacebookAccountLink = null;
    String organizerTwitterAccountLink = null;

    Document orgProfilePage = null;
    try {
        orgProfilePage = Jsoup.connect(organizerProfileLink).get();
    } catch (Exception e) {
        // Also reached when the profile link is empty; organizer details then stay unset.
        e.printStackTrace();
    }

    if (orgProfilePage != null) {
        // jsoup lookups return an empty Elements (never null), so text()/attr() yield "" on no match.
        organizerWebsite = orgProfilePage
                .getElementsByAttributeValue("class", "l-pad-vert-1 organizer-website").text();
        organizerDescription = orgProfilePage.select("div.js-long-text.organizer-description").text();

        organizerFacebookFeedLink = organizerProfileLink + "#facebook_feed";
        organizerTwitterFeedLink = organizerProfileLink + "#twitter_feed";

        organizerFacebookAccountLink = orgProfilePage.getElementsByAttributeValue("class", "fb-page")
                .attr("data-href");
        organizerTwitterAccountLink = orgProfilePage
                .getElementsByAttributeValue("class", "twitter-timeline").attr("href");
    }

    JSONArray socialLinks = new JSONArray();

    JSONObject fb = new JSONObject();
    fb.put("id", "1");
    fb.put("name", "Facebook");
    fb.put("link", organizerFacebookAccountLink);
    socialLinks.put(fb);

    JSONObject tw = new JSONObject();
    tw.put("id", "2");
    tw.put("name", "Twitter");
    tw.put("link", organizerTwitterAccountLink);
    socialLinks.put(tw);

    JSONArray jsonArray = new JSONArray();

    JSONObject event = new JSONObject();
    event.put("event_url", url);
    event.put("id", eventID);
    event.put("name", eventName);
    event.put("description", eventDescription);
    event.put("color", eventColor);
    event.put("background_url", imageLink);
    event.put("closing_datetime", closingDateTime);
    event.put("creator", creator);
    event.put("email", email);
    event.put("location_name", eventLocation);
    event.put("latitude", latitude);
    event.put("longitude", longitude);
    event.put("start_time", startingTime);
    event.put("end_time", endingTime);
    event.put("logo", imageLink);
    event.put("organizer_description", organizerDescription);
    event.put("organizer_name", organizerName);
    event.put("privacy", privacy);
    event.put("schedule_published_on", schedulePublishedOn);
    event.put("state", state);
    event.put("type", eventType);
    event.put("ticket_url", ticketURL);
    event.put("social_links", socialLinks);
    event.put("topic", topic);
    jsonArray.put(event);

    JSONObject organizerJson = new JSONObject();
    organizerJson.put("organizer_name", organizerName);
    organizerJson.put("organizer_link", organizerLink);
    organizerJson.put("organizer_profile_link", organizerProfileLink);
    organizerJson.put("organizer_website", organizerWebsite);
    organizerJson.put("organizer_contact_info", organizerContactInfo);
    organizerJson.put("organizer_description", organizerDescription);
    organizerJson.put("organizer_facebook_feed_link", organizerFacebookFeedLink);
    organizerJson.put("organizer_twitter_feed_link", organizerTwitterFeedLink);
    organizerJson.put("organizer_facebook_account_link", organizerFacebookAccountLink);
    organizerJson.put("organizer_twitter_account_link", organizerTwitterAccountLink);
    jsonArray.put(organizerJson);

    // Placeholder sections: always empty for now.
    JSONArray microlocations = new JSONArray();
    jsonArray.put(new JSONObject().put("microlocations", microlocations));

    JSONArray customForms = new JSONArray();
    jsonArray.put(new JSONObject().put("customForms", customForms));

    JSONArray sessionTypes = new JSONArray();
    jsonArray.put(new JSONObject().put("sessionTypes", sessionTypes));

    JSONArray sessions = new JSONArray();
    jsonArray.put(new JSONObject().put("sessions", sessions));

    JSONArray sponsors = new JSONArray();
    jsonArray.put(new JSONObject().put("sponsors", sponsors));

    JSONArray speakers = new JSONArray();
    jsonArray.put(new JSONObject().put("speakers", speakers));

    JSONArray tracks = new JSONArray();
    jsonArray.put(new JSONObject().put("tracks", tracks));

    String path = System.getProperty("user.home") + "/Downloads/EventBriteInfo";

    // mkdirs (not mkdir) so the export also works when "Downloads" does not exist yet.
    new File(path).mkdirs();

    eventbriteWriteJsonFile(path, "event.json", event.toString());
    eventbriteWriteJsonFile(path, "org.json", organizerJson.toString());
    eventbriteWriteJsonFile(path, "social_links.json", socialLinks.toString());
    eventbriteWriteJsonFile(path, "microlocations.json", microlocations.toString());
    eventbriteWriteJsonFile(path, "custom_forms.json", customForms.toString());
    eventbriteWriteJsonFile(path, "session_types.json", sessionTypes.toString());
    eventbriteWriteJsonFile(path, "sessions.json", sessions.toString());
    eventbriteWriteJsonFile(path, "sponsors.json", sponsors.toString());
    eventbriteWriteJsonFile(path, "speakers.json", speakers.toString());
    eventbriteWriteJsonFile(path, "tracks.json", tracks.toString());

    SusiThought json = new SusiThought();
    json.setData(jsonArray);
    return json;
}

/** Returns the {@code content} attribute of the first element whose {@code property} attribute equals the given value, or "" if none matches. */
private static String eventbritePropertyContent(Document page, String property) {
    return page.getElementsByAttributeValue("property", property).attr("content");
}

/** Cuts a timestamp of 20 or more characters down to its first 19 characters; shorter values are returned unchanged. */
private static String eventbriteTruncateTime(String value) {
    return value.length() >= 20 ? value.substring(0, 19) : value;
}

/** Parses a non-empty coordinate string into a {@code Float}; returns {@code null} for an empty string. */
private static Float eventbriteParseCoordinate(String value) {
    if (value.length() > 0) {
        return Float.valueOf(value);
    }
    return null;
}

/** Writes {@code content} to {@code directory/fileName}; an I/O failure is printed and otherwise ignored. */
private static void eventbriteWriteJsonFile(String directory, String fileName, String content) {
    try (FileWriter file = new FileWriter(directory + "/" + fileName)) {
        file.write(content);
    } catch (IOException e1) {
        e1.printStackTrace();
    }
}

From source file:org.loklak.api.search.InstagramProfileScraper.java

/**
 * Fetches an Instagram profile page and extracts the {@code entry_data} value from the JSON
 * embedded in one of its {@code <script>} elements.
 *
 * <p>The JSON is read from the seventh script element of the page, skipping its first 21
 * characters (presumably the {@code window._sharedData = } assignment prefix - TODO confirm
 * against the live markup). If the page cannot be fetched, has fewer than seven script elements,
 * or that script is not longer than the skipped prefix, the returned data array is empty.
 * Malformed JSON or a missing {@code entry_data} key still surfaces as an exception from the
 * JSON library.
 *
 * @param profile profile name appended to {@code https://www.instagram.com/}
 * @return a {@code SusiThought} whose data array holds the {@code entry_data} value, or is empty
 *         when the page could not be processed
 */
public static SusiThought scrapeInstagram(String profile) {
    final int dataScriptIndex = 6;
    final int jsonStartOffset = 21;

    Document htmlPage = null;
    try {
        htmlPage = Jsoup.connect("https://www.instagram.com/" + profile).get();
    } catch (IOException e) {
        e.printStackTrace();
    }

    JSONArray instaProfile = new JSONArray();

    // Guard the fetch failure and the positional lookups instead of failing with a
    // NullPointerException / IndexOutOfBoundsException.
    if (htmlPage != null && htmlPage.getElementsByTag("script").size() > dataScriptIndex) {
        String scriptBody = htmlPage.getElementsByTag("script").get(dataScriptIndex).html();
        if (scriptBody.length() > jsonStartOffset) {
            JSONObject obj = new JSONObject(scriptBody.substring(jsonStartOffset));
            instaProfile.put(obj.get("entry_data"));
        }
    }

    SusiThought json = new SusiThought();
    json.setData(instaProfile);
    return json;
}