Example usage for org.jsoup.nodes Document getElementsByClass

List of usage examples for org.jsoup.nodes Document getElementsByClass

Introduction

On this page you can find example usages of org.jsoup.nodes.Document.getElementsByClass.

Prototype

public Elements getElementsByClass(String className) 

Source Link

Document

Find elements that have this class, including or under this element.

Usage

From source file:faescapeplan.FAEscapePlanUI.java

/**
 * Downloads every journal in {@code journalList}, writes each one to its own text file and
 * finally writes all of them as a JSON array to {@code journals.json}.
 *
 * <p>All files live in {@code <save location>/<user name>/journals/}. The index file is
 * deleted and recreated up front. A journal that cannot be fetched, parsed or written is
 * reported through {@code updateTextLog} and skipped; the remaining journals are still processed.
 *
 * @param journalList IDs of the journals to download; each ID is inserted into the journal URL
 */
@SuppressWarnings("unchecked")
private void downloadJournals(ArrayList<String> journalList) {
    JSONArray jsonList = new JSONArray();
    String downloadLoc = this.saveLocText.getText();
    // Build the paths from components so the platform separator is used instead of a
    // hard-coded "\\", which only works on Windows.
    Path journalDir = Paths.get(downloadLoc, userData.getName(), "journals");
    Path jsonPath = journalDir.resolve("journals.json");

    try {
        Files.deleteIfExists(jsonPath);
        Files.createFile(jsonPath);
    } catch (IOException ex) {
        Logger.getLogger(FAEscapePlanUI.class.getName()).log(Level.SEVERE, null, ex);
        JOptionPane.showMessageDialog(this, "A critical IO exception occurred in method: downloadJournals");
    }

    for (String item : journalList) {
        try {
            Map<String, String> jsonMap = new LinkedHashMap<>();
            Document doc = Jsoup.connect("http://www.furaffinity.net/journal/" + item + "/")
                    .cookies(userData.getCookies()).userAgent(USER_AGENT).get();

            // get(0) on an empty result throws IndexOutOfBoundsException, which would abort
            // the whole loop; skip just this journal when the expected elements are missing.
            if (doc.getElementsByClass("popup_date").isEmpty()
                    || doc.getElementsByClass("journal-body").isEmpty()) {
                updateTextLog("Could not find the journal content for journal: " + item);
                continue;
            }

            String title = doc.title().split(" -- ")[0];
            String date = doc.getElementsByClass("popup_date").get(0).attr("title");
            String body = doc.getElementsByClass("journal-body").get(0).html();
            jsonMap.put("title", title);
            jsonMap.put("date", date);
            jsonMap.put("body", body);
            jsonList.add(jsonMap);

            Path journalPath = journalDir.resolve(item + "_" + title + ".txt");
            String bodyParsed = removeHtmlTags(body);

            try (FileWriter journalWriter = new FileWriter(journalPath.toFile())) {
                journalWriter.append(title + System.lineSeparator());
                journalWriter.append(date + System.lineSeparator());
                journalWriter.append(bodyParsed + System.lineSeparator());
            }
        } catch (FileAlreadyExistsException ex) {
            Logger.getLogger(FAEscapePlanUI.class.getName()).log(Level.SEVERE, null, ex);
            updateTextLog("File already exists");
        } catch (IOException ex) {
            Logger.getLogger(FAEscapePlanUI.class.getName()).log(Level.SEVERE, null, ex);
            updateTextLog("An IO Exception occurred while downloading journal: " + item);
        } catch (java.nio.file.InvalidPathException ex) {
            // The title comes from the web page and may contain characters that are not
            // allowed in a file name; the journal stays in the JSON index but gets no text file.
            Logger.getLogger(FAEscapePlanUI.class.getName()).log(Level.SEVERE, null, ex);
            updateTextLog("Invalid file name for journal: " + item);
        }
    }

    String jsonString = JSONValue.toJSONString(jsonList);

    try {
        // StandardOpenOption.WRITE requires the file created above to exist.
        Files.write(jsonPath, Arrays.asList(jsonString), StandardOpenOption.WRITE);
    } catch (IOException ex) {
        Logger.getLogger(FAEscapePlanUI.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:com.kantenkugel.discordbot.jdocparser.JDocParser.java

/**
 * Parses one javadoc HTML page and stores the resulting {@code ClassDocumentation} in {@code docs}.
 *
 * <p>The file name (last segment of {@code name}) must start with an upper-case character,
 * otherwise the page is ignored. The file name is split on {@code '.'}: the last part is treated
 * as the extension, the part before it as the simple class name, and any earlier parts as
 * enclosing classes under which the result is registered as a sub-class.
 *
 * <p>{@code IOException} and {@code NullPointerException} raised while parsing are logged and
 * swallowed; a {@code RuntimeException} is thrown for ambiguous descriptions and for class-name
 * conflicts in {@code docs}.
 *
 * @param jdocBase    base passed on to {@code JDocUtil.getLink} and {@code getDocBlock}
 * @param name        slash-separated path of the page; only the last segment is used
 * @param inputStream HTML content of the page; closed by this method unless it returns early
 * @param docs        target map, keyed by the lower-cased name without extension
 */
static void parse(final String jdocBase, final String name, final InputStream inputStream,
        Map<String, ClassDocumentation> docs) {
    final String[] pathSplits = name.split("/");
    final String fileName = pathSplits[pathSplits.length - 1];
    if (!Character.isUpperCase(fileName.charAt(0))) {
        // Ignore javadoc structure pages (file names that do not start with an upper-case letter).
        // NOTE(review): this early return happens before both the try-with-resources and the
        // explicit close() below, so inputStream is not closed on this path.
        return;
    }
    final String[] nameSplits = fileName.split("\\.");
    // NOTE(review): a file name without any '.' yields a single element, making this index -1.
    final String className = nameSplits[nameSplits.length - 2];
    // File name with the trailing ".<extension>" stripped.
    final String fullName = fileName.substring(0,
            fileName.length() - nameSplits[nameSplits.length - 1].length() - 1);
    try (BufferedReader buffer = new BufferedReader(new InputStreamReader(inputStream))) {
        // Read the whole page and build the DOM document.
        final String content = buffer.lines().collect(Collectors.joining("\n"));
        Document document = Jsoup.parse(content);

        // Class-level documentation: signature, package and description.
        Element titleElem = getSingleElementByClass(document, "title");
        final String classSig = JDocUtil.fixSpaces(titleElem.text());
        // The element preceding the title holds the package; if it has several children,
        // the last child is used.
        Element packageElem = titleElem.previousElementSibling();
        if (packageElem.children().size() > 1) {
            packageElem = packageElem.children().last();
        }
        final String pack = JDocUtil.fixSpaces(packageElem.text());
        final String link = JDocUtil.getLink(jdocBase, pack, fullName);
        Element descriptionElement = null;
        Elements descriptionCandidates = document.select(".description .block");
        if (descriptionCandidates.size() > 1) {
            // Several blocks: keep only those whose first child is not a "deprecat*" element;
            // exactly one must remain.
            List<Element> removed = descriptionCandidates.stream().map(elem -> elem.child(0))
                    .filter(child -> child != null && !child.className().startsWith("deprecat"))
                    .map(Element::parent).collect(Collectors.toList());
            if (removed.size() != 1)
                throw new RuntimeException("Found too many description candidates");
            descriptionElement = removed.get(0);
        } else if (descriptionCandidates.size() == 1) {
            descriptionElement = descriptionCandidates.get(0);
        }
        // No description block at all results in an empty description.
        final String description = descriptionElement == null ? ""
                : JDocUtil.formatText(descriptionElement.html(), link);
        // The last constructor argument is true when the signature starts with "Enum".
        final ClassDocumentation classDoc = new ClassDocumentation(pack, fullName, classSig, description,
                classSig.startsWith("Enum"));

        // Member documentation: methods, fields and enum constants from the "details" section.
        final Element details = document.getElementsByClass("details").first();
        if (details != null) {
            // Methods: grouped by lower-cased title, one set per title.
            Element tmp = getSingleElementByQuery(details, "a[name=\"method.detail\"]");
            List<DocBlock> docBlock = getDocBlock(jdocBase, tmp, classDoc);
            if (docBlock != null) {
                for (DocBlock block : docBlock) {
                    Set<MethodDocumentation> mdocs = classDoc.methodDocs
                            .computeIfAbsent(block.title.toLowerCase(), key -> new HashSet<>());
                    mdocs.add(new MethodDocumentation(classDoc, block.signature, block.hashLink,
                            block.description, block.fields));
                }
            }
            // Fields: stored in classValues keyed by lower-cased title.
            tmp = getSingleElementByQuery(details, "a[name=\"field.detail\"]");
            docBlock = getDocBlock(jdocBase, tmp, classDoc);
            if (docBlock != null) {
                for (DocBlock block : docBlock) {
                    classDoc.classValues.put(block.title.toLowerCase(), new ValueDocumentation(classDoc,
                            block.title, block.hashLink, block.signature, block.description));
                }
            }
            // Enum constants: stored in the same classValues map as fields.
            tmp = getSingleElementByQuery(details, "a[name=\"enum.constant.detail\"]");
            docBlock = getDocBlock(jdocBase, tmp, classDoc);
            if (docBlock != null) {
                for (DocBlock block : docBlock) {
                    classDoc.classValues.put(block.title.toLowerCase(), new ValueDocumentation(classDoc,
                            block.title, block.hashLink, block.signature, block.description));
                }
            }
        }
        // Inherited methods are taken from the method summary anchor of the whole document.
        final Element methodSummary = getSingleElementByQuery(document, "a[name=\"method.summary\"]");
        classDoc.inheritedMethods.putAll(getInheritedMethods(methodSummary));

        // Storing. More than two name parts means at least one enclosing class in the file name.
        if (nameSplits.length > 2) {
            // Walk (and create as empty placeholders where missing) the chain of enclosing classes.
            if (!docs.containsKey(nameSplits[0].toLowerCase()))
                docs.put(nameSplits[0].toLowerCase(), new ClassDocumentation(null, null, null, null, false));
            ClassDocumentation parent = docs.get(nameSplits[0].toLowerCase());
            for (int i = 1; i < nameSplits.length - 2; i++) {
                if (!parent.subClasses.containsKey(nameSplits[i].toLowerCase()))
                    parent.subClasses.put(nameSplits[i].toLowerCase(),
                            new ClassDocumentation(null, null, null, null, false));
                parent = parent.subClasses.get(nameSplits[i].toLowerCase());
            }
            // Carry over sub-classes of an entry already registered under this name before replacing it.
            if (parent.subClasses.containsKey(className.toLowerCase()))
                classDoc.subClasses.putAll(parent.subClasses.get(className.toLowerCase()).subClasses);
            parent.subClasses.put(className.toLowerCase(), classDoc);
        }
        if (docs.containsKey(fullName.toLowerCase())) {
            ClassDocumentation current = docs.get(fullName.toLowerCase());
            // An existing entry with a non-null signature is a real class, not a placeholder.
            if (current.classSig != null)
                throw new RuntimeException("Got a class-name conflict with classes " + classDoc.classSig + "("
                        + classDoc.className + ") AND " + current.classSig + "(" + current.className + ")");
            classDoc.subClasses.putAll(current.subClasses);
        }
        docs.put(fullName.toLowerCase(), classDoc);
    } catch (final IOException | NullPointerException ex) {
        // NullPointerException is caught as well, so a page lacking an element dereferenced
        // above is logged and skipped instead of propagating.
        JDocUtil.LOG.error("Got excaption for element {}", fullName, ex);
    }
    // The stream is closed again explicitly after the try-with-resources block.
    try {
        inputStream.close();
    } catch (final IOException e) {
        JDocUtil.LOG.error("Error closing inputstream", e);
    }
}

From source file:info.dolezel.fatrat.plugins.UloztoDownload.java

/**
 * Resolves an uloz.to link into a download.
 *
 * <p>Steps: the host is normalised to {@code https://www.uloz.to}, {@code logIn} is called and
 * the page is fetched. A {@code location} header is followed (or reported as a removed file),
 * a disclaimer page is retried with {@code ?disclaimer=1}, and then either the premium link is
 * started directly or the free-download form is submitted after solving a captcha.
 *
 * <p>All outcomes are reported through {@code setFailed}, {@code setMessage},
 * {@code reportFileName} and {@code startDownload}.
 *
 * @param link the uloz.to URL to process
 */
@Override
public void processLink(String link) {

    // String.replace() matches its first argument literally, so the former
    // replace("https?://uloz.to", ...) never changed anything. replaceFirst() takes a regex;
    // the anchored pattern rewrites both the bare and the mobile ("m.") host.
    link = link.replaceFirst("^https?://(m\\.)?uloz\\.to", "https://www.uloz.to");

    if (!logIn(link))
        return;

    final String downloadLink = link; // I can't make 'link' final

    fetchPage(link, new PageFetchListener() {

        @Override
        public void onCompleted(ByteBuffer buf, Map<String, String> headers) {
            try {
                if (headers.containsKey("location")) {
                    String location = headers.get("location");
                    if (location.contains("smazano") || location.contains("nenalezeno"))
                        setFailed("The file has been removed");
                    else
                        processLink(location);
                    return;
                }

                // Decode once and reuse the string for every check below.
                final String page = charsetUtf8.decode(buf).toString();

                if (page.contains("?disclaimer=1")) {
                    processLink(downloadLink + "?disclaimer=1");
                    return;
                }

                final Document doc = Jsoup.parse(page);
                final Element freeForm = doc.getElementById("frm-download-freeDownloadTab-freeDownloadForm");
                // getElementById() expects the bare id; with a leading '#' (CSS selector
                // syntax) it never matches and the premium branch was unreachable.
                final Element premiumLink = doc.getElementById("quickDownloadButton");

                boolean usePremium = usePremium(downloadLink);

                if (page.contains("Nem dostatek kreditu"))
                    setMessage("Credit depleted, using FREE download");
                else if (usePremium && premiumLink != null) {
                    String msg = "Using premium download";

                    Elements aCredits = doc.getElementsByAttributeValue("href", "/kredit");

                    if (!aCredits.isEmpty())
                        msg += " (" + aCredits.get(0).ownText() + " left)";

                    setMessage(msg);

                    startDownload("http://www.uloz.to" + premiumLink.attr("href"));
                    return;

                } else if (loggedIn)
                    setMessage("Login failed, using FREE download");

                Elements aNames = doc.getElementsByClass("jsShowDownload");
                if (!aNames.isEmpty())
                    reportFileName(aNames.get(0).ownText());

                // Without the form there is nothing to submit; fail with a readable message
                // instead of the NullPointerException text the catch below would report.
                if (freeForm == null) {
                    setFailed("Free download form not found on the page");
                    return;
                }

                final PostQuery pq = new PostQuery();
                final Map<String, String> hdr = new HashMap<String, String>();
                Elements eHiddens = freeForm.select("input[type=hidden]");

                hdr.put("X-Requested-With", "XMLHttpRequest");
                hdr.put("Referer", downloadLink);
                hdr.put("Accept", "application/json, text/javascript, */*; q=0.01");

                // Carry every hidden form field over into the POST body.
                for (Element e : eHiddens)
                    pq.add(e.attr("name"), e.attr("value"));

                // nextInt(bound) is always non-negative; Math.abs(nextInt()) stays negative
                // for Integer.MIN_VALUE.
                fetchPage("https://uloz.to/reloadXapca.php?rnd=" + new Random().nextInt(Integer.MAX_VALUE),
                        new PageFetchListener() {

                            @Override
                            public void onCompleted(ByteBuffer buf, Map<String, String> headers) {
                                CharBuffer cb = charsetUtf8.decode(buf);
                                String captchaUrl;

                                try {
                                    // The captcha descriptor supplies the image URL plus the
                                    // fields that must be posted back with the answer.
                                    JSONObject json = new JSONObject(cb.toString());
                                    captchaUrl = "https:" + json.getString("image");
                                    pq.add("hash", json.getString("hash"));
                                    pq.add("timestamp", "" + json.getInt("timestamp"));
                                    pq.add("salt", "" + json.getInt("salt"));
                                } catch (JSONException e) {
                                    setFailed("Error parsing captcha JSON");
                                    return;
                                }

                                solveCaptcha(captchaUrl, new CaptchaListener() {

                                    @Override
                                    public void onFailed() {
                                        setFailed("Failed to decode the captcha code");
                                    }

                                    @Override
                                    public void onSolved(String text) {

                                        String action = freeForm.attr("action");
                                        pq.add("captcha_value", text);

                                        fetchPage("https://www.uloz.to" + action, new PageFetchListener() {

                                            @Override
                                            public void onCompleted(ByteBuffer buf,
                                                    Map<String, String> headers) {
                                                try {
                                                    CharBuffer cb = charsetUtf8.decode(buf);
                                                    JSONObject obj = new JSONObject(cb.toString());

                                                    startDownload(obj.getString("url"));
                                                } catch (Exception e) {
                                                    setFailed("" + e);
                                                }
                                            }

                                            @Override
                                            public void onFailed(String error) {
                                                setFailed(error);
                                            }

                                        }, pq.toString(), hdr);

                                    }
                                });
                            }

                            @Override
                            public void onFailed(String error) {
                                setFailed("Failed to load captcha AJAX page");
                            }

                        });

            } catch (Exception e) {
                e.printStackTrace();
                setFailed(e.toString());
            }
        }

        @Override
        public void onFailed(String error) {
            setFailed("Failed to load the initial page");
        }
    }, null);
}

From source file:de.geeksfactory.opacclient.apis.SISIS.java

/**
 * Logs the given account in at the OPAC.
 *
 * <p>The login page is fetched first; if it contains an {@code as_fid} input, its value is sent
 * along with the credentials. A failure to fetch that page is only printed, and the login is
 * still attempted without the field.
 *
 * @param acc account whose name and password are submitted
 * @return {@code true} after a successful login, {@code false} if the login request itself
 *         failed with an I/O error
 * @throws OpacErrorException if the response contains an element with class {@code error};
 *                            its text becomes the exception message
 */
protected boolean login(Account acc) throws OpacErrorException {
    List<NameValuePair> form = new ArrayList<>(2);

    try {
        Document loginPageDoc = Jsoup
                .parse(httpGet(opac_url + "/userAccount.do?methodToCall=show&type=1", ENCODING));
        if (!loginPageDoc.select("input[name=as_fid]").isEmpty()) {
            form.add(new BasicNameValuePair("as_fid",
                    loginPageDoc.select("input[name=as_fid]").first().attr("value")));
        }
    } catch (IOException e1) {
        e1.printStackTrace();
    }

    form.add(new BasicNameValuePair("username", acc.getName()));
    form.add(new BasicNameValuePair("password", acc.getPassword()));
    form.add(new BasicNameValuePair("CSId", CSId));
    form.add(new BasicNameValuePair("methodToCall", "submit"));

    String response;
    try {
        response = handleLoginMessage(
                httpPost(opac_url + "/login.do", new UrlEncodedFormEntity(form), ENCODING));
    } catch (IOException e) {
        // Also covers UnsupportedEncodingException and ClientProtocolException, which are
        // IOException subclasses and were handled identically.
        e.printStackTrace();
        return false;
    }

    Document doc = Jsoup.parse(response);
    if (!doc.getElementsByClass("error").isEmpty()) {
        throw new OpacErrorException(doc.getElementsByClass("error").get(0).text());
    }

    // Remember when and as whom we are logged in.
    logged_in_as = acc;
    logged_in = System.currentTimeMillis();
    return true;
}

From source file:de.geeksfactory.opacclient.apis.Bibliotheca.java

/**
 * Loads the account page for {@code acc}, logging in first when the page asks for it.
 *
 * @param acc account to load
 * @return the result of {@code parse_account} for the account page, or {@code null} when the
 *         account name is {@code null} or the literal string {@code "null"}
 * @throws OpacErrorException if the page contains exactly one {@code kontomeldung} element;
 *                            its text becomes the exception message
 */
@Override
public AccountData account(Account acc) throws IOException, JSONException, OpacErrorException {
    if (!initialised) {
        start();
    }

    if (acc.getName() == null || "null".equals(acc.getName())) {
        return null;
    }

    Document doc = Jsoup.parse(httpGet(opac_url + "/index.asp?kontofenster=start", "ISO-8859-1"));

    if (!doc.select("input[name=AUSWEIS]").isEmpty()) {
        // Login required: the page shows the card-number input, so submit the credentials.
        List<NameValuePair> loginForm = new ArrayList<>();
        loginForm.add(new BasicNameValuePair("AUSWEIS", acc.getName()));
        loginForm.add(new BasicNameValuePair("PWD", acc.getPassword()));
        if (data.has("db")) {
            loginForm.add(new BasicNameValuePair("vkontodb", data.getString("db")));
        }
        loginForm.add(new BasicNameValuePair("B1", "weiter"));
        loginForm.add(new BasicNameValuePair("kontofenster", "true"));
        loginForm.add(new BasicNameValuePair("target", "konto"));
        loginForm.add(new BasicNameValuePair("type", "K"));
        String response = httpPost(opac_url + "/index.asp", new UrlEncodedFormEntity(loginForm),
                "ISO-8859-1", true);
        doc = Jsoup.parse(response);
    }

    if (doc.getElementsByClass("kontomeldung").size() == 1) {
        throw new OpacErrorException(doc.getElementsByClass("kontomeldung").get(0).text());
    }

    logged_in = System.currentTimeMillis();
    logged_in_as = acc;
    return parse_account(acc, doc, data);
}

From source file:de.geeksfactory.opacclient.apis.SISIS.java

/**
 * Performs one step of the multi-step reservation (or order) of {@code item}.
 *
 * <p>Depending on {@code useraction} and {@code selection} this either confirms a pending
 * request, starts a new one (logging in and asking for a branch if the page requires it), or
 * submits a chosen branch. The resulting page is then interpreted as an error, a confirmation
 * request, or success.
 *
 * @param item       item to reserve; its reservation info is appended to the availability URL
 * @param acc        account used when the page asks for a login
 * @param useraction step identifier: {@code MultiStepResult.ACTION_CONFIRMATION},
 *                   {@code ReservationResult.ACTION_BRANCH}, or {@code 0} for the initial step
 * @param selection  branch chosen by the user, or {@code null} for the initial step
 * @return the outcome of this step
 * @throws IOException if one of the HTTP requests fails
 */
@Override
public ReservationResult reservation(DetailledItem item, Account acc, int useraction, String selection)
        throws IOException {
    String reservation_info = item.getReservation_info();
    final String branch_inputfield = "issuepoint";

    Document doc = null;

    // The same flow serves reservations and orders; only the endpoint and methodToCall differ.
    String action = "reservation";
    if (reservation_info.contains("doBestellung")) {
        action = "order";
    }

    if (useraction == MultiStepResult.ACTION_CONFIRMATION) {
        // The user confirmed the details shown in a previous step: submit the request.
        List<NameValuePair> nameValuePairs = new ArrayList<>(2);
        nameValuePairs.add(new BasicNameValuePair("methodToCall", action));
        nameValuePairs.add(new BasicNameValuePair("CSId", CSId));
        String html = httpPost(opac_url + "/" + action + ".do", new UrlEncodedFormEntity(nameValuePairs),
                ENCODING);
        doc = Jsoup.parse(html);
    } else if (selection == null || useraction == 0) {
        // Initial step: open the availability page for this item.
        String html = httpGet(opac_url + "/availability.do?" + reservation_info, ENCODING);
        doc = Jsoup.parse(html);

        if (doc.select("input[name=username]").size() > 0) {
            // Login required
            List<NameValuePair> nameValuePairs = new ArrayList<>(2);
            nameValuePairs.add(new BasicNameValuePair("username", acc.getName()));
            nameValuePairs.add(new BasicNameValuePair("password", acc.getPassword()));
            nameValuePairs.add(new BasicNameValuePair("methodToCall", "submit"));
            nameValuePairs.add(new BasicNameValuePair("CSId", CSId));
            nameValuePairs.add(new BasicNameValuePair("login_action", "Login"));

            html = handleLoginMessage(
                    httpPost(opac_url + "/login.do", new UrlEncodedFormEntity(nameValuePairs), ENCODING));
            doc = Jsoup.parse(html);

            // Only record the login when the response shows no error element; an error
            // element is reported further below.
            if (doc.getElementsByClass("error").size() == 0) {
                logged_in = System.currentTimeMillis();
                logged_in_as = acc;
            }
        }
        if (doc.select("input[name=expressorder]").size() > 0) {
            // The page offers an express-order input: post the form with a blank value for it.
            List<NameValuePair> nameValuePairs = new ArrayList<>(2);
            nameValuePairs.add(new BasicNameValuePair(branch_inputfield, selection));
            nameValuePairs.add(new BasicNameValuePair("methodToCall", action));
            nameValuePairs.add(new BasicNameValuePair("CSId", CSId));
            nameValuePairs.add(new BasicNameValuePair("expressorder", " "));
            html = httpPost(opac_url + "/" + action + ".do", new UrlEncodedFormEntity(nameValuePairs),
                    ENCODING);
            doc = Jsoup.parse(html);
        }
        if (doc.select("input[name=" + branch_inputfield + "]").size() > 0) {
            // The page asks for a branch: collect the options and hand them back to the caller.
            List<Map<String, String>> branches = new ArrayList<>();
            // Options are the table cells three levels above the first branch input.
            for (Element option : doc.select("input[name=" + branch_inputfield + "]").first().parent().parent()
                    .parent().select("td")) {
                // Skip cells that do not contain exactly one input.
                if (option.select("input").size() != 1) {
                    continue;
                }
                String value = option.text().trim();
                String key = option.select("input").val();
                Map<String, String> selopt = new HashMap<>();
                selopt.put("key", key);
                selopt.put("value", value);
                branches.add(selopt);
            }
            ReservationResult result = new ReservationResult(MultiStepResult.Status.SELECTION_NEEDED);
            result.setActionIdentifier(ReservationResult.ACTION_BRANCH);
            result.setSelection(branches);
            return result;
        }
    } else if (useraction == ReservationResult.ACTION_BRANCH) {
        // The user picked a branch: submit it.
        List<NameValuePair> nameValuePairs = new ArrayList<>(2);
        nameValuePairs.add(new BasicNameValuePair(branch_inputfield, selection));
        nameValuePairs.add(new BasicNameValuePair("methodToCall", action));
        nameValuePairs.add(new BasicNameValuePair("CSId", CSId));

        String html = httpPost(opac_url + "/" + action + ".do", new UrlEncodedFormEntity(nameValuePairs),
                ENCODING);
        doc = Jsoup.parse(html);
    }

    // No branch above matched (unknown useraction with a non-null selection).
    if (doc == null) {
        return new ReservationResult(MultiStepResult.Status.ERROR);
    }

    if (doc.getElementsByClass("error").size() >= 1) {
        return new ReservationResult(MultiStepResult.Status.ERROR,
                doc.getElementsByClass("error").get(0).text());
    }

    if (doc.select("#CirculationForm p").size() > 0 && doc.select("input[type=button]").size() >= 2) {
        // Confirmation page: turn each <br>-separated row into a {label, value} pair.
        List<String[]> details = new ArrayList<>();
        for (String row : doc.select("#CirculationForm p").first().html().split("<br>")) {
            Document frag = Jsoup.parseBodyFragment(row);
            if (frag.text().contains(":")) {
                // NOTE(review): only split[1] is used, so anything after a second ':' in the
                // row is dropped, and a row ending in ':' is skipped entirely.
                String[] split = frag.text().split(":");
                if (split.length >= 2) {
                    details.add(new String[] { split[0].trim() + ":", split[1].trim() });
                }
            } else {
                details.add(new String[] { "", frag.text().trim() });
            }
        }
        ReservationResult result = new ReservationResult(Status.CONFIRMATION_NEEDED);
        result.setDetails(details);
        return result;
    }

    if (doc.select("#CirculationForm .textrot").size() >= 1) {
        String errmsg = doc.select("#CirculationForm .textrot").get(0).text();
        // The matched message is German; it appears to say the item can be borrowed at another
        // branch. In that case retry once with the reservation info of the best copy.
        if (errmsg.contains("Dieses oder andere Exemplare in anderer Zweigstelle ausleihbar")) {
            // Best copy: fewest reservations, ties broken by the earlier return date.
            // Copies without reservation info are ignored.
            Copy best = null;
            for (Copy copy : item.getCopies()) {
                if (copy.getResInfo() == null) {
                    continue;
                }
                if (best == null) {
                    best = copy;
                    continue;
                }
                try {
                    if (Integer.parseInt(copy.getReservations()) < Long.parseLong(best.getReservations())) {
                        best = copy;
                    } else if (Integer.parseInt(copy.getReservations()) == Long
                            .parseLong(best.getReservations())) {
                        if (copy.getReturnDate().isBefore(best.getReturnDate())) {
                            best = copy;
                        }
                    }
                } catch (NumberFormatException e) {
                    // Deliberately ignored: a copy whose reservation count is not numeric
                    // leaves the current best unchanged.
                    // NOTE(review): getReturnDate() is dereferenced without a null check above.
                }
            }
            if (best != null) {
                item.setReservation_info(best.getResInfo());
                // Restart from the initial step with the new reservation info.
                return reservation(item, acc, 0, null);
            }
        }
        return new ReservationResult(MultiStepResult.Status.ERROR, errmsg);
    }

    // Success, with the message from the page if one is present.
    if (doc.select("#CirculationForm td[colspan=2] strong").size() >= 1) {
        return new ReservationResult(MultiStepResult.Status.OK,
                doc.select("#CirculationForm td[colspan=2] strong").get(0).text());
    }
    return new ReservationResult(Status.OK);
}

From source file:hu.tbognar76.apking.ApKing.java

/**
 * Looks up the category of an app by fetching its Google Play store page.
 *
 * <p>{@code cat2} is set to the text of the first {@code a[itemprop=genre]} element.
 * {@code cat1} is {@code "Game"} when that element's link points to a {@code GAME} or
 * {@code FAMILY} category and {@code "Application"} otherwise. If the page cannot be fetched
 * or contains no genre element, both fields stay {@code "Unknown"} and a message is printed.
 *
 * @param packageName package id of the app; passed through {@code URI.create}, which throws
 *                    {@code IllegalArgumentException} for strings that are not valid URIs
 * @return the detected category, never {@code null}
 */
public GoogleCategory getCategoryFromGooglePlayStore(String packageName) {
    GoogleCategory cc = new GoogleCategory();
    cc.cat1 = "Unknown";
    cc.cat2 = "Unknown";
    String url = "https://play.google.com/store/apps/details?id=" + URI.create(packageName) + "&hl=en";

    Document doc;
    try {
        doc = Jsoup.connect(url).get();
    } catch (IOException e) {
        System.out.println("!! GooglePlay connect error with : " + url);
        return cc;
    }

    // Expected markup: <a class="document-subtitle category"
    // href="/store/apps/category/GAME_ADVENTURE"> <span itemprop="genre">...</span> </a>
    // select() returns an empty list rather than null, so only first() needs a null check.
    Element genre = doc.select("a[itemprop=genre]").first();
    if (genre == null) {
        System.out.println("!! GooglePlay parse error structure with : " + url);
        return cc;
    }

    cc.cat2 = genre.text();

    String href = genre.attr("href");
    boolean isGame = href.contains("category/GAME") || href.contains("category/FAMILY");
    cc.cat1 = isGame ? "Game" : "Application";

    return cc;
}

From source file:crawler.HackerEarthCrawler.java

/**
 * Crawls HackerEarth starting at {@code baseUrl}, scrapes every discovered problem page and
 * tutorial page, and stores the results through Hibernate.
 *
 * <p>The method works in three phases:
 * <ol>
 * <li>Repeatedly fetch every not-yet-visited URL under {@code https://www.hackerearth.com/},
 * collecting problem URLs (per {@code isProblemUrl}), tutorial URLs (ending in
 * {@code /tutorial/}) and further crawlable URLs (per {@code isCrawlable}) until a full pass
 * fetches nothing new.</li>
 * <li>For each problem URL, split the page text into statement, input format, output format,
 * constraints, sample input/output, explanation, tags and time limit using keyword
 * heuristics, then save the {@code Problem} (replacing an existing row with the same URL).</li>
 * <li>For each tutorial URL, save the tutorial text keyed by the last path segment of the URL
 * (replacing an existing row with the same name).</li>
 * </ol>
 *
 * <p>An {@link IOException} during phase 1 is logged and ends the crawl phase; any exception while
 * processing a single problem is printed and that problem is skipped; an exception in the tutorial
 * phase is printed and ends that phase.
 */
@Override
public void crawl() {

    int flag = 0;

    // URLs that should be crawled
    TreeSet<String> linksset = new TreeSet<String>();
    // URLs discovered during the current pass
    TreeSet<String> tempset = new TreeSet<String>();
    TreeSet<String> tutorialset = new TreeSet<String>();
    // final set of problem URLs
    TreeSet<String> problemset = new TreeSet<String>();
    // crawl status per URL: 0 = not crawled yet, 1 = crawled
    TreeMap<String, Integer> visited = new TreeMap<String, Integer>();

    // seed the crawl with the base URL, marked as not crawled
    linksset.add(baseUrl);
    visited.put(baseUrl, 0);

    try {
        while (true) {
            flag = 0;
            tempset.clear();

            for (String str : linksset) {
                // only fetch URLs that are not crawled yet and belong to the HackerEarth domain
                if ((visited.get(str) == 0) && (str.startsWith("https://www.hackerearth.com/"))) {
                    System.out.println("crawling  " + str);

                    // fetch the page as a document
                    Document doc = Jsoup.connect(str).timeout(0).userAgent(
                            "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0")
                            .referrer("http://www.google.com").ignoreHttpErrors(true).get();
                    // collect every anchor carrying an href
                    Elements links = doc.select("a[href]");

                    // mark URL as crawled
                    visited.put(str, 1);

                    // remember that this pass fetched at least one page
                    flag = 1;
                    for (Element link : links) {
                        // resolve once; the absolute URL is used by every check below
                        String href = link.absUrl("href");

                        if (href.endsWith("/tutorial/")) {
                            tutorialset.add(href);
                        }
                        // problem URLs go to the problem set
                        if (href.startsWith("https://www.hackerearth.com/") && isProblemUrl(href)) {
                            problemset.add(href);
                        }
                        // crawlable URLs mentioning the domain are queued for a later pass
                        if (href.contains(("https://www.hackerearth.com/")) && isCrawlable(href)) {
                            // first time this link is seen: mark it as not crawled
                            if (!visited.containsKey(href)) {
                                visited.put(href, 0);
                            }
                            tempset.add(href);
                        }
                    }
                }
            }
            // nothing was fetched in this pass, so nothing is left to crawl
            if (flag == 0) {
                break;
            }
            // queue everything discovered in this pass
            linksset.addAll(tempset);
        }

        System.out.println("\n\ntotal problem urls " + problemset.size());

        int i = 0;
        for (String str : problemset) {
            System.out.println("link " + i + " : " + str);
            i++;
        }

    } catch (IOException ex) {
        Logger.getLogger(HackerEarthCrawler.class.getName()).log(Level.SEVERE, null, ex);
    }

    // scrape every problem page and store it in the database
    for (String problemUrl : problemset) {

        System.out.println("problemUrl :" + problemUrl);
        try {
            // entity that will be stored in the database
            Problem problem = new Problem();
            // problemSIOC: full text of the problem container;
            // problemIOC: the same text with the leading problem statement removed
            String problemSIOC = "", problemIOC = "";
            String problemTitle = "", problemStatement = "", problemInput = "", problemOutput = "",
                    problemConstraints = "";
            String sampleInput = "", sampleOutput = "";
            String problemExplanation = "";
            // default time limit is 1 second
            double problemTimeLimit = 1.0;
            ArrayList<String> tags = new ArrayList<String>();

            // fetch the problem page
            Response response = Jsoup.connect(problemUrl).execute();
            Document doc = response.parse();

            // problem title is the part of <title> before the first '|'
            Element elementTitle = doc.getElementsByTag("title").first();
            StringTokenizer stTitle = new StringTokenizer(elementTitle.text(), "|");
            problemTitle = stTitle.nextToken().trim();

            Element content = doc.getElementsByClass("starwars-lab").first();
            problemSIOC = content.text();
            Elements e = content.children();

            // headings that mark the end of the problem statement
            String breakloop[] = { "input", "input:", "input :", "input format:", "input format :",
                    "input format", "Input and output", "constraints :", "constraints:", "constraints",
                    "$$Input :$$" };
            flag = 0;
            for (Element p : e) {
                String tempStatement = "";
                for (Element pp : p.getAllElements()) {

                    for (String strbreak : breakloop) {
                        if (StringUtils.equalsIgnoreCase(pp.ownText(), strbreak)) {
                            // keep only the text of this child that precedes the heading
                            tempStatement = p.text().substring(0,
                                    p.text().toLowerCase().indexOf(strbreak.toLowerCase()));
                            flag = 1;
                            break;
                        }
                    }
                }

                if (flag == 1) {
                    problemStatement += tempStatement;
                    // nothing was appended, so drop the trailing space added by the previous child
                    if (tempStatement.length() == 0) {
                        problemStatement = problemStatement.substring(0, problemStatement.length() - 1);
                    }
                    break;
                }
                problemStatement += p.text() + " ";
            }

            System.out.println("problemSIOC :" + problemSIOC);
            System.out.println("problemStatement :" + problemStatement);

            if (problemStatement.length() <= problemSIOC.length()) {
                // strip the statement from the full text and trim surrounding whitespace
                problemIOC = problemSIOC.substring(problemStatement.length()).trim();
            } else {
                problemIOC = "";
            }

            System.out.println("problemIOC :" + problemIOC);

            // keywords identifying the input section; index 6 ("input and output") is special-cased below
            String decideInput[] = { "Input format :", "Input format:", "Input format", "inputformat:",
                    "inputformat :", "inputformat", "input and output", "input :", "input:", "input" };
            // keywords identifying the output section
            String decideOutput[] = { "output format :", "output format:", "Output format", "outputformat:",
                    "outputformat :", "outputformat", "output :", "output:", "output" };
            // keywords identifying the constraints section
            String decideConstraint[] = { "constraints:", "constraints :", "constraints", "Constraints :",
                    "constraint:", "constraint :", "constraint", "Contraints :" };

            int posin = 0, posoutput = 0, poscon = 0, idxin, idxout, idxcon, flaginput = 0, flagoutput = 0,
                    flagcon = 0, inlen = 0, outlen = 0, conlen = 0;

            // find position and keyword length of the input section
            for (idxin = 0; idxin < decideInput.length; idxin++) {
                if (StringUtils.containsIgnoreCase(problemIOC, decideInput[idxin])) {

                    posin = problemIOC.toLowerCase().indexOf(decideInput[idxin].toLowerCase());
                    flaginput = 1;
                    inlen = decideInput[idxin].length();

                    // a match located after "sample input" is not the real input heading; keep looking
                    if (StringUtils.containsIgnoreCase(problemIOC, "sample input")) {
                        if (posin > problemIOC.toLowerCase().indexOf("sample input")) {
                            flaginput = 0;
                            inlen = 0;
                        } else {
                            break;
                        }
                    } else {
                        break;
                    }
                }
            }

            // find position and keyword length of the output section
            for (idxout = 0; idxout < decideOutput.length; idxout++) {
                if (StringUtils.containsIgnoreCase(problemIOC, decideOutput[idxout])) {
                    posoutput = problemIOC.toLowerCase().indexOf(decideOutput[idxout].toLowerCase());
                    flagoutput = 1;
                    outlen = decideOutput[idxout].length();
                    break;
                }
            }

            // find position and keyword length of the constraints section
            for (idxcon = 0; idxcon < decideConstraint.length; idxcon++) {
                if (StringUtils.containsIgnoreCase(problemIOC, decideConstraint[idxcon])) {
                    poscon = problemIOC.toLowerCase().indexOf(decideConstraint[idxcon].toLowerCase());
                    flagcon = 1;
                    conlen = decideConstraint[idxcon].length();
                    break;
                }
            }

            System.out.println("input " + flaginput + " " + inlen + " " + posin);
            System.out.println("output " + flagoutput + " " + outlen + " " + posoutput);
            System.out.println("constraint " + flagcon + " " + conlen + " " + poscon);

            // extract input and output format if an input heading was found
            if (flaginput == 1) {
                // heading is "input and output" and constraints are present
                if (idxin == 6 && flagcon == 1) {
                    problemInput = problemIOC.substring(inlen, poscon);
                }
                // heading is "input and output" and constraints are absent
                else if (idxin == 6 && flagcon == 0) {
                    problemInput = problemIOC.substring(inlen);
                }
                // both output format and constraints are present
                else if (flagoutput == 1 && flagcon == 1) {
                    // constraints appear before the input format
                    if (poscon < posin) {
                        problemInput = problemIOC.substring(posin + inlen, posoutput);
                        problemOutput = problemIOC.substring(posoutput + outlen);
                    }
                    // constraints appear between input and output format
                    else if (poscon < posoutput) {
                        problemInput = problemIOC.substring(inlen, poscon);
                        problemOutput = problemIOC.substring(posoutput + outlen);
                    } else {
                        problemInput = problemIOC.substring(inlen, posoutput);
                        problemOutput = problemIOC.substring(posoutput + outlen, poscon);
                    }
                }
                // output format present, constraints absent
                else if (flagoutput == 1 && flagcon == 0) {
                    problemInput = problemIOC.substring(inlen, posoutput);
                    problemOutput = problemIOC.substring(posoutput + outlen);
                } else if (flagoutput == 0 && flagcon == 1) {
                    if (poscon < posin) {
                        problemInput = problemIOC.substring(posin + inlen);
                    } else {
                        problemInput = problemIOC.substring(poscon + conlen, posin);
                    }
                    problemOutput = "";
                } else {
                    problemInput = problemIOC.substring(inlen);
                    problemOutput = "";
                }
            }
            // no input heading found
            else {
                problemInput = "";
                problemOutput = "";
            }

            // extract constraints if a constraints heading was found
            if (flagcon == 1) {
                // constraints appear before the input format
                if (poscon < posin) {
                    problemConstraints = problemIOC.substring(0, posin);
                }
                // constraints appear before the output format
                else if (poscon < posoutput) {
                    problemConstraints = problemIOC.substring(poscon + conlen, posoutput);
                } else {
                    problemConstraints = problemIOC.substring(poscon + conlen);
                }
            }

            System.out.println("problemInput :" + problemInput);
            System.out.println("problemOutput :" + problemOutput);
            System.out.println("problemConstraints :" + problemConstraints);

            // tags: comma-separated text of the second child of the "problem-tags" element
            Element elementtag = doc.getElementsByClass("problem-tags").first().child(1);
            StringTokenizer st = new StringTokenizer(elementtag.text(), ",");
            while (st.hasMoreTokens()) {
                tags.add(st.nextToken().trim());
            }

            // sample input / sample output, if the page has an input-output container
            Element elementSIO = doc.getElementsByClass("input-output-container").first();
            if (elementSIO != null) {
                // 12 and 13 are the lengths of the "SAMPLE INPUT" / "SAMPLE OUTPUT" headings
                int soutpos = elementSIO.text().indexOf("SAMPLE OUTPUT");
                sampleInput = elementSIO.text().substring(12, soutpos);
                sampleOutput = elementSIO.text().substring(soutpos + 13);
                System.out.println("Sample input :\n" + sampleInput + "\n\n\n");
                System.out.println("Sample Output :\n" + sampleOutput);
            } else {
                sampleInput = "";
                sampleOutput = "";
            }

            // explanation: sibling following the first child of "standard-margin", if that child mentions it
            Element elementExplanation = doc.getElementsByClass("standard-margin").first().child(0);
            if (elementExplanation.text().toLowerCase().contains("explanation")) {
                problemExplanation = elementExplanation.nextElementSibling().text();
            }
            System.out.println("Explanation :" + problemExplanation);

            // time limit: first space-separated token of the element's own text
            Element elementTL = doc.getElementsByClass("problem-guidelines").first().child(0).child(1);
            StringTokenizer stTL = new StringTokenizer(elementTL.ownText(), " ");
            problemTimeLimit = Double.parseDouble(stTL.nextToken());

            // empty fields are stored as null
            if (problemTitle.length() == 0) {
                problemTitle = null;
            }
            if (problemStatement.length() == 0) {
                problemStatement = null;
            }
            if (problemInput.length() == 0) {
                problemInput = null;
            }
            if (problemOutput.length() == 0) {
                problemOutput = null;
            }
            if (problemExplanation.length() == 0) {
                problemExplanation = null;
            }
            if (problemConstraints.length() == 0) {
                problemConstraints = null;
            }
            problem.setTitle(problemTitle);
            problem.setProblemUrl(problemUrl);
            problem.setProblemStatement(problemStatement);
            problem.setInputFormat(problemInput);
            problem.setOutputFormat(problemOutput);
            problem.setTimeLimit(problemTimeLimit);
            problem.setExplanation(problemExplanation);
            problem.setConstraints(problemConstraints);

            // attach sample input/output
            SampleInputOutput sampleInputOutput = new SampleInputOutput(problem, sampleInput, sampleOutput);
            problem.getSampleInputOutputs().add(sampleInputOutput);
            problem.setPlatform(Platform.HackerEarth);
            for (String strtag : tags) {
                problem.getTags().add(strtag);
            }

            // store in database
            Session session = null;
            Transaction transaction = null;
            try {
                session = HibernateUtil.getSessionFactory().openSession();
                transaction = session.beginTransaction();

                // look for a problem already stored under this URL
                String hql = "FROM Problem p where p.problemUrl = :problem_url";
                Problem oldProblem = (Problem) session.createQuery(hql).setString("problem_url", problemUrl)
                        .uniqueResult();
                String task;

                if (oldProblem != null) {
                    // replace the stored problem, reusing its id
                    task = "updated";
                    problem.setId(oldProblem.getId());
                    session.delete(oldProblem);
                    session.flush();
                    session.save(problem);
                } else {
                    task = "saved";
                    session.save(problem);
                }

                transaction.commit();
                Logger.getLogger(HackerEarthCrawler.class.getName()).log(Level.INFO, "{0} {1}",
                        new Object[] { task, problem.getProblemUrl() });
            } catch (HibernateException ee) {
                if (transaction != null) {
                    transaction.rollback();
                }
                // pass the exception itself so the logger records its stack trace
                Logger.getLogger(HackerEarthCrawler.class.getName()).log(Level.SEVERE,
                        "Cannot Insert/Update problem into databse: " + problemUrl, ee);
            } finally {
                if (session != null) {
                    session.close();
                }
            }
        } catch (Exception ee) {
            // any scraping failure skips this problem only
            System.out.println(ee.toString());
        }
    }

    System.out.println("\n\n\n\ntutorial urls\n\n");
    try {

        for (String tutorialurl : tutorialset) {
            Response tutorialres = Jsoup.connect(tutorialurl).execute();
            Document doc = tutorialres.parse();

            Tutorial tutorial = new Tutorial();
            tutorial.setContent(doc.getElementsByClass("tutorial").first().text());

            // placeholder name; it is overwritten on both branches before the tutorial is saved
            tutorial.setName(baseUrl);
            // drop the trailing "/tutorial/" (10 characters), then take the last path segment
            tutorialurl = tutorialurl.substring(0, tutorialurl.length() - 10);
            StringTokenizer tutorialtok = new StringTokenizer(tutorialurl, "/");

            String tempstr = "";
            while (tutorialtok.hasMoreTokens()) {
                tempstr = tutorialtok.nextToken();
            }

            Session session = null;
            Transaction transaction = null;
            try {
                session = HibernateUtil.getSessionFactory().openSession();
                transaction = session.beginTransaction();

                // look for a tutorial already stored under this name
                String hql = "FROM Tutorial p where p.name = :name";
                Tutorial oldProblem = (Tutorial) session.createQuery(hql).setString("name", tempstr)
                        .uniqueResult();
                String task;

                if (oldProblem != null) {
                    // replace the stored tutorial, keeping its name
                    task = "updated";
                    tutorial.setName(oldProblem.getName());
                    session.delete(oldProblem);
                    session.flush();
                    session.save(tutorial);
                } else {
                    task = "saved";
                    tutorial.setName(tempstr);
                    session.save(tutorial);
                }

                transaction.commit();
                Logger.getLogger(HackerEarthCrawler.class.getName()).log(Level.INFO, "{0} {1}",
                        new Object[] { task, tutorial.getName() });
            } catch (HibernateException ee) {
                if (transaction != null) {
                    transaction.rollback();
                }
                Logger.getLogger(HackerEarthCrawler.class.getName()).log(Level.SEVERE,
                        "Cannot Insert/Update problem into databse: " + tempstr, ee);
            } finally {
                if (session != null) {
                    session.close();
                }
            }

        }
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
}

From source file:de.geeksfactory.opacclient.apis.Bibliotheca.java

/**
 * Performs one step of the multi-step reservation dialogue for {@code item}.
 *
 * <p>Depending on {@code useraction} the method either confirms a pending reservation, opens the
 * reservation page (logging in with {@code acc} when the page asks for it and offering a branch
 * selection when the page contains one), or submits a previously offered branch
 * {@code selection}. The resulting page is then inspected for a confirmation form or an error
 * message.
 *
 * @return {@code OK} after a confirmation was posted, {@code SELECTION_NEEDED} with the branch
 *         list, {@code CONFIRMATION_NEEDED} with detail rows, or {@code ERROR} (with the page's
 *         "kontomeldung" text when exactly one is present, otherwise a generic message)
 * @throws IOException declared for the HTTP helper calls made by this method
 */
@Override
public ReservationResult reservation(DetailledItem item, Account acc, int useraction, String selection)
        throws IOException {
    String reservationInfo = item.getReservation_info();

    // Step: the user confirmed, so post the confirmation and finish.
    if (useraction == MultiStepResult.ACTION_CONFIRMATION) {
        List<NameValuePair> confirmParams = new ArrayList<>(2);
        confirmParams.add(new BasicNameValuePair("make_allvl", "Bestaetigung"));
        confirmParams.add(new BasicNameValuePair("target", "makevorbest"));
        httpPost(opac_url + "/index.asp", new UrlEncodedFormEntity(confirmParams), getDefaultEncoding());
        return new ReservationResult(MultiStepResult.Status.OK);
    }

    Document doc = null;

    if (selection == null || useraction == 0) {
        // Step: initial request for the reservation page.
        String page = httpGet(opac_url + "/" + reservationInfo, getDefaultEncoding());
        doc = Jsoup.parse(page);

        if (doc.select("input[name=AUSWEIS]").size() > 0) {
            // The page shows a login form: submit the account credentials first.
            List<NameValuePair> loginParams = new ArrayList<>(2);
            loginParams.add(new BasicNameValuePair("AUSWEIS", acc.getName()));
            loginParams.add(new BasicNameValuePair("PWD", acc.getPassword()));
            if (data.has("db")) {
                try {
                    loginParams.add(new BasicNameValuePair("vkontodb", data.getString("db")));
                } catch (JSONException e) {
                    e.printStackTrace();
                }
            }
            loginParams.add(new BasicNameValuePair("B1", "weiter"));
            loginParams.add(new BasicNameValuePair("target", doc.select("input[name=target]").val()));
            loginParams.add(new BasicNameValuePair("type", "VT2"));
            page = httpPost(opac_url + "/index.asp", new UrlEncodedFormEntity(loginParams),
                    getDefaultEncoding());
            doc = Jsoup.parse(page);
        }

        // Switch to the "VZST" field name when the configured branch field is missing from the page.
        if (doc.select("select[name=" + branch_inputfield + "]").size() == 0
                && doc.select("select[name=VZST]").size() > 0) {
            branch_inputfield = "VZST";
        }

        Element branchSelect = doc.select("select[name=" + branch_inputfield + "]").first();
        if (branchSelect != null) {
            // Offer every option of the branch drop-down to the caller.
            List<Map<String, String>> branchOptions = new ArrayList<>();
            for (Element option : branchSelect.children()) {
                String label = option.text().trim();
                String optionKey = option.hasAttr("value") ? option.attr("value") : label;
                Map<String, String> entry = new HashMap<>();
                entry.put("key", optionKey);
                entry.put("value", label);
                branchOptions.add(entry);
            }
            // Remember the form target for the follow-up branch submission.
            _res_target = doc.select("input[name=target]").attr("value");
            ReservationResult selectionNeeded = new ReservationResult(
                    MultiStepResult.Status.SELECTION_NEEDED);
            selectionNeeded.setActionIdentifier(ReservationResult.ACTION_BRANCH);
            selectionNeeded.setSelection(branchOptions);
            return selectionNeeded;
        }
    } else if (useraction == ReservationResult.ACTION_BRANCH) {
        // Step: the user picked a branch, so submit it with the remembered target.
        List<NameValuePair> branchParams = new ArrayList<>(2);
        branchParams.add(new BasicNameValuePair(branch_inputfield, selection));
        branchParams.add(new BasicNameValuePair("button2", "weiter"));
        branchParams.add(new BasicNameValuePair("target", _res_target));
        String page = httpPost(opac_url + "/index.asp", new UrlEncodedFormEntity(branchParams),
                getDefaultEncoding());
        doc = Jsoup.parse(page);
    }

    // No branch above produced a page to inspect.
    if (doc == null) {
        return new ReservationResult(MultiStepResult.Status.ERROR);
    }

    if (doc.select("input[name=target]").size() > 0
            && doc.select("input[name=target]").attr("value").equals("makevorbest")) {
        // The page asks for confirmation: gather everything worth showing to the user.
        List<String[]> details = new ArrayList<>();

        if (doc.getElementsByClass("kontomeldung").size() == 1) {
            details.add(new String[] { doc.getElementsByClass("kontomeldung").get(0).text().trim() });
        }

        // Fee lines: split each centre block at its <br> tags and keep lines that match the fee
        // pattern, contain "orbestellung" and do not contain "usstehend".
        Pattern feePattern = Pattern.compile("geb.hr", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE);
        for (Element div : doc.select(".kontozeile_center")) {
            String flattened = Jsoup.parse(div.html().replaceAll("(?i)<br[^>]*>", "br2n")).text();
            for (String line : flattened.split("br2n")) {
                if (!feePattern.matcher(line).find()) {
                    continue;
                }
                if (line.contains("usstehend") || !line.contains("orbestellung")) {
                    continue;
                }
                details.add(new String[] { line.trim() });
            }
        }

        // Erlangen uses "Kostenpflichtige Vorbestellung (1 Euro)"
        // as the label of its reservation button
        if (doc.select("#vorbest").size() > 0 && doc.select("#vorbest").val().contains("(")) {
            details.add(new String[] { doc.select("#vorbest").val().trim() });
        }

        // Label/value pairs from the table inside the centre block.
        for (Element row : doc.select(".kontozeile_center table tr")) {
            boolean hasSingleLabel = row.select(".konto_feld").size() == 1;
            if (hasSingleLabel && row.select(".konto_feldinhalt").size() == 1) {
                details.add(new String[] { row.select(".konto_feld").text().trim(),
                        row.select(".konto_feldinhalt").text().trim() });
            }
        }

        ReservationResult confirmationNeeded = new ReservationResult(
                MultiStepResult.Status.CONFIRMATION_NEEDED);
        confirmationNeeded.setDetails(details);
        return confirmationNeeded;
    }

    // Anything else is an error; use the page's own message when there is exactly one.
    if (doc.getElementsByClass("kontomeldung").size() == 1) {
        return new ReservationResult(MultiStepResult.Status.ERROR,
                doc.getElementsByClass("kontomeldung").get(0).text());
    }

    return new ReservationResult(MultiStepResult.Status.ERROR,
            stringProvider.getString(StringProvider.UNKNOWN_ERROR));
}

From source file:com.storm.function.GsxtFunction.java

/**
 * Collects the raw payloads for one enterprise from the tjcredit.gov.cn search result page.
 *
 * <p>A {@code "statusCodeDef"} entry is always written first:
 * <ul>
 *   <li>{@code IMAGECODE_ERROR} when the page has neither a {@code result-item} div nor the
 *       summary div;</li>
 *   <li>{@code NO_DATA_FOUND} when the summary text contains the "zero results" marker, or
 *       when result links exist but none of them contains {@code keyword};</li>
 *   <li>{@code SCCCESS} otherwise.</li>
 * </ul>
 * For the first result link whose text contains {@code keyword}, the XML of its enclosing
 * item is stored under {@code "target_item_info"}; if an enterprise id can be read from the
 * link's {@code href}, a fixed sequence of endpoints is requested in the search window and
 * each response body is stored under its own key. Three of those responses are additionally
 * scanned for detail links, whose bodies are stored as {@code List<String>} values.
 *
 * <p>NOTE(review): the map keys look like pinyin abbreviations of the site's section names;
 * their exact meaning is not derivable from this method - confirm with the consumers.
 *
 * @param area          not used by this method
 * @param firstInfoPage search result page; also supplies the web client and window used for
 *                      all follow-up requests
 * @param keyword       text a result link must contain to be treated as the match
 * @param LOGGER        logger receiving the anchor list and the "no match" message
 * @return map of status code and collected payloads (never {@code null})
 * @throws Exception if any follow-up request fails
 */
private Map<String, Object> getHtmlInfoMapOfTianjin(String area, HtmlPage firstInfoPage, String keyword,
        ChannelLogger LOGGER) throws Exception {
    Map<String, Object> resultHtmlMap = new HashMap<String, Object>();
    WebWindow window = firstInfoPage.getWebClient().getCurrentWindow();

    // Only the number of result items matters here, so no element cast is needed.
    List<?> resultItems = firstInfoPage.getByXPath("//div[@class='result-item']");
    HtmlElement summary = (HtmlElement) firstInfoPage
            .getFirstByXPath("//div[@class='content']/div[@style='font-size:12px']");
    if (resultItems.isEmpty() && summary == null) {
        resultHtmlMap.put("statusCodeDef", StatusCodeDef.IMAGECODE_ERROR);
    } else if (summary != null && summary.getTextContent().indexOf("? 0 ?") > 0) {
        // NOTE(review): the "? 0 ?" literal looks mis-encoded (likely non-ASCII text around
        // the zero); it is kept unchanged because the intended text is not visible here.
        resultHtmlMap.put("statusCodeDef", StatusCodeDef.NO_DATA_FOUND);
    } else {
        resultHtmlMap.put("statusCodeDef", StatusCodeDef.SCCCESS);
    }

    @SuppressWarnings("unchecked")
    List<HtmlAnchor> anchors = (List<HtmlAnchor>) firstInfoPage.getByXPath("//div[@class='result-item']/div/a");
    LOGGER.info(String.valueOf(anchors));
    if (anchors == null || anchors.isEmpty()) {
        return resultHtmlMap;
    }

    // Only the first link whose text contains the keyword is processed.
    HtmlAnchor matched = null;
    for (HtmlAnchor anchor : anchors) {
        if (anchor.getTextContent().trim().contains(keyword)) {
            matched = anchor;
            break;
        }
    }
    if (matched == null) {
        resultHtmlMap.put("statusCodeDef", StatusCodeDef.NO_DATA_FOUND);
        LOGGER.info("????");
        return resultHtmlMap;
    }

    HtmlElement targetItemInfo = (HtmlElement) matched.getParentNode().getParentNode();
    resultHtmlMap.put("target_item_info", targetItemInfo.asXml());

    // The enterprise id is whatever sits between the first and second '=' of the href.
    // An href without such a value yields an empty id instead of an out-of-bounds error.
    String entId = matched.getAttribute("href");
    if (!StringUtils.isEmpty(entId)) {
        String[] hrefParts = entId.split("=");
        entId = hrefParts.length > 1 ? hrefParts[1] : "";
    }
    if (StringUtils.isEmpty(entId)) {
        return resultHtmlMap;
    }

    String baseInfoUrl = "http://tjcredit.gov.cn/platform/saic/baseInfo.json?entId=" + entId
            + "&departmentId=scjgw&infoClassId=";
    String reportUrl = "http://tjcredit.gov.cn/report/";
    String saicDetailSuffix = "&entid=" + entId + "&issaic=1&hasInfo=0";

    // ---- baseInfo.json sections (request order is the same as before) ----

    // infoClassId=dj; links inside #touziren lead to per-row detail pages.
    String djContent = fetchTianjinContent(firstInfoPage, window, baseInfoUrl + "dj");
    resultHtmlMap.put("gsgsxx_djxx_jbxx", djContent);
    List<String> djDetails = fetchTianjinDetailPages(firstInfoPage, window,
            Jsoup.parseBodyFragment(djContent).getElementById("touziren"), 0,
            "http://tjcredit.gov.cn/saicpf/gsgdcz?gdczid=", saicDetailSuffix);
    if (!djDetails.isEmpty()) {
        resultHtmlMap.put("gsgsxx_djxx_gdxx", djDetails);
    }

    resultHtmlMap.put("gsgsxx_baxx_zyryxx", fetchTianjinContent(firstInfoPage, window, baseInfoUrl + "ba"));
    resultHtmlMap.put("gsgsxx_dcdydjxx_dcdydjxx",
            fetchTianjinContent(firstInfoPage, window, baseInfoUrl + "dcdydjxx"));
    resultHtmlMap.put("gsgsxx_gqczdjxx_gqczdjxx",
            fetchTianjinContent(firstInfoPage, window, baseInfoUrl + "gqczdjxx"));

    // infoClassId=xzcf; links inside the first .result-table lead to detail pages.
    String xzcfContent = fetchTianjinContent(firstInfoPage, window, baseInfoUrl + "xzcf");
    resultHtmlMap.put("gsgsxx_xzcfxx_xzcfxx", xzcfContent);
    Elements xzcfTables = Jsoup.parseBodyFragment(xzcfContent).getElementsByClass("result-table");
    List<String> xzcfDetails = fetchTianjinDetailPages(firstInfoPage, window,
            xzcfTables.isEmpty() ? null : xzcfTables.get(0), 0,
            "http://tjcredit.gov.cn/saicpf/gsxzcf?id=", saicDetailSuffix);
    if (!xzcfDetails.isEmpty()) {
        resultHtmlMap.put("gsgsxx_xzcfxx_detail", xzcfDetails);
    }

    resultHtmlMap.put("gsgsxx_jyycxx_jyycxx",
            fetchTianjinContent(firstInfoPage, window, baseInfoUrl + "qyjyycmlxx"));
    resultHtmlMap.put("gsgsxx_yzwfxx_yzwfxx",
            fetchTianjinContent(firstInfoPage, window, baseInfoUrl + "yzwfqyxx"));
    resultHtmlMap.put("gsgsxx_ccjcxx_ccjcxx",
            fetchTianjinContent(firstInfoPage, window, baseInfoUrl + "ccjcxx"));

    // ---- /report/ sections ----

    // nblist; the second quoted argument of each #touziren link is used as the "year"
    // parameter of the annals request.
    String nbListContent = fetchTianjinContent(firstInfoPage, window, reportUrl + "nblist?entid=" + entId);
    resultHtmlMap.put("qygsxx_qynb_list", nbListContent);
    List<String> nbDetails = fetchTianjinDetailPages(firstInfoPage, window,
            Jsoup.parseBodyFragment(nbListContent).getElementById("touziren"), 1,
            reportUrl + "annals?entid=" + entId + "&year=", "&hasInfo=0");
    if (!nbDetails.isEmpty()) {
        resultHtmlMap.put("qygsxx_qynb_detail", nbDetails);
    }

    resultHtmlMap.put("qygsxx_xzxkxx", fetchTianjinContent(firstInfoPage, window, reportUrl + "xzxk?entid=" + entId));
    resultHtmlMap.put("qygsxx_gdjczxx", fetchTianjinContent(firstInfoPage, window, reportUrl + "gdcz?entid=" + entId));
    resultHtmlMap.put("qygsxx_gqbgxx", fetchTianjinContent(firstInfoPage, window, reportUrl + "gqbg?entid=" + entId));
    resultHtmlMap.put("qygsxx_zscqczdjxx",
            fetchTianjinContent(firstInfoPage, window, reportUrl + "zscq?entid=" + entId));
    resultHtmlMap.put("qygsxx_xzcfxx", fetchTianjinContent(firstInfoPage, window, reportUrl + "xzcf?entid=" + entId));

    // Only the list responses are stored for the next two sections; the pages behind
    // their links are not requested.
    resultHtmlMap.put("sfxzgsxx_gqdjxx_list",
            fetchTianjinContent(firstInfoPage, window, reportUrl + "gddjlist?entid=" + entId));
    resultHtmlMap.put("qygsxx_gdbgxx_list",
            fetchTianjinContent(firstInfoPage, window, reportUrl + "gdbglist?entid=" + entId));

    return resultHtmlMap;
}

/** Requests {@code url} in {@code window} via the page's web client and returns the body decoded as UTF-8. */
private String fetchTianjinContent(HtmlPage firstInfoPage, WebWindow window, String url) throws Exception {
    Page page = firstInfoPage.getWebClient().getPage(window, new WebRequest(new URL(url)));
    return page.getWebResponse().getContentAsString("utf-8");
}

/**
 * Fetches one page per link found under {@code table} and returns the response bodies.
 * The request URL is {@code urlPrefix + argument + urlSuffix}, where the argument is taken
 * from the link's markup; links whose markup carries no such argument are skipped.
 * Returns an empty list when {@code table} is {@code null} or has no links.
 */
private List<String> fetchTianjinDetailPages(HtmlPage firstInfoPage, WebWindow window, Element table,
        int argIndex, String urlPrefix, String urlSuffix) throws Exception {
    List<String> details = new ArrayList<String>();
    if (table == null) {
        return details;
    }
    Elements links = table.select("tbody").select("tr").select("td").select("a");
    for (Element link : links) {
        String argument = extractTianjinLinkArgument(link.toString(), argIndex);
        if (argument == null) {
            continue;
        }
        details.add(fetchTianjinContent(firstInfoPage, window, urlPrefix + argument + urlSuffix));
    }
    return details;
}

/**
 * Returns the {@code argIndex}-th single-quoted argument of the first {@code ('a','b',...)}
 * group in {@code linkHtml}, or {@code null} when the markup has no such group or argument.
 */
private String extractTianjinLinkArgument(String linkHtml, int argIndex) {
    if (linkHtml == null) {
        return null;
    }
    String[] afterOpen = linkHtml.split("\\(\\'");
    if (afterOpen.length < 2) {
        return null;
    }
    String[] beforeClose = afterOpen[1].split("\\'\\)");
    if (beforeClose.length < 1) {
        return null;
    }
    String[] arguments = beforeClose[0].split("\\'\\,\\'");
    return argIndex >= 0 && argIndex < arguments.length ? arguments[argIndex] : null;
}