List of usage examples for org.jsoup.select Elements select
public Elements select(String query)
From source file:com.normalexception.app.rx8club.fragment.pm.PrivateMessageViewFragment.java
/** * Format the user post by removing the vb style quotes and the * duplicate youtube links/*from w ww . j a v a 2 s. co m*/ * @param innerPost The element that contains the inner post * @return The formatted string */ private String formatUserPost(Elements innerPost) { // Remove the duplicate youtube links (this is caused by a plugin on // the forum that embeds youtube videos automatically) for (Element embedded : innerPost.select("div[id^=ame_doshow_post_]")) embedded.remove(); // Remove the vbulletin quotes String upost = Utils.reformatQuotes(innerPost.html()); return upost; }
From source file:com.normalexception.app.rx8club.fragment.ProfileFragment.java
/** * Get the user information from the users profile * @param doc The page document/*from ww w .java 2s . c o m*/ */ private void getUserInformation(Document doc) { final UserProfile upInstance = UserProfile.getInstance(); stubs = new ArrayList<ProfileModel>(); // Title Elements userInfo = doc.select("div[id=main_userinfo]"); Elements title = userInfo.select("h2"); upInstance.setUserTitle(title.text()); // Posts Elements statisticInfo = doc.select("fieldset[class=statistics_group]"); Elements post = statisticInfo.select("li"); // Profile Pic Elements profilePicInfo = doc.select("td[id=profilepic_cell] > img"); // Grab image, trap try { upInstance.setUserImageLink(profilePicInfo.attr("src")); } catch (Exception e) { } // Grab Post count, trap exception try { upInstance.setUserPostCount( post.get(0).text() + " / " + post.get(1).text().split(" ", 4)[3] + " per day"); } catch (Exception e) { upInstance.setUserPostCount("Error Getting Post Count"); } // Grab Join Date, trap exception try { upInstance.setUserJoinDate(post.get(13).text()); } catch (Exception e) { upInstance.setUserJoinDate("Error Getting Join Date"); } // Threads String link = WebUrls.userUrl + upInstance.getUserId(); doc = VBForumFactory.getInstance().get(getActivity(), link); if (doc != null) { Elements threadlist = doc.select("table[id^=post]"); for (Element threadl : threadlist) { ProfileModel stub = new ProfileModel(); Elements divs = threadl.getElementsByTag("div"); Elements div = divs.get(1).getElementsByTag("a"); stub.setLink(div.attr("href")); stub.setName(div.text()); div = divs.get(5).getElementsByTag("a"); stub.setText(div.text()); stubs.add(stub); } } }
From source file:com.normalexception.app.rx8club.fragment.thread.EditPostFragment.java
/**
 * Looks up an {@code input} element by its {@code name} attribute and reports
 * its {@code value} attribute.
 *
 * @param pan  the elements to search for the input
 * @param name the value of the input's {@code name} attribute
 * @return the {@code value} attribute of the matching input(s), as returned by
 *         {@code Elements.attr}
 */
private String getInputElementValue(Elements pan, String name) {
    String selector = "input[name=" + name + "]";
    Elements matches = pan.select(selector);
    return matches.attr("value");
}
From source file:com.normalexception.app.rx8club.fragment.thread.ThreadFragment.java
/** * Grab contents from the forum that the user clicked on * @param doc The document parsed from the link * @param id The id number of the link * @return An arraylist of forum contents */// w ww.j av a 2 s. co m public void getThreadContents(Document doc) { // Update pagination try { Elements pageNumbers = doc.select("div[class=pagenav]"); if (pageNumbers.first() != null) { Elements pageLinks = pageNumbers.first().select("td[class^=vbmenu_control]"); thisPage = pageLinks.text().split(" ")[1]; finalPage = pageLinks.text().split(" ")[3]; Log.d(TAG, String.format("This Page: %s, Final Page: %s", thisPage, finalPage)); } else { Log.d(TAG, "Thread only contains one page"); } } catch (Exception e) { Log.e(TAG, "We had an error with pagination", e); } // Is user thread admin?? Elements threadTools = doc.select("div[id=threadtools_menu] > form > table"); if (threadTools.text().contains(MODERATION_TOOLS)) { Log.d(TAG, "<><> User has administrative rights here! <><>"); } else { //adminContent.setVisibility(View.GONE); lv.removeHeaderView(adminContent); } // Get the user's actual ID, there is a chance they never got it // before UserProfile.getInstance().setUserId(HtmlFormUtils.getInputElementValueByName(doc, "loggedinuser")); // Get Post Number and security token securityToken = HtmlFormUtils.getInputElementValueByName(doc, "securitytoken"); Elements pNumber = doc.select("a[href^=http://www.rx8club.com/newreply.php?do=newreply&noquote=1&p=]"); String pNumberHref = pNumber.attr("href"); postNumber = pNumberHref.substring(pNumberHref.lastIndexOf("=") + 1); threadNumber = doc.select("input[name=searchthreadid]").attr("value"); Elements posts = doc.select("div[id=posts]").select("div[id^=edit]"); Log.v(TAG, String.format("Parsing through %d posts", posts.size())); for (Element post : posts) { try { Elements innerPost = post.select("table[id^=post]"); // User Control Panel Elements userCp = innerPost.select("td[class=alt2]"); Elements userDetail = 
userCp.select("div[class=smallfont]"); Elements userSubDetail = userDetail.last().select("div"); Elements userAvatar = userDetail.select("img[alt$=Avatar]"); // User Information PostModel pv = new PostModel(); pv.setUserName(userCp.select("div[id^=postmenu]").text()); pv.setIsLoggedInUser(LoginFactory.getInstance().isLoggedIn() ? UserProfile.getInstance().getUsername().equals(pv.getUserName()) : false); pv.setUserTitle(userDetail.first().text()); pv.setUserImageUrl(userAvatar.attr("src")); pv.setPostDate(innerPost.select("td[class=thead]").first().text()); pv.setPostId(Utils.parseInts(post.attr("id"))); pv.setRootThreadUrl(currentPageLink); // get Likes if any exist Elements eLikes = innerPost.select("div[class*=vbseo_liked] > a"); List<String> likes = new ArrayList<String>(); for (Element eLike : eLikes) likes.add(eLike.text()); pv.setLikes(likes); Iterator<Element> itr = userSubDetail.listIterator(); while (itr.hasNext()) { String txt = itr.next().text(); if (txt.contains("Location:")) pv.setUserLocation(txt); else if (txt.contains("Posts:")) pv.setUserPostCount(txt); else if (txt.contains("Join Date:")) pv.setJoinDate(txt); } // User Post Content pv.setUserPost(formatUserPost(innerPost)); // User signature try { Element userSig = innerPost.select("div[class=konafilter]").first(); pv.setUserSignature(userSig.html()); } catch (NullPointerException npe) { } Elements postAttachments = innerPost.select("a[id^=attachment]"); if (postAttachments != null && !postAttachments.isEmpty()) { ArrayList<String> attachments = new ArrayList<String>(); for (Element postAttachment : postAttachments) { attachments.add(postAttachment.attr("href")); } pv.setAttachments(attachments); } pv.setSecurityToken(securityToken); // Make sure we aren't adding a blank user post if (pv.getUserPost() != null) postlist.add(pv); } catch (Exception e) { Log.w(TAG, "Error Parsing Post...Probably Deleted"); } } }
From source file:com.normalexception.app.rx8club.fragment.thread.ThreadFragment.java
/** * Format the user post by removing the vb style quotes and the * duplicate youtube links// ww w . j a va 2s . com * @param innerPost The element that contains the inner post * @return The formatted string */ private String formatUserPost(Elements innerPost) { try { Element ipost = innerPost.select("td[class=alt1]").select("div[id^=post_message]").first(); // Only if there is a post to key off of if (ipost != null) { // Remove the duplicate youtube links (this is caused by a plugin on // the forum that embeds youtube videos automatically) for (Element embedded : ipost.select("div[id^=ame_doshow_post_]")) embedded.remove(); // Remove the vbulletin quotes return Utils.reformatQuotes(ipost.html()); } else { return null; } } catch (Exception e) { Log.e(TAG, "Error Parsing Post", e); return null; } }
From source file:noThreads.Menu.java
public void createMenu() throws IOException, InterruptedException { Document doc = null;//from www . j a v a2 s .co m BufferedReader br = null; System.out.print("******************** Menu Options ******************** " + "\n1. Get a playlist for all the stations at <e-radio.gr>" + "\n2. View the available station Categories and get a playlist." + "\n3. View the available station Locations and get a playlist." + "\n4. View the station Ratings (Top) and get a playlist." + "\n5. Exit." + "\n\n" + "Please make a choice (1-5): "); br = new BufferedReader(new InputStreamReader(System.in)); try { choice = Integer.parseInt(br.readLine()); } catch (IOException e) { System.out.println("Error!"); System.exit(1); } switch (choice) { case (1): //GET all the e-radio location links (in order to get all the links) doc = parseUrl(URL, 0); if (doc == null) { print("No connection to the server! Exiting..."); System.exit(1); } Elements links = doc.select("div[id=paneContainer]").select("a[href*=/locations/]"); for (Element link : links) theUrls.add(link.attr("abs:href")); System.out.println("...Processing <All e-radio> station links"); break; case (2): //Get CATEGORIES doc = parseUrl(URL, 0); if (doc == null) { print("No connection to the server! Exiting..."); System.exit(1); } Elements categoryLinks = doc.select("div[id=paneContainer]").select("a[href*=/categories/]"); System.out.println("E-radio stations available categories: " + "\n"); for (int i = 0; i < categoryLinks.size(); i++) { System.out.println(i + 1 + ". 
" + StringEscapeUtils.unescapeHtml4(categoryLinks.get(i).html())); } System.out.print("\n" + "Please make a choise (1-" + categoryLinks.size() + "): "); br = new BufferedReader(new InputStreamReader(System.in)); try { choice = Integer.parseInt(br.readLine()); } catch (IOException e) { System.out.println("Error!"); System.exit(1); } if (choice <= categoryLinks.size() && choice >= 1) { theUrls.add(categoryLinks.get(choice - 1).attr("abs:href")); System.out.println("...Processing the <" + StringEscapeUtils.unescapeHtml4(categoryLinks.get(choice - 1).html()) + "> category"); } else { System.out.println("Wrong selection..."); System.out.println("Exiting program..."); System.exit(1); } break; case (3)://Get LOCATIONS doc = parseUrl(URL, 0); if (doc == null) { print("No connection to the server! Exiting..."); System.exit(1); } Elements locationLinks = doc.select("div[id=paneContainer]").select("a[href*=/locations/]"); System.out.println("E-radio stations available locations: " + "\n"); for (int i = 0; i < locationLinks.size(); i++) { System.out.println(i + 1 + ". " + StringEscapeUtils.unescapeHtml4(locationLinks.get(i).html())); } System.out.print("\n" + "Please make a choise (1-" + locationLinks.size() + "): "); br = new BufferedReader(new InputStreamReader(System.in)); try { choice = Integer.parseInt(br.readLine()); } catch (IOException e) { System.out.println("Error!"); System.exit(1); } if (choice <= locationLinks.size() && choice >= 1) { theUrls.add(locationLinks.get(choice - 1).attr("abs:href")); System.out.println("...Processing <" + StringEscapeUtils.unescapeHtml4(locationLinks.get(choice - 1).html()) + "> locatino"); } else { System.out.println("Wrong selection!"); System.out.println("Exiting program..."); System.exit(1); } break; case (4): final int YEARLY_RATING = 10; doc = parseUrl(URL, 0); if (doc == null) { print("No connection to the server! 
Exiting..."); System.exit(1); } Elements ratingsMenu = doc.select("div[class=menuFly]").select("li").select("a[class=hide]"); print("\nStations ratings: \n"); for (int i = 0; i < ratingsMenu.size(); i++) { System.out.println(i + 1 + ". " + StringEscapeUtils.unescapeHtml4(ratingsMenu.get(i).html())); } System.out.print("\n" + "Please make a choise (1-" + ratingsMenu.size() + "): "); br = new BufferedReader(new InputStreamReader(System.in)); try { choice = Integer.parseInt(br.readLine()); } catch (IOException e) { System.out.println("Error!"); System.exit(1); } /* * The html of the Ratings menu processed * has this structure: * <div> * <ul> * <li> * <ul> * ... * </ul> * </li> * ... * </ul> * </div> */ if (choice <= ratingsMenu.size() && choice >= 1) { //Get the DIV element with class "menuFly" Elements div = doc.select("div[class=menuFly]"); //div Elements list has only one element. So get the children of div Elements ul = div.get(0).children(); //ul Elements list has only one element. So get the children of ul Elements li = ul.get(0).children(); //remove blank elements for (int j = 0; j < li.size(); j++) { if (li.get(j).hasText() == false) li.remove(li.get(j)); } //get the title of user choice and print it out print("\n%s", StringEscapeUtils.unescapeHtml4(ratingsMenu.get(choice - 1).html()) + "\n"); //check if there is a sub-menu Elements ulTag = li.get(choice - 1).select("ul"); if (ulTag.hasText() == true) { Elements subMenu = ulTag.select("li").select("a[href]"); //print the sub-menu for (int j = 0; j < subMenu.size(); j++) print("%s. 
%s ", j + 1, StringEscapeUtils.unescapeHtml4(subMenu.get(j).html())); System.out.print("\n" + "Please make a choise (1-" + subMenu.size() + "): "); //read user input br = new BufferedReader(new InputStreamReader(System.in)); try { choice = Integer.parseInt(br.readLine()); } catch (IOException e) { System.out.println("Error!"); System.exit(1); } if (choice <= subMenu.size() && choice >= 1) { theUrls.add(subMenu.get(choice - 1).attr("abs:href")); System.out.println("...Processing the <" + StringEscapeUtils.unescapeHtml4(subMenu.get(choice - 1).html()) + "> category"); } else { System.out.println("Wrong selection!"); System.out.println("Exiting program..."); System.exit(1); } } else { if (choice == YEARLY_RATING) { String url = li.get(choice - 1).select("a[href").attr("abs:href"); doc = parseUrl(url, 0); if (doc != null) { Elements yearTopSubMenu = doc.select("div[id=maintabsid]").select("a[href]"); //print the sub-menu for (int i = 0; i < yearTopSubMenu.size(); i++) print("%s. %s", i + 1, StringEscapeUtils.unescapeHtml4(yearTopSubMenu.get(i).html())); System.out.print("\n" + "Please make a choise (1-" + yearTopSubMenu.size() + "): "); //read user input br = new BufferedReader(new InputStreamReader(System.in)); try { choice = Integer.parseInt(br.readLine()); } catch (IOException e) { System.out.println("Error!"); System.exit(1); } if (choice <= yearTopSubMenu.size() && choice >= 1) { if (choice == 1) { theUrls.add(yearTopSubMenu.get(choice - 1).attr("abs:href")); print("...Processing the <" + StringEscapeUtils.unescapeHtml4(yearTopSubMenu.get(choice - 1).html()) + "> category"); } else if (choice == 2) { String link = yearTopSubMenu.get(choice - 1).attr("abs:href"); doc = parseUrl(link, 0); //print menu title print("\n%s", StringEscapeUtils.unescapeHtml4(yearTopSubMenu.get(choice - 1).html()) + "\n"); if (doc != null) { Elements elem = doc.select("select[id=selectoption]") .select("option[value]"); ArrayList<Integer> nums = new ArrayList<Integer>(); for (int i = 0; i 
< elem.size(); i++) { //get the select category values and print the sub-menu int num = Integer.parseInt(elem.get(i).attr("value")); //add them to list nums.add(num); print("%s. %s", i + 1, StringEscapeUtils.unescapeHtml4( elem.get(i).html().replace("Select category: ", ""))); } System.out.print("\n" + "Please make a choise (1-" + elem.size() + "): "); //read user input br = new BufferedReader(new InputStreamReader(System.in)); try { choice = Integer.parseInt(br.readLine()); } catch (IOException e) { System.out.println("Error!"); System.exit(1); } if (choice <= elem.size() && choice >= 1) { int num = nums.get(choice - 1); String added = "max=100&id=" + num + "&"; String newlink = link.replace("max=100&", added); //print("\nlink: %s", newlink); DEBUG print theUrls.add(newlink); System.out .println("...Processing the <" + StringEscapeUtils.unescapeHtml4(elem.get(choice - 1) .html().replace("Select category: ", "")) + "> category"); print(elem.get(choice - 1).select("a[href]").attr("abs:href")); } else { System.out.println("Wrong selection!"); System.out.println("Exiting program..."); System.exit(1); } } else { System.out.println("ERROR: Cannot get links from server!"); System.out.println("Exiting program..."); System.exit(1); } } else { String link = yearTopSubMenu.get(choice - 1).attr("abs:href"); doc = parseUrl(link, 0); //print menu title print("\n%s", StringEscapeUtils.unescapeHtml4(yearTopSubMenu.get(choice - 1).html()) + "\n"); if (doc != null) { Elements elem = doc.select("select[id=selectoption]") .select("option[value]"); ArrayList<Integer> nums = new ArrayList<Integer>(); for (int i = 0; i < elem.size(); i++) { //get the select category values and print the sub-menu int num = Integer.parseInt(elem.get(i).attr("value")); //add them to list nums.add(num); print("%s. 
%s", i + 1, StringEscapeUtils.unescapeHtml4( elem.get(i).html().replace("Select location: ", ""))); } System.out.print("\n" + "Please make a choise (1-" + elem.size() + "): "); //read user input br = new BufferedReader(new InputStreamReader(System.in)); try { choice = Integer.parseInt(br.readLine()); } catch (IOException e) { System.out.println("Error!"); System.exit(1); } if (choice <= elem.size() && choice >= 1) { int num = nums.get(choice - 1); String[] linkParts = link.split("&", 4); String finalLink = linkParts[0] + "&" + linkParts[1] + "&" + "id=" + num + "&" + linkParts[3]; //print("\nlink: %s \n link2: %s \n link3: %s \n link: %s \nsize: %s", linkParts[0], linkParts[1], linkParts[2], linkParts[3], linkParts.length); // DEBUG print //print(finalLink); theUrls.add(finalLink); System.out .println("...Processing the <" + StringEscapeUtils.unescapeHtml4(elem.get(choice - 1) .html().replace("Select category: ", "")) + "> category"); print(elem.get(choice - 1).select("a[href]").attr("abs:href")); } else { System.out.println("Wrong selection!"); System.out.println("Exiting program..."); System.exit(1); } } else { System.out.println("ERROR: Cannot get links from server!"); System.out.println("Exiting program..."); System.exit(1); } } } else { System.out.println("Wrong selection!"); System.out.println("Exiting program..."); System.exit(1); } } else { System.out.println("ERROR: Cannot get links from server!"); System.out.println("Exiting program..."); System.exit(1); } } else { theUrls.add(li.get(choice - 1).select("a[href").attr("abs:href")); System.out.println("...Processing the <" + StringEscapeUtils.unescapeHtml4(ratingsMenu.get(choice - 1).html()) + "> category"); print(li.get(choice - 1).select("a[href]").attr("abs:href")); } } } else { System.out.println("Wrong selection!"); System.out.println("Exiting program..."); System.exit(1); } break; case (5): System.out.println("Exiting program..."); System.exit(0); break; default: System.out.println("Invalid choice! 
Exiting..."); System.exit(1); break; } }
From source file:org.aliuge.crawler.jobconf.ExtractConfig.java
/** * ????/*from w w w .j a v a2s.c om*/ * @param doc * @return * @throws ConfigurationException */ public ExtractConfig loadConfig(Document doc) { Elements extractElement = doc.select("extract"); super.setJobName(doc.select("job").attr("name")); super.setIndexName(doc.select("job").attr("indexName")); String temp = extractElement.select("threadNum").text(); if (StringUtils.isNotBlank(temp)) { this.threadNum = Integer.parseInt(temp); } Elements templateElement = extractElement.select("extract").select("template"); Iterator<Element> it = templateElement.iterator(); while (it.hasNext()) { Element template = it.next(); ExtractTemplate extractTemplate = new ExtractTemplate(); // ?Url???? Elements urlPatternElement = template.select("url"); List<Pattern> patterns = Lists.newArrayList(); for (Element urlElement : urlPatternElement) { patterns.add(Pattern.compile(urlElement.text())); } extractTemplate.setUrlPattern(patterns); extractTemplate.setName(template.attr("name")); // ??? Elements selectElement = template.select("elements").first().children(); for (Element element : selectElement) { if ("element".equals(element.tagName())) { AbstractElementCssSelector<?> selector = ElementCssSelectorFactory.create(element); extractTemplate.addCssSelector(selector); } else if ("if".equals(element.tagName())) { IFConditions ifConditions = IFConditions.create(element); extractTemplate.addConditions(ifConditions); } } super.setExtractConfig(this); this.templates.add(extractTemplate); } //super.setExtractConfig(this); return this; }
From source file:org.aliuge.crawler.jobconf.FetchConfig.java
/** * ???/* w w w . j av a 2 s .c om*/ * * @param confFile * @return */ @SuppressWarnings("unchecked") public FetchConfig loadConfig(Document confDoc) throws ConfigurationException { try { Document doc = confDoc; super.setJobName(doc.select("job").attr("name")); super.setIndexName(doc.select("job").attr("indexName")); Elements e = doc.select("fetch"); this.type = e.select("type").text(); this.agent = e.select("agent").text(); String temp = e.select("threadNum").text(); if (StringUtils.isNotBlank(temp)) { this.threadNum = Integer.parseInt(temp); } temp = e.select("delayBetweenRequests").text(); if (StringUtils.isNotBlank(temp)) { this.delayBetweenRequests = Integer.parseInt(temp); } temp = e.select("maxDepthOfCrawling").text(); if (StringUtils.isNotBlank(temp)) { this.maxDepthOfCrawling = Integer.parseInt(temp); } temp = e.select("fetchBinaryContent").text(); if (StringUtils.isNotBlank(temp)) { this.fetchBinaryContent = Boolean.parseBoolean(temp); } if (StringUtils.isNotBlank(e.select("maxOutgoingLinksToFollow").text())) { this.maxOutgoingLinksToFollow = Integer.parseInt(e.select("maxOutgoingLinksToFollow").text()); } temp = e.select("fileSuffix").text(); if (StringUtils.isNotBlank(temp)) { this.fileSuffix = temp; } temp = e.select("maxDownloadSizePerPage").text(); if (StringUtils.isNotBlank(temp)) { this.maxDownloadSizePerPage = Integer.parseInt(temp); } temp = e.select("https").text(); if (StringUtils.isNotBlank(temp)) { this.https = Boolean.parseBoolean(temp); } temp = e.select("onlyDomain").text(); if (StringUtils.isNotBlank(temp)) { this.onlyDomain = Boolean.parseBoolean(temp); } temp = e.select("socketTimeoutMilliseconds").text(); if (StringUtils.isNotBlank(temp)) { this.socketTimeoutMilliseconds = Integer.parseInt(temp); } temp = e.select("connectionTimeout").text(); if (StringUtils.isNotBlank(temp)) { this.connectionTimeout = Integer.parseInt(temp); } temp = e.select("maxTotalConnections").text(); if (StringUtils.isNotBlank(temp)) { this.maxTotalConnections 
= Integer.parseInt(temp); } temp = e.select("maxConnectionsPerHost").text(); if (StringUtils.isNotBlank(temp)) { this.maxConnectionsPerHost = Integer.parseInt(e.select("maxConnectionsPerHost").text()); } temp = e.select("maxConnectionsPerHost").text(); if (StringUtils.isNotBlank(temp)) { this.maxConnectionsPerHost = Integer.parseInt(temp); } temp = e.select("proxy").text(); if (StringUtils.isNotBlank(temp)) { Properties p = PropertyConfigurationHelper.getProperties(temp); this.proxyIps = Lists.newLinkedList(); for (Object o : p.keySet()) { proxyIps.add((String) p.get(o)); } } // seed Elements seeds = doc.select("fetch seeds seed"); for (Element element : seeds) { // WebURL url = new WebURL(); String url = element.text(); if (StringUtils.isBlank(url)) { continue; } url = url.trim(); String area = element.attr("area"); this.seeds.add(url); WebURL areaUrl = new WebURL(area, url); try { PendingManager.getPendingArea(super.getJobName()).addElement(areaUrl); } catch (QueueException e1) { log.error("", e1); e1.printStackTrace(); } // BloomfilterHelper.getInstance().add(url.getURL()); } /* * ??Url */ Elements fetchUrlFilters = doc.select("fetchUrlFilters filter"); for (Element element : fetchUrlFilters) { String tmp = element.text(); if (StringUtils.isNoneBlank(tmp)) this.fetchUrlFilters.add(element.text()); } /* * ?????Url */ Elements extractUrlfilters = doc.select("extractUrlfilters filter"); for (Element element : extractUrlfilters) { String tmp = element.text(); String tmp_rep = element.attr("replace"); if (StringUtils.isNoneBlank(tmp)) this.extractUrlfilters.add(new KeyValue(tmp, tmp_rep)); } } catch (NumberFormatException e) { throw new ConfigurationException("?" + e.getMessage()); } // super.setFetchConfig(this); return this; }
From source file:org.aliuge.crawler.jobconf.StoreConfig.java
public StoreConfig loadConfig(Document confDoc) throws StorageException { Document doc = confDoc;// w w w. ja v a 2 s . c o m super.setJobName(doc.select("job").attr("name")); super.setIndexName(doc.select("job").attr("indexName")); Elements e = doc.select("store"); this.type = e.select("type").text(); if (StringUtils.isNotBlank(e.select("threadNum").text())) { this.threadNum = Integer.parseInt(e.select("threadNum").text()); } String className = e.select("plugin").text(); if (StringUtils.isNotBlank(className)) { this.pluginClass = className; } if (!StorageType.containsValue(this.type)) { log.info("?" + this.type); throw new StorageException( "???store? mysql,hbase,elasticSearch,localFile,mongodb"); } if (this.type.equalsIgnoreCase(StorageType.hbase.getValue())) { String tName = e.select("table").first().attr("name"); String fName = e.select("family").first().text(); this.hconfig = new HBaseConfig(tName, fName); } else if (this.type.equalsIgnoreCase(StorageType.mongodb.getValue())) { String dbName = e.select("db").first().attr("name"); String collection = e.select("collection").first().text(); String port = e.select("port").text(); String host = e.select("host").text(); this.mongodbConfig = new MongodbConfig(dbName, collection, host, port); } else if (this.type.equalsIgnoreCase(StorageType.mysql.getValue())) { } else if (this.type.equalsIgnoreCase(StorageType.elasticsearch.getValue())) { } // id? String idPolicy = e.select("idPolicy").text(); if (StringUtils.isNotBlank(idPolicy)) { id = EnumUtils.getEnum(IDPolicy.class, idPolicy); if (!IDPolicy.auto.equals(id)) { String pref = e.select("ref").text(); if (StringUtils.isNotBlank(pref)) { this.policyRef = pref; } if (StringUtils.isBlank(this.policyRef)) { try { throw new ConfigurationException("ID??"); } catch (Exception e2) { e2.printStackTrace(); } } } } return this; }
From source file:org.b3log.wordman.word.Main.java
/** * ?.//w w w.jav a 2s . c om * * @param args ? * @throws java.lang.Exception */ public static void main(final String[] args) throws Exception { final Clazz clazz = new Clazz(); clazz.setId(CLASS_ID); clazz.setName(CLASS_NAME); final List<Word> classWords = new ArrayList<Word>(); clazz.setWords(classWords); for (int clazzNum = 1; clazzNum <= CLASS_NUM; clazzNum++) { final Connection.Response response = Jsoup .connect("http://word.iciba.com/?action=words&class=" + clazz.getId() + "&course=" + clazzNum) .userAgent("Mozilla").timeout(TIMEOUT).execute(); final Document document = response.parse(); int classWordCnt = 0; for (int i = 1; i <= PAGE; i++) { final Elements wordList = document.select("ul#word_list_" + i); final Elements wordLi = wordList.select("li"); for (final Element li : wordLi) { final Word word = new Word(); word.setId(UUID.randomUUID().toString().replaceAll("-", "")); final Element w = li.select("div.word_main_list_w").get(0); String spell = w.select("span").get(0).attr("title"); // ?? spell = spell.replace("*", "").replaceAll("\\(.*\\)", "").replace("\\", ""); spell = spell.trim(); word.setWord(spell); if (!checkWord(spell)) { // throw new IllegalStateException(" [" + spell + ']'); } final Element y = li.select("div.word_main_list_y").get(0); word.setPhon(y.select("strong").get(0).text()); word.setPron(y.select("a").get(0).id()); final Element s = li.select("div.word_main_list_s").get(0); word.setPara(s.select("span").get(0).text()); // ??? word.setBuild(""); word.setExample(""); // System.out.println(word.toString()); classWords.add(word); classWordCnt++; } } System.out.println("? [" + clazzNum + "] ??? 
[" + classWordCnt + "]"); } final StringBuilder sqlBuilder = new StringBuilder(); final List<String> sqls = clazz.toSQLs(); for (final String sql : sqls) { System.out.println(sql); sqlBuilder.append(sql).append(IOUtils.LINE_SEPARATOR); } final OutputStream outputStream = new FileOutputStream(new File("C:\\" + CLASS_NAME + ".sql")); IOUtils.write(sqlBuilder.toString(), outputStream, "UTF-8"); IOUtils.closeQuietly(outputStream); }