Usage examples for the jsoup method org.jsoup.nodes.Document#select, collected from open-source projects.
public Elements select(String cssQuery)
From source file:net.slkdev.swagger.confluence.service.impl.XHtmlToConfluenceServiceImpl.java
private static void reformatXHtmlHeadings(final Document document, final String selector) { final Elements elements = document.select(selector); for (final Element element : elements) { final String text = element.text(); final String strongHeaderText = String.format("<strong>%s</strong>", text); element.html(strongHeaderText);/*from w w w . jav a 2 s. c om*/ } }
From source file:com.entertailion.android.slideshow.utils.Utils.java
/** * Determine if there is a high resolution icon available for the web site. * /* w ww . j a va2s. c o m*/ * @param context * @param url * @return */ public static final String getWebSiteIcon(Context context, String url) { String icon = null; if (url != null) { String data = Utils.getCachedData(context, url, true); if (data != null) { Document doc = Jsoup.parse(data); if (doc != null) { String href = null; Elements metas = doc.select("meta[itemprop=image]"); if (metas.size() > 0) { Element meta = metas.first(); href = meta.attr("abs:content"); // weird jsoup bug: abs doesn't always work if (href == null || href.trim().length() == 0) { href = url + meta.attr("content"); } } if (href == null || href.trim().length() == 0) { // Find the Microsoft tile icon metas = doc.select("meta[name=msapplication-TileImage]"); if (metas.size() > 0) { Element meta = metas.first(); href = meta.attr("abs:content"); // weird jsoup bug: abs doesn't always work if (href == null || href.trim().length() == 0) { href = url + meta.attr("content"); } } } if (href == null || href.trim().length() == 0) { // Find the Apple touch icon Elements links = doc.select("link[rel=apple-touch-icon]"); if (links.size() > 0) { Element link = links.first(); href = link.attr("abs:href"); // weird jsoup bug: abs doesn't always work if (href == null || href.trim().length() == 0) { href = url + link.attr("href"); } } } if (href == null || href.trim().length() == 0) { // Find the Facebook open graph icon metas = doc.select("meta[property=og:image]"); if (metas.size() > 0) { Element link = metas.first(); href = link.attr("abs:content"); // weird jsoup bug: abs doesn't always work if (href == null || href.trim().length() == 0) { href = url + link.attr("content"); } } } if (href != null && href.trim().length() > 0) { try { Bitmap bitmap = Utils.getBitmapFromURL(href); if (bitmap != null) { icon = "web_site_icon_" + Utils.clean(href) + ".png"; Utils.saveToFile(context, bitmap, bitmap.getWidth(), bitmap.getHeight(), 
icon); bitmap.recycle(); } } catch (Exception e) { Log.d(LOG_TAG, "getWebSiteIcon", e); } } } } } return icon; }
From source file:io.andyc.papercut.api.PrintApi.java
/** * Parses the set number of copies page and builds the data required to * submit the form/* ww w . ja v a2s . co m*/ * * @param printJob {PrintJon} - the print job in question * @param prevDoc {Document} - the HTML page containing the form to set the * number of copies to be printed * * @return {Map<String, String>} - a HashMap containing the form data */ static Map<String, String> buildSetNumberOfCopiesData(Document prevDoc, PrintJob printJob) { Map<String, String> result = new HashMap<>(); for (Element element : prevDoc.select("form").select("input")) { String name = element.attr("name"); String value = element.attr("value"); if (Objects.equals(name, "$Submit$0")) { continue; } if (Objects.equals(name, "copies")) { value = String.valueOf(printJob.getCopies()); } if (Objects.equals(value, "")) { continue; } result.put(name, value); } return result; }
From source file:dsll.pinterest.crawler.Reduce.java
private static Text getBoardContent(String url, DBCollection boardsCollection) throws JSONException { // NOTE: only board information is crawled. the pins are left to the expanding process Document html = null; JSONObject board = new JSONObject(); try {//w w w. j a v a 2 s .c o m html = Jsoup.connect(url).get(); } catch (Exception e) { return new Text("HTTP connection failed..."); } // board major information String[] tmp = url.split("/"); String boardID = tmp[4]; String boardOwnrID = tmp[3]; String boardName = html.select("h1[class=boardName]").text().trim(); String boardDesp = html.select("p[class=description]").text().trim(); String boardOwnr = html.select("h4[classs=fullname]").text().trim(); // Contained Pins Elements pinsCont = html.select("div[class=pinWrapper]"); JSONArray pins = new JSONArray(); for (Element pinCont : pinsCont) { JSONObject pin = new JSONObject(); pin.append("src", pinCont.select("div[class=pinHolder]>a").first().attr("href")); pins.put(pin); } board.append("ID", boardID); board.append("owner_id", boardOwnrID); board.append("src", url); board.append("name", boardName); board.append("description", boardDesp); board.append("owner", boardOwnr); board.append("pins", pins); // Optional: push data to database BasicDBObject dbObject = (BasicDBObject) JSON.parse(board.toString()); boardsCollection.insert(dbObject); return new Text(board.toString()); }
From source file:dsll.pinterest.crawler.Reduce.java
/**
 * Crawls a single Pinterest pin page and stores the extracted data in MongoDB.
 * Extracts: pin id (from the URL), image URL, name, source link, credits,
 * comments, owning board, and related pins. Scraping is best-effort: each
 * section is wrapped in its own try/catch so one missing DOM fragment does not
 * abort the rest.
 *
 * @param url the pin URL, expected to contain ".../pin/{id}/..."
 * @param pinsCollection the MongoDB collection the pin document is inserted into
 * @return the pin serialized as JSON text, or an error message when the HTTP fetch fails
 * @throws JSONException if building the JSON document fails
 */
private static Text getPinContent(String url, DBCollection pinsCollection) throws JSONException {
    Document html = null;
    JSONObject pin = new JSONObject();
    try {
        html = Jsoup.connect(url).get();
    } catch (Exception e) {
        return new Text("HTTP connection failed...");
    }
    // Gather major pins data
    Element doc = html.select("body").first();
    // Pin ID: taken from the URL path segment after "pin/".
    String id = (url.split("pin/")[1].split("/")[0]);
    pin.append("ID", id);
    // Pin image (empty string when the image container is missing).
    String imageURL = "";
    Element tmp = doc.select("div[class=pinImageSourceWrapper]").first();
    try {
        tmp = tmp.select("div[class=imageContainer]").select("img").first();
        imageURL = tmp.attr("src");
    } catch (Exception e) {
        // best-effort: leave imageURL empty when the wrapper or <img> is absent
    }
    // (A large commented-out block that downloaded the image and wrote it to
    // HDFS previously lived here; removed as dead code.)
    pin.append("image", imageURL);
    // Pin name
    tmp = doc.select("h2[itemprop=name]").first();
    String name = "";
    if (tmp != null) {
        name = tmp.text().trim();
    }
    pin.append("name", name);
    // Pin source (external site the pin links to); empty JSON object when absent.
    Element sourceCont = doc.select("div[class=sourceFlagWrapper]").first();
    JSONObject source = new JSONObject();
    if (sourceCont != null) {
        String title = sourceCont.text().trim();
        String src = sourceCont.select("a").first().attr("href");
        source.append("title", title);
        source.append("src", src);
    }
    pin.append("source", source);
    // Pin credit (original author attribution); each field scraped independently.
    JSONObject pinCredit = new JSONObject();
    Element credit = doc.select("div[class=pinCredits]").first();
    String creditName = "", creditTitle = "", creditSource = "";
    try {
        creditName = credit.select("div[class=creditName]").text().trim();
    } catch (Exception e) {
        // credit block missing — keep empty default
    }
    try {
        creditTitle = credit.select("div[class=creditTitle]").text().trim();
    } catch (Exception e) {
        // credit block missing — keep empty default
    }
    try {
        creditSource = credit.select("a").attr("href");
    } catch (Exception e) {
        // credit block missing — keep empty default
    }
    pinCredit.append("name", creditName);
    pinCredit.append("title", creditTitle);
    pinCredit.append("src", creditSource);
    pin.append("credit", pinCredit);
    // Comments: one JSON object per comment; a comment is skipped entirely
    // when its description content cannot be read.
    JSONArray comments = new JSONArray();
    Elements commentsConts = doc.select("div[class=commenterNameCommentText]");
    for (Element commentCont : commentsConts) {
        JSONObject comment = new JSONObject();
        Element creatorEle = commentCont.select("div[class=commenterWrapper] a").first();
        String creatorName = creatorEle.text().trim();
        String creatorSrc = creatorEle.attr("href");
        String content = "", raw = "";
        Element commentContent = commentCont.select(".commentDescriptionContent").first();
        try {
            content = commentContent.text().trim();
            raw = commentContent.html();
            comment.append("creator", creatorName);
            comment.append("creator_url", creatorSrc);
            comment.append("content", content);
            comment.append("content_raw", raw);
            comments.put(comment);
        } catch (Exception e) {
            // commentContent was null — drop this comment
        }
    }
    pin.append("comments", comments);
    // Pin board link and related pins
    Element bottomDoc = doc.select("div[class=Module CloseupSidebar]").first();
    // Pin board (stored as an array of one element).
    JSONArray board = new JSONArray();
    if (bottomDoc != null) {
        Element boardEle = bottomDoc.select("div[class=boardHeader]").first();
        JSONObject b = new JSONObject();
        String boardName = "";
        try {
            boardName = boardEle.select("h3[class=title]").text().trim();
        } catch (Exception ee) {
            // board header missing — keep empty default
        }
        String boardSrc = "";
        try {
            boardSrc = "https://www.pinterest.com" + boardEle.select("a").attr("href").trim();
        } catch (Exception ee) {
            // board header missing — keep empty default
        }
        b.append("name", boardName);
        b.append("src", boardSrc);
        board.put(b);
    }
    pin.append("board", board); //CAUTION: what if a pin shows up in different boards?
    // Related pins
    bottomDoc = doc
            .select("div[class=closeupBottom] div[class=Module CloseupBottom] div[class=relatedPinsWrapper]")
            .first();
    JSONArray relatedPins = new JSONArray();
    if (bottomDoc != null) {
        Elements relatedPinsConts = bottomDoc.select("div[class=pinWrapper]");
        for (Element relatedPinsCont : relatedPinsConts) {
            JSONObject relatedPin = new JSONObject();
            try {
                relatedPin.append("src", "https://www.pinterest.com"
                        + relatedPinsCont.select("div[class=pinHolder] > a").attr("href"));
            } catch (Exception e) {
                // link missing — the relatedPin object is still added, just empty
            }
            relatedPins.put(relatedPin);
        }
    }
    pin.append("related_pins", relatedPins);
    // Optional: push data to database
    BasicDBObject dbObject = (BasicDBObject) JSON.parse(pin.toString());
    pinsCollection.insert(dbObject);
    return new Text(pin.toString());
}
From source file:com.crawler.app.run.CrawlSiteController.java
/**
 * Fetches a listing page and determines the total number of result pages.
 * <p>
 * Two strategies, driven by class-level crawler configuration fields:
 * <ul>
 *   <li>If {@code pageNumberSelect} is configured, the page count is read
 *       directly from the matched element's text (optionally keeping only the
 *       digits within a configured character index range).</li>
 *   <li>Otherwise the count is derived as totalProducts / productsPerPage,
 *       where both numbers are scraped from configured selectors. Products
 *       per page is either the leading number of the matched text, or a
 *       "from..to" range extracted via configured character indices.</li>
 * </ul>
 * NOTE(review): the digit-extraction loops prepend "0" and concatenate only
 * digit characters, so e.g. "1,234" parses as 1234 — presumably intentional
 * for locale-formatted numbers.
 *
 * @param url the listing page URL to fetch
 * @return the number of pages, or -1 on any error (non-200 status, timeout,
 *         parse failure, or when the count cannot be derived)
 */
public static int getPageNumberEnd(String url) {
    try {
        Connection.Response response = Jsoup.connect(url)
                //enable for error urls
                .ignoreHttpErrors(true)
                //MAXIMUM TIME
                .timeout(timeOut)
                //This is to prevent producing garbage by attempting to parse a JPEG binary image
                .ignoreContentType(true).execute();
        int status = response.statusCode();
        //after done
        if (status == 200) {
            org.jsoup.nodes.Document doc = response.parse();
            if (!pageNumberSelect.isEmpty()) {
                // Strategy 1: page count printed directly on the page.
                int pageNumber = -1;
                String strPageNumber = doc.select(pageNumberSelect).text();
                if (pageNumberIndexBegin > -1 && pageNumberIndexEnd > -1) {
                    // Keep only the digit characters inside the configured index range.
                    String strSplit = "0";
                    for (int i = pageNumberIndexBegin; i <= pageNumberIndexEnd; i++) {
                        String ch = String.valueOf(strPageNumber.charAt(i));
                        if (tryParseIntByString(ch)) {
                            strSplit += ch;
                        }
                    }
                    pageNumber = Integer.parseInt(strSplit);
                } else {
                    pageNumber = Integer.parseInt(strPageNumber);
                }
                return pageNumber;
            } else {
                // Strategy 2: get pagenumber with total product/numberproduct in page
                String strTotalProduct = "";
                if (totalProductPosition > -1) {
                    if (totalProductSelectPosition.isEmpty()) {
                        strTotalProduct = doc.select(totalProductSelect).get(totalProductPosition).text();
                    } else {
                        strTotalProduct = doc.select(totalProductSelect).get(totalProductPosition)
                                .select(totalProductSelectPosition).text();
                    }
                } else {
                    strTotalProduct = doc.select(totalProductSelect).text().toString().trim();
                }
                int totalProduct = -1;
                int i;
                // get totalproduct
                if (totalProductIndexBegin < 0) {
                    // No index range configured: take digits up to the first space.
                    String strNumberTotalProduct = "0";
                    for (i = 0; i < strTotalProduct.length(); i++) {
                        String ch = String.valueOf(strTotalProduct.charAt(i));
                        if (ch.isEmpty() || ch.equals(" ")) {
                            break;
                        }
                        if (tryParseIntByString(ch)) {
                            strNumberTotalProduct += ch;
                        }
                    }
                    totalProduct = Integer.parseInt(strNumberTotalProduct);
                } else {
                    // Index range configured: keep digits within [begin, end].
                    String strSplit = "0";
                    for (i = totalProductIndexBegin; i <= totalProductIndexEnd; i++) {
                        String ch = String.valueOf(strTotalProduct.charAt(i));
                        if (tryParseIntByString(ch)) {
                            strSplit += ch;
                        }
                    }
                    totalProduct = Integer.parseInt(strSplit);
                }
                // get number product in page
                int numberPage = -1;
                String strNumberProductInPage = "";
                if (numberProductInPagePosition > -1) {
                    if (numberProductInPageSelectPosition.isEmpty()) {
                        strNumberProductInPage = doc.select(numberProductInPageSelect)
                                .get(numberProductInPagePosition).text();
                    } else {
                        strNumberProductInPage = doc.select(numberProductInPageSelect)
                                .get(numberProductInPagePosition).select(numberProductInPageSelectPosition)
                                .text();
                    }
                } else {
                    strNumberProductInPage = doc.select(numberProductInPageSelect).text();
                }
                if (regexFromIndexEnd < 0 && regexToIndexEnd < 0) {
                    // Per-page count is the leading number of the matched text.
                    String strSplit = "0";
                    for (i = 0; i < strNumberProductInPage.length(); i++) {
                        String ch = String.valueOf(strNumberProductInPage.charAt(i));
                        if (ch.isEmpty() || ch.equals(" ")) {
                            break;
                        }
                        if (tryParseIntByString(ch)) {
                            strSplit += ch;
                        }
                    }
                    int numberProductInPage = Integer.parseInt(strSplit);
                    // Ceiling division of totalProduct by numberProductInPage.
                    if (totalProduct > -1 && numberProductInPage > 0) {
                        if ((totalProduct % numberProductInPage) == 0) {
                            numberPage = totalProduct / numberProductInPage;
                        } else if (totalProduct > numberProductInPage) {
                            numberPage = totalProduct / numberProductInPage + 1;
                        }
                    }
                    return numberPage;
                } else {
                    // Per-page count is a "from..to" range (e.g. "1 - 24 of 240").
                    //String[] arrStrNumberProductInPage = strNumberProductInPage.split(regex);
                    //String strNumberProductFrom = arrStrNumberProductInPage[0];
                    // Remove the (possibly locale-formatted) total from the text so the
                    // configured character indices land on the from/to numbers.
                    String strTotalProductReplace = String.valueOf(totalProduct);
                    if (!decimalFormatTotalProduct.isEmpty()) {
                        DecimalFormat formatter = new DecimalFormat(decimalFormatTotalProduct);
                        strTotalProductReplace = formatter.format(totalProduct);
                    }
                    strNumberProductInPage = strNumberProductInPage.replace(strTotalProductReplace, "");
                    int numberFrom = -1;
                    String strSplitFrom = "0";
                    for (i = regexFromIndexBegin; i <= regexFromIndexEnd; i++) {
                        String ch = String.valueOf(strNumberProductInPage.charAt(i));
                        if (tryParseIntByString(ch)) {
                            strSplitFrom += ch;
                        }
                    }
                    numberFrom = Integer.parseInt(strSplitFrom);
                    // String strNumberProductTo = arrStrNumberProductInPage[1];
                    int numberTo = -1;
                    String strSplitTo = "0";
                    for (i = regexToIndexBegin; i <= regexToIndexEnd; i++) {
                        String ch = String.valueOf(strNumberProductInPage.charAt(i));
                        if (tryParseIntByString(ch)) {
                            strSplitTo += ch;
                        }
                    }
                    numberTo = Integer.parseInt(strSplitTo);
                    int numberProductInPage = numberTo - numberFrom + 1;
                    // Ceiling division of totalProduct by numberProductInPage.
                    if (totalProduct > -1 && numberProductInPage > 0) {
                        if ((totalProduct % numberProductInPage) == 0) {
                            numberPage = totalProduct / numberProductInPage;
                        } else if (totalProduct > numberProductInPage) {
                            numberPage = totalProduct / numberProductInPage + 1;
                        }
                    }
                    return numberPage;
                }
            }
        } else {
            return -1;
        }
    } catch (SocketTimeoutException se) {
        System.out.println("getContentOnly: SocketTimeoutException");
        System.out.println(se.getMessage());
        return -1;
    } catch (Exception e) {
        System.out.println("getContentOnly: Exception");
        e.printStackTrace();
        return -1;
    }
}
From source file:com.nineash.hutsync.client.NetworkUtilities.java
/**
 * Syncs the rota pages from the server into the device's HutSync event calendar.
 * <p>
 * Flow: restore the session cookies serialized in {@code authtoken}, fetch the
 * rota page, scrape the shift colour legend from the page's CSS, scrape this
 * week's and next week's shift cells into {@code Event}s, merge back-to-back
 * shifts, then upsert everything into the local calendar provider in batches.
 * (The original javadoc described a contacts sync and a return value; this
 * method is void and syncs calendar events.)
 *
 * @param account the account being synced
 * @param authtoken a serialized cookie list stored in the AccountManager for
 *            this account
 * @param serverSyncState a token returned from the server on the last sync
 *            (currently unused in the body below)
 */
public static void syncCalendar(Context context, Account account, String authtoken, long serverSyncState)
        throws JSONException, ParseException, IOException, AuthenticationException {
    ArrayList<SerializableCookie> myCookies;
    CookieStore cookieStore = new BasicCookieStore();
    DefaultHttpClient hClient = getHttpClient(context);
    mContentResolver = context.getContentResolver();
    // IDs of the two rota tables on the home page.
    final String[] weeknames = { "rota_this_week", "rota_next_week" };
    long calendar_id = getCalendar(account);
    if (calendar_id == -1) {
        Log.e("CalendarSyncAdapter", "Unable to create HutSync event calendar");
        return;
    }
    // The authtoken is a serialized ArrayList<SerializableCookie>; restore the session.
    try {
        myCookies = (ArrayList<SerializableCookie>) fromString(authtoken);
    } catch (final IOException e) {
        Log.e(TAG, "IOException when expanding authtoken", e);
        return;
    } catch (final ClassNotFoundException e) {
        Log.e(TAG, "ClassNotFoundException when expanding authtoken", e);
        return;
    }
    for (SerializableCookie cur_cookie : myCookies) {
        cookieStore.addCookie(cur_cookie.getCookie());
    }
    hClient.setCookieStore(cookieStore);
    Log.i(TAG, "Syncing to: " + SYNC_CONTACTS_URI);
    HttpGet httpget = new HttpGet(SYNC_CONTACTS_URI);
    final HttpResponse resp = hClient.execute(httpget);
    final String response = EntityUtils.toString(resp.getEntity());
    HashMap<Long, SyncEntry> localEvents = new HashMap<Long, SyncEntry>();
    ArrayList<Event> events = new ArrayList<Event>();
    // p: extracts the inline background colour of a shift cell.
    Pattern p = Pattern.compile("background-color:(#[[a-f][A-F][0-9]]{6})");
    // ps: extracts (css-class, colour) pairs from the calendar-key legend CSS.
    Pattern ps = Pattern
            .compile(".calendar-key span.(\\S+) \\{ background-color:(#[[a-f][A-F][0-9]]{6}); color:#fff; \\}");
    if (resp.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
        //check we are still logged in
        //if (resp.getStatusLine().getStatusCode() == HttpStatus.SC_UNAUTHORIZED) {
        //    Log.e(TAG, "Authentication exception in sending dirty contacts");
        //    throw new AuthenticationException();
        //}
        //if we are logged in
        // Map of colour code -> shift type name, scraped from the page's <style> blocks.
        Map<String, String> shift_types = new HashMap<String, String>();
        int length = weeknames.length;
        Document doc = Jsoup.parse(response);
        String full_name = doc.select("a[href*=" + account.name + "/profile]").first().text();
        AccountManager mAccountManager = AccountManager.get(context);
        Account[] the_accounts = mAccountManager.getAccountsByType(Constants.ACCOUNT_TYPE);
        boolean multiple_accounts = (the_accounts.length > 1);
        Elements the_styles = doc.select("style");
        for (Element the_style : the_styles) {
            String st_txt = the_style.html();
            Matcher ms = ps.matcher(st_txt);
            while (ms.find()) { // Find each match in turn; String can't do this.
                String cname = ms.group(1); // Access a submatch group; String can't do this.
                String ccol = ms.group(2);
                // Resolve the human-readable shift name from the legend span's text.
                String rname = doc.select("span." + cname).first().text();
                Log.i(TAG, "LOOK: " + cname + ", " + ccol + ", " + rname);
                shift_types.put(ccol, rname);
            }
        }
        for (int w = 0; w < weeknames.length; w++) {
            Elements the_dates = doc.select("div.homepage div.accord-content table[id=" + weeknames[w]
                    + "] tr.heading th:not(.skipStyles)");
            //for (Element hidden : the_dates) { //0 is Mon, 6 is Sun
            Element the_date = the_dates.first();
            //figure out the year for the Monday.
            String str_v = the_date.text();
            String[] str_sub = str_v.split(" ");
            str_sub[1] = str_sub[1].trim();
            String[] date_split = str_sub[1].split("/");
            Calendar c = Calendar.getInstance();
            int this_month = c.get(Calendar.MONTH) + 1;
            int monday_month = Integer.parseInt(date_split[1]);
            int this_year = c.get(Calendar.YEAR);
            int monday_year = this_year;
            // The heading only has day/month; pick the year so the Monday is
            // nearest to "now" (handles the year rollover in either direction).
            if (this_month > monday_month) {
                monday_year++;
            } else if (this_month < monday_month) {
                monday_year--;
            }
            SimpleDateFormat format = new SimpleDateFormat("dd/MM/yyyy");
            Date date = new Date();
            if (str_v != null && !str_v.isEmpty()) {
                String this_date = str_sub[1] + "/" + monday_year;
                //we need to figure out the year - sometimes its next year
                try {
                    date = format.parse(this_date);
                } catch (Exception e) {
                    // parse failure: fall back to "now" as the week's Monday
                    e.printStackTrace();
                }
                Log.i(TAG, "Dates: " + this_date + " - " + date);
            }
            //}
            for (int i = 1; i < 8; ++i) { //1 is monday, 7 is sunday
                Elements hiddens = doc.select("div.homepage div.accord-content table[id=" + weeknames[w]
                        + "] td:eq(" + Integer.toString(i) + "):not(.skipStyles) div.timeElem");
                int add_days = i - 1;
                for (Element hidden : hiddens) {
                    String str = hidden.text();
                    if (str != null && !str.isEmpty()) {
                        // The cell's inline style colour identifies the shift type.
                        String style = hidden.attr("style");
                        String bg_col = "";
                        Matcher m = p.matcher(style);
                        if (m.find()) {
                            bg_col = m.group(1); // Access a submatch group; String can't do this.
                        }
                        Log.i(TAG, "Time: " + str + "(" + bg_col + ")");
                        String ev_description = ""; //Location too?
                        if (multiple_accounts)
                            ev_description += full_name + "\n\n";
                        // Cell text is "HH:mm - HH:mm".
                        String[] times = str.split(" - ");
                        String[] start_time = times[0].split(":");
                        String[] end_time = times[1].split(":");
                        int add_start_hours = Integer.parseInt(start_time[0]);
                        int add_start_minutes = Integer.parseInt(start_time[1]);
                        int add_finish_hours = Integer.parseInt(end_time[0]);
                        int add_finish_minutes = Integer.parseInt(end_time[1]);
                        String ev_shiftType = "";
                        if (bg_col != null && !bg_col.isEmpty()) {
                            ev_shiftType = (String) shift_types.get(bg_col);
                        } else {
                            ev_shiftType = "Other";
                        }
                        String ev_title = ev_shiftType + " Shift";
                        // Start = week's Monday + day offset + start time.
                        c.setTime(date);
                        c.add(Calendar.DATE, add_days);
                        c.add(Calendar.HOUR_OF_DAY, add_start_hours);
                        c.add(Calendar.MINUTE, add_start_minutes);
                        Date startDate = c.getTime();
                        // The event id is the start timestamp — used as the sync key below.
                        long ev_id = startDate.getTime();
                        c.setTime(date);
                        c.add(Calendar.DATE, add_days);
                        if (add_finish_hours < add_start_hours) { //shift rolls to next day
                            c.add(Calendar.HOUR_OF_DAY, 24);
                            ev_description += "Shift finishes at " + times[1] + " on the next day\n\n";
                        } else {
                            c.add(Calendar.HOUR_OF_DAY, add_finish_hours);
                            c.add(Calendar.MINUTE, add_finish_minutes);
                        }
                        Date endDate = c.getTime();
                        Event ev = new Event(ev_id, ev_title, startDate, endDate, ev_description, ev_shiftType);
                        events.add(ev);
                        Log.i(TAG, "Event: " + ev);
                    }
                }
            }
        }
        //next merge adjacent shifts
        SimpleDateFormat timeFormat = new SimpleDateFormat("HH:mm");
        Event prev_event = null;
        for (Iterator<Event> it = events.iterator(); it.hasNext();) {
            Event cur_event = it.next();
            if (prev_event != null) {
                // Two shifts where one ends exactly when the next starts become one event.
                if (prev_event.getEndDate().compareTo(cur_event.getStartDate()) == 0) {
                    prev_event.setDescription(prev_event.getDescription() + "Merged consecutive shifts:\n"
                            + timeFormat.format(prev_event.getStartDate()) + " to "
                            + timeFormat.format(prev_event.getEndDate()) + " (" + prev_event.getShiftType()
                            + ")\n" + timeFormat.format(cur_event.getStartDate()) + " to "
                            + timeFormat.format(cur_event.getEndDate()) + " (" + cur_event.getShiftType()
                            + ")\n\n");
                    prev_event.setEndDate(cur_event.getEndDate());
                    //TODO: only merge if other + FOH/BOH, note times in new description
                    it.remove();
                }
            }
            prev_event = cur_event;
        }
        //next, load local events
        Cursor c1 = mContentResolver.query(
                Events.CONTENT_URI.buildUpon().appendQueryParameter(Events.ACCOUNT_NAME, account.name)
                        .appendQueryParameter(Events.ACCOUNT_TYPE, account.type).build(),
                new String[] { Events._ID, Events._SYNC_ID }, Events.CALENDAR_ID + "=?",
                new String[] { String.valueOf(calendar_id) }, null);
        while (c1 != null && c1.moveToNext()) {
            //if(is_full_sync) {
            //    deleteEvent(context, account, c1.getLong(0));
            //} else {
            // Keyed by _SYNC_ID, which holds the event's start-timestamp id.
            SyncEntry entry = new SyncEntry();
            entry.raw_id = c1.getLong(0);
            localEvents.put(c1.getLong(1), entry);
            //}
        }
        c1.close();
        try {
            ArrayList<ContentProviderOperation> operationList = new ArrayList<ContentProviderOperation>();
            for (Event event : events) {
                // Update an existing local event when the id is known, insert otherwise.
                if (localEvents.containsKey(Long.valueOf(event.getId()))) {
                    SyncEntry entry = localEvents.get(Long.valueOf(event.getId()));
                    operationList.add(updateEvent(calendar_id, account, event, entry.raw_id));
                } else {
                    operationList.add(updateEvent(calendar_id, account, event, -1));
                }
                // Flush the batch every 50 operations to stay under provider limits.
                if (operationList.size() >= 50) {
                    try {
                        mContentResolver.applyBatch(CalendarContract.AUTHORITY, operationList);
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                    operationList.clear();
                }
            }
            if (operationList.size() > 0) {
                try {
                    mContentResolver.applyBatch(CalendarContract.AUTHORITY, operationList);
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        } catch (Exception e1) {
            e1.printStackTrace();
            return;
        }
    } else {
        Log.e(TAG, "Server error in sending dirty contacts: " + resp.getStatusLine());
        throw new IOException();
    }
}
From source file:de.luhmer.owncloudnewsreader.reader.GoogleReaderApi.GoogleReaderMethods.java
/**
 * Fetches the Google Reader subscription list and parses the feed entries.
 * <p>
 * NOTE(review): as written this method ALWAYS returns an empty list — the line
 * that would add a {@code FolderSubscribtionItem} to the result is commented
 * out (see the TODO below), and {@code feedName}/{@code parentSubscriptionName}
 * are assigned but never read.
 *
 * @param _USERNAME the Google account user name used to obtain the auth key
 * @param _PASSWORD the Google account password used to obtain the auth key
 * @return currently always an empty list (see note above)
 * @throws IOException if the HTTP request fails or times out
 */
@SuppressWarnings("unused")
public static ArrayList<FolderSubscribtionItem> getSubList(String _USERNAME, String _PASSWORD)
        throws UnsupportedEncodingException, IOException {
    ArrayList<FolderSubscribtionItem> _SUBTITLE_ARRAYLIST = new ArrayList<FolderSubscribtionItem>();
    Document doc = Jsoup.connect(GoogleReaderConstants._SUBSCRIPTION_LIST_URL)
            .header("Authorization", GoogleReaderConstants._AUTHPARAMS
                    + AuthenticationManager.getGoogleAuthKey(_USERNAME, _PASSWORD))
            .userAgent(GoogleReaderConstants.APP_NAME).timeout(5000).get();
    Elements objects = doc.select("object");
    Element element = objects.get(0);
    Node childTemp = element.childNodes().get(0);
    List<Node> childs = childTemp.childNodes();
    for (Node node : childs) {
        // NOTE(review): the unchecked (Element) cast assumes every child node is an
        // element; a text/whitespace node here would throw ClassCastException — verify.
        Elements links = ((Element) node).select("string");
        String idFeed = null;
        String feedName;
        String parentSubscriptionName;
        // Each <string name="..."> child carries one attribute of the subscription.
        for (Element link : links) {
            String tagAttrib = link.attr("name");
            String tagText = link.text();
            if (tagAttrib.equals("id") && idFeed == null)
                idFeed = tagText;
            else if (tagAttrib.equals("title"))
                feedName = tagText;
            else if (tagAttrib.equals("label"))
                parentSubscriptionName = tagText;
        }
        //String idFeed = node.attr("id");
        //String name = node.attr("title");
        //_SUBTITLE_ARRAYLIST.add(new FolderSubscribtionItem(feedName, -1, idFeed, parentSubscriptionName));//TODO implements this again... ? Update FolderSubscribtionItem
    }
    //String[] _SUBTITLE_ARRAY = new String[_SUBTITLE_ARRAYLIST.size()];
    //_SUBTITLE_ARRAYLIST.toArray(_SUBTITLE_ARRAY);
    return _SUBTITLE_ARRAYLIST;
}
From source file:net.slkdev.swagger.confluence.service.impl.XHtmlToConfluenceServiceImpl.java
/**
 * Splits the converted Swagger XHTML document into Confluence pages according
 * to the configured {@link PaginationMode}.
 * <ul>
 *   <li>SINGLE_PAGE: one ROOT page containing the whole document (with or
 *       without its table of contents).</li>
 *   <li>Category mode: a ROOT page holding the ToC plus one CATEGORY page per
 *       {@code .sect1} section.</li>
 *   <li>INDIVIDUAL_PAGES: as category mode, but each CATEGORY page holds an
 *       inner ToC and every {@code .sect2} becomes its own INDIVIDUAL page.</li>
 * </ul>
 *
 * @return the ordered list of pages to create/update in Confluence
 */
private static List<ConfluencePage> handlePagination() {
    final List<ConfluencePage> confluencePages = new ArrayList<>();
    final SwaggerConfluenceConfig swaggerConfluenceConfig = SWAGGER_CONFLUENCE_CONFIG.get();
    final PaginationMode paginationMode = swaggerConfluenceConfig.getPaginationMode();
    final Document originalDocument = SWAGGER_DOCUMENT.get();
    // Work on a clone so the original (with ToC) stays available for single-page mode.
    final Document transformedDocument = originalDocument.clone();
    final Elements categoryElements = transformedDocument.select(".sect1");
    // Remove ToC from the transformed document
    final Elements toc = transformedDocument.select(".toc");
    toc.html("");
    toc.unwrap();
    // For Single Page Mode, the incoming XHTML can be used directly.
    if (paginationMode == SINGLE_PAGE) {
        final ConfluencePage confluencePage = ConfluencePageBuilder.aConfluencePage()
                .withPageType(PageType.ROOT).withOriginalTitle(swaggerConfluenceConfig.getTitle())
                .withConfluenceTitle(buildConfluenceTitle(swaggerConfluenceConfig.getTitle(), null, null))
                .build();
        if (swaggerConfluenceConfig.isIncludeTableOfContentsOnSinglePage()) {
            confluencePage.setXhtml(originalDocument.html());
        } else {
            confluencePage.setXhtml(transformedDocument.html());
        }
        confluencePages.add(confluencePage);
        return confluencePages;
    }
    // Before beginning further processing, we need to know if we're in individual
    // page mode or not, as that will affect how we split the DOM. If we're in this
    // mode then the category pages will contain inner table of contents.
    final boolean individualPages = (paginationMode == INDIVIDUAL_PAGES);
    // From here on, if we're still proceeding then we know the meat of the document
    // will go in sub-pages. So for the master page, we will use the table of contents
    final Elements tocElements = originalDocument.select(".toc");
    final List<String> innerTocXHtmlList = new ArrayList<>();
    final Elements innerTocElements = originalDocument.select(".sectlevel2");
    for (final Element innerTocElement : innerTocElements) {
        // If we're in individual page mode, then we collect the inner ToCs
        if (individualPages) {
            final StringBuilder tocHtml = new StringBuilder();
            tocHtml.append("<div id=\"toc\" class=\"toc\">");
            tocHtml.append("<h4 id=\"toctitle\">Table of Contents</h4>");
            tocHtml.append("<div><ul class=\"sectlevel1\">");
            tocHtml.append(innerTocElement.html());
            tocHtml.append("</ul></div></div>");
            innerTocXHtmlList.add(tocHtml.toString());
        }
        // If we're in category page mode, then we strip out the inner table of contents.
        else {
            innerTocElement.html("");
            innerTocElement.unwrap();
        }
    }
    // Build the Root Page w/ the Appropriate Level of Table of Contents
    final ConfluencePage rootConfluencePage = ConfluencePageBuilder.aConfluencePage()
            .withPageType(PageType.ROOT).withOriginalTitle(swaggerConfluenceConfig.getTitle())
            .withConfluenceTitle(buildConfluenceTitle(swaggerConfluenceConfig.getTitle(), null, null))
            .withXhtml(tocElements.html()).build();
    confluencePages.add(rootConfluencePage);
    // 1-based category counter, also used to index innerTocXHtmlList (category - 1).
    int category = 1;
    // Now we process the category pages
    for (final Element categoryElement : categoryElements) {
        // Fetch the title from the first child, which is the header element
        final String categoryTitle = categoryElement.children().first().text();
        // If we're in individual mode then we need these to be sub table of contents
        if (individualPages) {
            final ConfluencePage categoryConfluencePage = ConfluencePageBuilder.aConfluencePage()
                    .withPageType(PageType.CATEGORY).withOriginalTitle(categoryTitle)
                    .withConfluenceTitle(buildConfluenceTitle(categoryTitle, category, null))
                    .withXhtml(innerTocXHtmlList.get(category - 1)).build();
            confluencePages.add(categoryConfluencePage);
            final Elements individualElements = categoryElement.getElementsByClass("sect2");
            int individual = 1;
            for (final Element individualElement : individualElements) {
                final String individualTitle = individualElement.children().first().text();
                final ConfluencePage individualConfluencePage = ConfluencePageBuilder.aConfluencePage()
                        .withPageType(INDIVIDUAL).withOriginalTitle(individualTitle)
                        .withConfluenceTitle(buildConfluenceTitle(individualTitle, category, individual))
                        .withXhtml(individualElement.html()).build();
                confluencePages.add(individualConfluencePage);
                individual++;
            }
            category++;
            continue;
        }
        // If we're in category mode, we use the remaining page data
        final ConfluencePage categoryConfluencePage = ConfluencePageBuilder.aConfluencePage()
                .withPageType(PageType.CATEGORY).withOriginalTitle(categoryTitle)
                .withConfluenceTitle(buildConfluenceTitle(categoryTitle, category, null))
                .withXhtml(categoryElement.html()).build();
        confluencePages.add(categoryConfluencePage);
        category++;
    }
    return confluencePages;
}
From source file:me.vertretungsplan.parser.UntisCommonParser.java
static String findLastChange(Element doc, SubstitutionScheduleData scheduleData) { String lastChange = null;/*from www . j a v a 2s .c o m*/ boolean lastChangeLeft = false; if (scheduleData != null) { if (scheduleData.getData().has("stand_links")) { // backwards compatibility lastChangeLeft = scheduleData.getData().optBoolean("stand_links", false); } else { lastChangeLeft = scheduleData.getData().optBoolean(PARAM_LAST_CHANGE_LEFT, false); } } if (doc.select("table.mon_head").size() > 0) { Element monHead = doc.select("table.mon_head").first(); lastChange = findLastChangeFromMonHeadTable(monHead); } else if (lastChangeLeft) { final String bodyHtml = doc.select("body").size() > 0 ? doc.select("body").html() : doc.html(); lastChange = bodyHtml.substring(0, bodyHtml.indexOf("<p>") - 1); } else { List<Node> childNodes; if (doc instanceof Document) { childNodes = ((Document) doc).body().childNodes(); } else { childNodes = doc.childNodes(); } for (Node node : childNodes) { if (node instanceof Comment) { Comment comment = (Comment) node; if (comment.getData().contains("<table class=\"mon_head\">")) { Document commentedDoc = Jsoup.parse(comment.getData()); Element monHead = commentedDoc.select("table.mon_head").first(); lastChange = findLastChangeFromMonHeadTable(monHead); break; } } } } return lastChange; }