List of usage examples for org.jsoup.select.Elements.select
public Elements select(String query)
From source file:perflab.loadrunnerwrapperjenkins.LoadRunnerWrapper.java
/** * @param htmlSummaryFile - load runner analysis html report file to parse * @param summaryFile - location of summary file to be generated out of loadrunner * html analysis// ww w . j ava2s.c o m */ protected void parseSummaryFile(String htmlSummaryFile, String summaryFile) { try { File input = new File(htmlSummaryFile); Document document = Jsoup.parse(input, "UTF-8"); Document parse = Jsoup.parse(document.html()); Elements table = parse.select("table").select("[summary=Transactions statistics summary table]"); Elements rows = table.select("tr"); logger.println("number of rows in summary file=" + rows.size()); for (Element row : rows) { // logger.println("table element = " + row.toString()); String name = row.select("td[headers=LraTransaction Name]").select("span").text(); if (!name.isEmpty()) { float avgRT = Float.valueOf(row.select("td[headers=LraAverage]").select("span").text()); float minRT = Float.valueOf(row.select("td[headers=LraMinimum]").select("span").text()); float maxRT = Float.valueOf(row.select("td[headers=LraMaximum]").select("span").text()); int passed = Integer.valueOf(row.select("td[headers=LraPass]").select("span").text() .replace(".", "").replace(",", "")); int failed = Integer.valueOf(row.select("td[headers=LraFail]").select("span").text() .replace(".", "").replace(",", "")); // logger.println("Saving Transaction [" + name + "]"); this.transactions.add(new LoadRunnerTransaction(name, minRT, avgRT, maxRT, passed, failed)); } } } catch (IOException e) { logger.println("Can't read LoadRunner Analysis html report " + e.getMessage()); } }
From source file:Search.DataManipulation.DataParser.java
public String getIcon(Document dom) throws IOException { Elements iconClass = dom.getElementsByClass("cover-container"); Elements iconClass1 = iconClass.select("img.cover-image[alt=Cover art]"); String iconUrl = iconClass1.first().attr("src"); byte[] iconByte = dataHandler.imageDownloader(iconUrl); if (iconByte.length == 0) { log.warn("Invalid Icon url found by Search.DataManipulation.DataValidator, not adding to appData"); return null; } else {//from ww w . j a v a2 s . com String icon = Base64.getEncoder().encodeToString(iconByte); return icon; } }
From source file:webscrap.WebScrap.java
/** * @param args the command line arguments *//*from www . j a va 2 s . co m*/ public static void main(String[] args) { // TODO code application logic here Document doc; try { doc = Jsoup.connect( "http://www.metmuseum.org/collection/the-collection-online/search/15538?pos=1&rpp=30&pg=1&rndkey=20150122&ft=*&deptids=2") .get(); File jsonFile = new File("Records.json"); FileWriter output = new FileWriter(jsonFile); JSONArray store = new JSONArray(); //Declarations for JSON output String nameTag = "Name"; String name; String artistTag = "Artist"; String artistName; String imgURLTag = "imgURL"; String imgsrc; String dateTag = "Date"; String date; String geoTag = "Geography"; String geoVal; String cultureTag = "Culture"; String culture; String mediumTag = "Medium"; String medium; String dimTag = "Dimension"; String dim; String classTag = "Classification"; String classification; String credit_line_tag = "Credit_Line"; String credit_line; String accessNumTag = "Accession_Number"; String accessNum; String RnRTag = "Rights_and_Reproduction"; String RnR; //trying to load the next urls String next = "http://www.metmuseum.org/collection/the-collection-online/search/11432?pos=1&rpp=30&pg=1&rndkey=20150123&ft=*&deptids=2"; int i = 500; while (i != 0) { name = ""; artistName = ""; imgsrc = ""; date = ""; //geoVal = "not available"; //culture = "not available"; medium = ""; dim = ""; classification = ""; credit_line = ""; accessNum = ""; //RnR = "not available"; doc = Jsoup.connect(next).get(); String o_title = doc.getElementsByTag("h2").text(); String[] part_o = o_title.split("Email"); String part_o1 = part_o[0]; String part_o2 = part_o[1]; //System.out.println(o_title); name = part_o1; //String artist = doc.getElementsByTag("h3").text(); //System.out.println(artist); //artistName = artist; Elements imgdiv = doc.select("div#inner-image-container img"); for (Element e : imgdiv) { imgsrc = e.absUrl("src"); } Elements divs; divs = doc.select("div.tombstone"); Elements divchild; 
divchild = divs.select("div"); int count = 0; for (Element div : divchild) { String info = div.text(); if (count != 0) { String[] parts = info.split(":"); String part1 = parts[0]; String part2 = parts[1]; switch (part1) { case "Artist": artistName = part2; break; case "Date": date = part2; break; case "Geography": geoVal = part2; break; case "Culture": culture = part2; break; case "Medium": medium = part2; break; case "Dimensions": dim = part2; break; case "Classification": classification = part2; break; case "Credit Line": credit_line = part2; break; case "Accession Number": accessNum = part2; break; case "Rights and Reproduction": RnR = part2; break; } } count++; } if (classification.equals(" Paintings")) { //System.out.println(nameTag+name); //System.out.println(artistTag+artistName); //System.out.println(imgURLTag+imgsrc); //System.out.println(dateTag+date); //System.out.println(mediumTag+medium); //System.out.println(dimTag+dim); //System.out.println(classTag+classification); //System.out.println(credit_line_tag+credit_line); //System.out.println(accessNumTag+accessNum); //System.out.println(i); //json writing JSONObject jsonObj = new JSONObject(); jsonObj.put(nameTag, name); jsonObj.put(artistTag, artistName); jsonObj.put(imgURLTag, imgsrc); jsonObj.put(dateTag, date); jsonObj.put(mediumTag, medium); jsonObj.put(dimTag, dim); jsonObj.put(classTag, classification); jsonObj.put(credit_line_tag, credit_line); jsonObj.put(accessNumTag, accessNum); store.add(jsonObj); i--; } //going to next page Element link = doc.select("a.next").first(); next = link.attr("abs:href"); } output.write(store.toJSONString()); output.write("\n"); output.flush(); output.close(); } catch (IOException e) { } }