// Java tutorial: Selenium-based Robert Half job scraper
/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package Logic;

import Main.Database;
import static java.lang.System.setProperty;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.openqa.selenium.By;
import static org.openqa.selenium.By.xpath;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.NoSuchElementException;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.support.ui.ExpectedConditions;
import org.openqa.selenium.support.ui.WebDriverWait;

/**
 * Scrapes technology job postings from Robert Half's job-search pages using
 * Selenium, persisting each result (title, link, post date) to the database
 * table {@code robHalfJobs}.
 *
 * @author jason
 */
public class RobertHalfScraper implements Scraper {

    private static final Logger LOG = Logger.getLogger(RobertHalfScraper.class.getName());
    // Maximum number of result pages to page through (pages 2..19 after the first).
    private static final int MAX_PAGES = 20;

    WebDriver driver = null;
    WebDriverWait wait = null;
    String robertHalfUrl = "https://www.roberthalf.com/technology/job-search?keywords=";
    List<WebElement> robHalfJobElementsList;
    List<WebElement> robHalfDatesElementList;
    Database database = new Database();
    String query, location;

    /**
     * Builds the full search URL from the given terms.
     *
     * @param query    search keywords; whitespace is URL-encoded as {@code %20}
     * @param location search location; whitespace is URL-encoded as {@code %20}
     */
    public RobertHalfScraper(String query, String location) {
        this.query = query.replaceAll("\\s", "%20");
        this.location = location.replaceAll("\\s", "%20");
        // BUG FIX: build the URL from the encoded fields, not the raw
        // parameters — otherwise multi-word queries yield an invalid URL
        // containing literal spaces.
        this.robertHalfUrl = robertHalfUrl + this.query + "&location=" + this.location;
    }

    /**
     * @return the fully assembled Robert Half search URL
     */
    public String getUrl() {
        return robertHalfUrl;
    }

    //comb through Rob Half's website using selenium, and add the results into the database.
    /**
     * Opens the given search-results URL, scrapes the first page, then clicks
     * through up to {@value #MAX_PAGES} pagination links, appending every
     * posting to the database via {@link #grabResultsRobertHalf()}.
     *
     * @param url the Robert Half url where we will scrape job postings from
     */
    public void fetchJobs(String url) {
        try {
            // NOTE(review): hard-coded local driver path — consider making
            // this configurable (system property / env var) for portability.
            setProperty("webdriver.chrome.driver",
                    "C:\\Users\\jason\\Desktop\\Selenium Jars\\chromedriver.exe");
            driver = new ChromeDriver();
            driver.get(url);
            //wait.until(ExpectedConditions.visibilityOfElementLocated(By.xpath("//td[@class='JobTitle']//a")));
            Thread.sleep(4000); // crude fixed wait for the results to render
            grabResultsRobertHalf();
            // Scroll down so the pagination controls are in view, then walk
            // the remaining pages; running out of links ends the loop below.
            ((JavascriptExecutor) driver).executeScript("scroll(0,4000)");
            for (int i = 2; i < MAX_PAGES; i++) {
                driver.findElement(xpath("//li[@data-pg='pg-" + i + "' ]")).click();
                grabResultsRobertHalf();
            }
        } catch (NoSuchElementException ex) {
            // Expected once there are no more pagination links; log instead
            // of silently swallowing so real locator breakage is visible.
            LOG.log(Level.FINE, "No further result pages", ex);
        } catch (InterruptedException ex) {
            // Restore the interrupt flag so callers can observe cancellation.
            Thread.currentThread().interrupt();
            LOG.log(Level.SEVERE, null, ex);
        } finally {
            // BUG FIX: always release the browser so repeated runs do not
            // leak ChromeDriver processes.
            if (driver != null) {
                driver.quit();
            }
        }
    }

    /**
     * helper method to grab the titles,links, and dates of each page and
     * through each set, add it to the database.
     */
    public void grabResultsRobertHalf() {
        robHalfJobElementsList = driver.findElements(xpath("//td[@class='JobTitle']//a"));
        robHalfDatesElementList = driver.findElements(xpath("//span[@class='postDate']"));
        // BUG FIX: bound the loop by the shorter list so a posting with a
        // missing date element cannot throw IndexOutOfBoundsException
        // mid-scrape.
        int count = Math.min(robHalfJobElementsList.size(), robHalfDatesElementList.size());
        //grabs the title,link, and date in that respective order.
        for (int i = 0; i < count; i++) {
            String title = robHalfJobElementsList.get(i).getText();
            String link = robHalfJobElementsList.get(i).getAttribute("href");
            String date = robHalfDatesElementList.get(i).getText();
            database.addToDataBase(title, link, date, "robHalfJobs");
        }
    }
}