Logic.RobertHalfScraper.java Source code

Java tutorial

Introduction

Here is the source code for Logic.RobertHalfScraper.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package Logic;

import Main.Database;
import static java.lang.System.setProperty;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.openqa.selenium.By;
import static org.openqa.selenium.By.xpath;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.NoSuchElementException;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.support.ui.ExpectedConditions;
import org.openqa.selenium.support.ui.WebDriverWait;

/**
 *
 * @author jason
 *
 * This class uses Selenium to grab job results from Robert Half-Tech by
 * navigating through the result pages and appending the job data to the
 * database and Excel.
 */
public class RobertHalfScraper implements Scraper {

    /** System property Selenium reads to locate the chromedriver binary. */
    private static final String CHROME_DRIVER_PROPERTY = "webdriver.chrome.driver";

    /** Fallback chromedriver location, used only when the property is not already set. */
    private static final String DEFAULT_CHROME_DRIVER_PATH
            = "C:\\Users\\jason\\Desktop\\Selenium Jars\\chromedriver.exe";

    /** Fixed pause (ms) after opening the first results page, before reading it. */
    private static final long INITIAL_LOAD_WAIT_MILLIS = 4000;

    WebDriver driver = null;
    // Never initialised in this class; kept so existing package-level references still compile.
    WebDriverWait wait = null;
    String robertHalfUrl = "https://www.roberthalf.com/technology/job-search?keywords=";
    List<WebElement> robHalfJobElementsList;
    List<WebElement> robHalfDatesElementList;
    Database database = new Database();
    String query, location;

    /**
     * Builds the search URL for the given keywords and location.
     *
     * @param query search keywords; whitespace is replaced with {@code %20}
     * @param location search location; whitespace is replaced with {@code %20}
     */
    public RobertHalfScraper(String query, String location) {
        this.query = query.replaceAll("\\s", "%20");
        this.location = location.replaceAll("\\s", "%20");
        // Use the encoded fields, not the raw parameters, so spaces never reach the URL.
        this.robertHalfUrl = robertHalfUrl + this.query + "&location=" + this.location;
    }

    /**
     * @return the search URL assembled by the constructor
     */
    public String getUrl() {
        return robertHalfUrl;
    }

    /**
     * Opens {@code url} in a new Chrome session, stores the results of the
     * first page via {@link #grabResultsRobertHalf()}, then clicks through the
     * pagination items {@code pg-2} to {@code pg-19}, storing each page's
     * results in turn. Paging stops quietly at the first missing pagination
     * item. The browser session is always closed before this method returns.
     *
     * @param url the Robert Half search url to scrape job postings from
     */
    public void fetchJobs(String url) {
        // Respect an externally configured driver location; fall back to the default path.
        if (System.getProperty(CHROME_DRIVER_PROPERTY) == null) {
            System.setProperty(CHROME_DRIVER_PROPERTY, DEFAULT_CHROME_DRIVER_PATH);
        }
        try {
            driver = new ChromeDriver();
            driver.get(url);
            // Fixed pause to let the first result page render before reading it.
            Thread.sleep(INITIAL_LOAD_WAIT_MILLIS);
            grabResultsRobertHalf();

            // Scroll down before clicking the pagination items.
            ((JavascriptExecutor) driver).executeScript("scroll(0,4000)");
            for (int page = 2; page < 20; page++) {
                driver.findElement(xpath("//li[@data-pg='pg-" + page + "'  ]")).click();
                grabResultsRobertHalf();
            }
        } catch (NoSuchElementException ignored) {
            // Expected: the requested pagination item does not exist, i.e. there
            // are no further result pages. Everything found so far is already stored.
        } catch (InterruptedException ex) {
            // Restore the interrupt flag so callers can observe the interruption.
            Thread.currentThread().interrupt();
            Logger.getLogger(RobertHalfScraper.class.getName()).log(Level.SEVERE, null, ex);
        } finally {
            // Always release the browser and chromedriver process.
            if (driver != null) {
                driver.quit();
                driver = null;
            }
        }
    }

    /**
     * Helper that reads the job title, link and posting date of every result
     * on the currently loaded page and passes each one to
     * {@code database.addToDataBase} with the {@code "robHalfJobs"} key.
     * A result without a matching date element is stored with an empty date.
     *
     * @throws IllegalStateException if no browser session is open
     */
    public void grabResultsRobertHalf() {
        if (driver == null) {
            throw new IllegalStateException("No active browser session; call fetchJobs first");
        }
        robHalfJobElementsList = driver.findElements(xpath("//td[@class='JobTitle']//a"));
        robHalfDatesElementList = driver.findElements(xpath("//span[@class='postDate']"));
        final int dateCount = robHalfDatesElementList.size();
        // Titles and dates are paired by position on the page.
        for (int i = 0; i < robHalfJobElementsList.size(); i++) {
            WebElement jobAnchor = robHalfJobElementsList.get(i);
            String title = jobAnchor.getText();
            String link = jobAnchor.getAttribute("href");
            // Guard against pages that expose fewer date elements than job titles.
            String date = i < dateCount ? robHalfDatesElementList.get(i).getText() : "";
            database.addToDataBase(title, link, date, "robHalfJobs");
        }
    }
}