Logic.LinkedinScraper.java Source code

Introduction

Here is the source code for Logic.LinkedinScraper.java, a Selenium-based scraper that collects job postings from LinkedIn search results and appends them to a database and to Excel.

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package Logic;

import Main.Database;
import static java.lang.System.setProperty;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import static org.openqa.selenium.By.xpath;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.NoSuchElementException;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.By;
import org.openqa.selenium.support.ui.ExpectedConditions;
import org.openqa.selenium.support.ui.WebDriverWait;

/**
 *
 * @author jason
 *
 * This class uses Selenium to grab job results from LinkedIn by navigating
 * through the result pages and appending the job data to the database and
 * to Excel.
 */
public class LinkedinScraper implements Scraper {

    List<WebElement> linkedInElementsList;
    List<WebElement> linkedInDateElementsList;
    int dateCounter = 0;
    WebElement parent;
    WebDriver driver = null;
    WebDriverWait wait = null;
    Database database = new Database();
    String linkedinUrl = "https://www.linkedin.com/jobs/search?keywords=";
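    // throwaway account credentials used for the automated sign-in (see fetchJobs)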
    String userEmail = "lee886040@gmail.com";
    String password = "SeleniumJava";

    public LinkedinScraper(String query, String location) {
        // URL-encode whitespace in the query and location so they can be embedded in the URL.
        query = query.replaceAll("\\s", "%20");
        location = location.replaceAll("\\s", "%20");
        //System.out.println(query);
        this.linkedinUrl = linkedinUrl + query + "&location=" + location
                + "&locationId=&trk=jobs_jserp_search_button_execute&searchOrigin=JSERP";
    }

    public String getUrl() {
        return linkedinUrl;
    }

    /*
     * Logs into LinkedIn with the dummy account if the sign-in page appears;
     * otherwise starts querying for job information right away.
     */
    public void fetchJobs(String url) {
        //open up webdriver, go to the url, input user/password to access linkedin.
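        // note: the chromedriver binary path below is specific to the author's machine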
        setProperty("webdriver.chrome.driver", "C:\\Users\\jason\\Desktop\\Selenium Jars\\chromedriver.exe");
        driver = new ChromeDriver();
        driver.manage().window().maximize();
        driver.get(url);
        try {
            driver.findElement(xpath("//a[@class='sign-in-link']")).click();
            driver.findElement(xpath("//input[@id='session_key-login']")).sendKeys(userEmail);
            driver.findElement(xpath("//input[@id='session_password-login']")).sendKeys(password);
            driver.findElement(xpath("//input[@type='submit']")).click();
            Thread.sleep(4000);
            grabResultsLinkedin();
        } catch (NoSuchElementException ex) {
            try {
                Thread.sleep(4000);
                grabResultsLinkedin();
            } catch (InterruptedException ex1) {
                Logger.getLogger(LinkedinScraper.class.getName()).log(Level.SEVERE, null, ex1);
            }
        } catch (InterruptedException ex) {
            Logger.getLogger(LinkedinScraper.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Helper method that grabs the title, link, and date from the WebElement
     * lists and appends the information to the database. Once all the jobs on
     * the current page have been added, scroll to the next page and repeat
     * the process.
     */
    private void grabResultsLinkedin() {
        linkedInElementsList = driver.findElements(xpath("//a[@class='job-title-link']"));
        linkedInDateElementsList = driver
                .findElements(xpath("//span[@class='job-date-posted date-posted-or-new']"));
        for (int i = 0; i < linkedInElementsList.size(); i++) {
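            // the ".." XPath step selects the parent of the job-title link; its
            // class name reveals whether the posting is flagged as new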
            parent = linkedInElementsList.get(i).findElement(xpath(".."));
            String parentClassName = parent.getAttribute("class");
            //System.out.println(parentClassName);

            String title = linkedInElementsList.get(i).getText();
            String jobLink = linkedInElementsList.get(i).getAttribute("href");
            String date;
            // Based on the parent element's class, either mark the job as newly
            // listed or read its exact date from the date list; the two cases are
            // rendered differently in the DOM, so dateCounter tracks the date list
            // separately.
            if (parentClassName.contains("new-job")) {
                date = "Newly Listed";
            } else {
                date = linkedInDateElementsList.get(dateCounter).getText();
                dateCounter++;
            }
            database.addToDataBase(title, jobLink, date, "linkedInJobs");
        }
        scrollToNextPage();
        //System.out.println("There are over " + linkedinTitlesAndUrls.size() + " matches from LinkedIn.com");        

    }

    /*
     * Scrolls down, clicks the "Next" button to load the following results page,
     * resets the date counter, and calls grabResultsLinkedin() again. On the last
     * page the "Next" button does not exist, so a NoSuchElementException is thrown
     * and the recursion ends.
     */
    private void scrollToNextPage() {
        try {
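            // scroll the window down (to y = 4000 px) so the pagination controls
            // near the bottom of the results are in view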
            ((JavascriptExecutor) driver).executeScript("scroll(0,4000)");
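            // click the "Next" button; on the last page it does not exist, so the
            // NoSuchElementException below ends the pagination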
            driver.findElement(xpath("//a[@class='next-prev-container next-btn']")).click();
            //reset the date counter to correctly discern its place in the arraylist.
            Thread.sleep(4000);
            dateCounter = 0;
            grabResultsLinkedin();
        } catch (NoSuchElementException ex) {
            return;
        } catch (InterruptedException ex) {
            Logger.getLogger(LinkedinScraper.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
}
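
The listing above depends on a Scraper interface and a Main.Database class that are not shown on this page. The sketch below is a minimal, hypothetical reconstruction of those collaborators plus a small driver class, assuming the simplest shapes that would let LinkedinScraper compile; the interface contents, the addToDataBase signature, and the ScraperDemo class are inferred from the calls above rather than taken from the author's project.

// Logic/Scraper.java -- assumed shape of the interface LinkedinScraper implements
package Logic;

public interface Scraper {

    // fetchJobs(String) is the method LinkedinScraper clearly exposes to callers
    void fetchJobs(String url);
}

// Main/Database.java -- minimal stand-in; the real class also appends to Excel
package Main;

public class Database {

    // signature inferred from the call addToDataBase(title, jobLink, date, "linkedInJobs")
    public void addToDataBase(String title, String link, String date, String table) {
        System.out.printf("%s | %s | %s | %s%n", table, title, date, link);
    }
}

// Main/ScraperDemo.java -- hypothetical entry point showing how the scraper might be driven
package Main;

import Logic.LinkedinScraper;

public class ScraperDemo {

    public static void main(String[] args) {
        // spaces in the keyword and location are URL-encoded by the constructor
        LinkedinScraper scraper = new LinkedinScraper("java developer", "New York");
        scraper.fetchJobs(scraper.getUrl());
    }
}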