// Java tutorial
/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package Logic; import Main.Database; import static java.lang.System.setProperty; import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; import static org.openqa.selenium.By.xpath; import org.openqa.selenium.JavascriptExecutor; import org.openqa.selenium.NoSuchElementException; import org.openqa.selenium.WebDriver; import org.openqa.selenium.WebElement; import org.openqa.selenium.chrome.ChromeDriver; import org.openqa.selenium.By; import org.openqa.selenium.support.ui.ExpectedConditions; import org.openqa.selenium.support.ui.WebDriverWait; /** * * @author jason * * this class utilizes Selenium to grabs job results from LinkedIn by * navigating through the webpages and grabbing job data by appending to database * and Excel. */ public class LinkedinScraper implements Scraper { List<WebElement> linkedInElementsList; List<WebElement> linkedInDateElementsList; int dateCounter = 0; WebElement parent; WebDriver driver = null; WebDriverWait wait = null; Database database = new Database(); String linkedinUrl = "https://www.linkedin.com/jobs/search?keywords="; String userEmail = "lee886040@gmail.com"; String password = "SeleniumJava"; public LinkedinScraper(String query, String location) { //regex to properly input url. query = query.replaceAll("\\s", "%20"); location = location.replaceAll("\\s", "%20"); //System.out.println(query); \ this.linkedinUrl = linkedinUrl + query + "&location=" + location + "&locationId=&trk=jobs_jserp_search_button_execute&searchOrigin=JSERP"; } public String getUrl() { return linkedinUrl; } /* * Logs onto Selenium with dummy account if prompted, else just start * querying for information right away. */ public void fetchJobs(String url) { //open up webdriver, go to the url, input user/password to access linkedin. 
setProperty("webdriver.chrome.driver", "C:\\Users\\jason\\Desktop\\Selenium Jars\\chromedriver.exe"); driver = new ChromeDriver(); driver.manage().window().maximize(); driver.get(url); try { driver.findElement(xpath("//a[@class='sign-in-link']")).click(); driver.findElement(xpath("//input[@id='session_key-login']")).sendKeys(userEmail); driver.findElement(xpath("//input[@id='session_password-login']")).sendKeys(password); driver.findElement(xpath("//input[@type='submit']")).click(); Thread.sleep(4000); grabResultsLinkedin(); } catch (NoSuchElementException ex) { try { Thread.sleep(4000); grabResultsLinkedin(); } catch (InterruptedException ex1) { Logger.getLogger(LinkedinScraper.class.getName()).log(Level.SEVERE, null, ex1); } } catch (InterruptedException ex) { Logger.getLogger(LinkedinScraper.class.getName()).log(Level.SEVERE, null, ex); } } /** * helper method which grabs the title,link, and date from the WebElement * lists, and appends the information onto the database. Once done adding * all the information to the database in current page, scroll to the next * page, and repeat the process */ private void grabResultsLinkedin() { linkedInElementsList = driver.findElements(xpath("//a[@class='job-title-link']")); linkedInDateElementsList = driver .findElements(xpath("//span[@class='job-date-posted date-posted-or-new']")); for (int i = 0; i < linkedInElementsList.size(); i++) { parent = linkedInElementsList.get(i).findElement(xpath("..")); String parentClassName = parent.getAttribute("class"); //System.out.println(parentClassName); String title = linkedInElementsList.get(i).getText(); String jobLink = linkedInElementsList.get(i).getAttribute("href"); String date; // Based on the webelement, either declare job as new job or grab exact date from the list of job dates because the DOM changes dynamically.. 
if (parentClassName.contains("new-job")) { date = "Newly Listed"; } else { date = linkedInDateElementsList.get(dateCounter).getText(); dateCounter++; } database.addToDataBase(title, jobLink, date, "linkedInJobs"); } scrollToNextPage(); //System.out.println("There are over " + linkedinTitlesAndUrls.size() + " matches from LinkedIn.com"); } /*navigate to next pages, grab their job urls, and call the addToLinkList() method to populate into ArrayList <String> of urls. At the last page, I will get a NoSuchElementException, * therefore it will end. */ private void scrollToNextPage() { try { ((JavascriptExecutor) driver).executeScript("scroll(0,4000)"); driver.findElement(xpath("//a[@class='next-prev-container next-btn']")).click(); //reset the date counter to correctly discern its place in the arraylist. Thread.sleep(4000); dateCounter = 0; grabResultsLinkedin(); } catch (NoSuchElementException ex) { return; } catch (InterruptedException ex) { Logger.getLogger(LinkedinScraper.class.getName()).log(Level.SEVERE, null, ex); } } }