org.brunocvcunha.taskerbox.impl.jobs.LinkedInJobSeeker.java Source code

Java tutorial

Introduction

Here is the source code for org.brunocvcunha.taskerbox.impl.jobs.LinkedInJobSeeker.java

Source

/**
 * Copyright (C) 2015 Bruno Candido Volpato da Cunha (brunocvcunha@gmail.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.brunocvcunha.taskerbox.impl.jobs;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.cookie.BasicClientCookie;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import org.brunocvcunha.taskerbox.core.annotation.TaskerboxField;
import org.brunocvcunha.taskerbox.core.http.TaskerboxHttpBox;
import org.brunocvcunha.taskerbox.impl.http.URLOpenerAction;
import org.brunocvcunha.taskerbox.impl.jobs.vo.ScorerResult;
import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import lombok.Getter;
import lombok.Setter;
import lombok.extern.log4j.Log4j;

@Log4j
public class LinkedInJobSeeker extends DefaultJobSearchChannel {

    /**
     * Job ids already seen by this instance. {@code handleJob} records an id the first time
     * it meets it and skips any id that is already present.
     */
    @Getter @Setter
    private List<Long> openIds = new ArrayList<>();

    /** Keywords sent as the {@code keywords} query parameter of the job search. */
    @TaskerboxField("Search")
    @Getter @Setter
    private String search;

    /** Country codes to search; {@code execute} runs one paged search per entry. */
    @TaskerboxField("Countries")
    @Getter @Setter
    private String[] countries;

    /** Optional postal code; appended to the search URL only when non-empty. */
    @TaskerboxField("Postal Code")
    @Getter @Setter
    private String postalCode;

    /** Not referenced by any method of this class; exposed through its accessor only. */
    @Getter @Setter
    private File toApplyFile;

    /**
     * When {@code false}, jobs carrying a {@code sourceDomain} and jobs whose page lacks the
     * "onsite-apply" marker are skipped by {@code handleJob}.
     */
    @TaskerboxField("External Apply")
    @Getter @Setter
    private boolean externalApply;

    /** Base directory under which {@code job-db\_titles.txt} is appended to. */
    @Getter @Setter
    private String tempDir = "e:\\tmp";

    /** Account e-mail, posted as {@code session_key} during login. */
    @TaskerboxField("User Email")
    @Getter @Setter
    private String userEmail;

    /** Account password, posted as {@code session_password} during login. */
    @TaskerboxField("User Password")
    @Getter @Setter
    private String userPassword;

    /**
     * Value of the {@code f_TP} search parameter, a comma-separated list of age buckets:
     * 1=1 day ago, 2=2-7 days, 3=8-14 days, 4=15-30 days.
     */
    @TaskerboxField("Date Facet")
    @Getter @Setter
    private String dateFacet = "1,2,3";

    /** Number of jobs that may be handed to the action before the channel pauses itself. */
    @TaskerboxField("Max Job Count")
    @Getter @Setter
    private int maxCount;

    /** Running count compared against {@link #maxCount} in {@code handleJob}. */
    private int actionCount = 0;

    /**
     * Manual entry point: logs in and runs one "java" job search for the US around postal
     * code 94086, handing matching job URLs to a {@link URLOpenerAction}.
     *
     * <p>Credentials are read from the {@code LINKEDIN_EMAIL} and {@code LINKEDIN_PASSWORD}
     * environment variables so that no account secret lives in the source tree.
     *
     * @param args ignored
     * @throws IllegalStateException if either environment variable is missing or empty
     * @throws Exception whatever {@code execute()} propagates
     */
    public static void main(String[] args) throws Exception {
        String email = System.getenv("LINKEDIN_EMAIL");
        String password = System.getenv("LINKEDIN_PASSWORD");
        if (email == null || email.isEmpty() || password == null || password.isEmpty()) {
            throw new IllegalStateException(
                    "Set the LINKEDIN_EMAIL and LINKEDIN_PASSWORD environment variables before running");
        }

        LinkedInJobSeeker seeker = new LinkedInJobSeeker();
        seeker.setCountries(new String[] { "us" });
        seeker.setUserEmail(email);
        seeker.setUserPassword(password);
        seeker.setSearch("java");
        seeker.setExternalApply(false);
        seeker.setPaused(false);
        seeker.setAction(new URLOpenerAction());
        seeker.setPostalCode("94086");
        seeker.setId("MainRun");

        //open up to 30 tabs
        seeker.setMaxCount(30);

        seeker.setup();
        seeker.execute();
    }

    /**
     * Logs the shared Taskerbox HTTP client in to LinkedIn.
     *
     * <p>Fetches the landing page, extracts the {@code loginCsrfParam} and {@code csrfToken}
     * hidden inputs, and posts them together with {@link #userEmail} / {@link #userPassword}
     * to the login-submit endpoint. The client obtained here is also stored in
     * {@code this.httpClient}.
     *
     * @param fetchCookie not used by this method; kept so existing callers keep compiling
     * @throws ClientProtocolException on an HTTP protocol error
     * @throws IOException if either request fails or is aborted
     * @throws IllegalStateException declared for callers; not thrown explicitly here
     * @throws URISyntaxException declared for callers; not thrown explicitly here
     */
    public void bootstrapLinkedInHttpClient(boolean fetchCookie)
            throws ClientProtocolException, IllegalStateException, IOException, URISyntaxException {
        this.httpClient = TaskerboxHttpBox.getInstance().getHttpClient();

        HttpGet get = new HttpGet("https://www.linkedin.com/");
        HttpResponse getResponse = this.httpClient.execute(get);

        // Reading the whole body fully consumes the entity of the GET response.
        String getContent = EntityUtils.toString(getResponse.getEntity());

        Document getDoc = Jsoup.parse(getContent);

        // Jsoup returns an empty string when the input is absent.
        String loginCsrfParam = getDoc.select("input[name=loginCsrfParam]").attr("value");
        String csrfToken = getDoc.select("input[name=csrfToken]").attr("value");

        logInfo(log, loginCsrfParam);
        if (loginCsrfParam.isEmpty() || csrfToken.isEmpty()) {
            logError(log, "CSRF inputs not found on the LinkedIn landing page; login will probably fail");
        }

        HttpPost post = new HttpPost("https://www.linkedin.com/uas/login-submit");
        List<NameValuePair> pairs2 = new ArrayList<>();
        pairs2.add(new BasicNameValuePair("isJsEnabled", "true"));
        pairs2.add(new BasicNameValuePair("source_app", ""));
        pairs2.add(new BasicNameValuePair("session_key", this.userEmail));
        pairs2.add(new BasicNameValuePair("session_password", this.userPassword));
        pairs2.add(new BasicNameValuePair("session_redirect", ""));
        pairs2.add(new BasicNameValuePair("trk", ""));
        pairs2.add(new BasicNameValuePair("loginCsrfParam", loginCsrfParam));
        pairs2.add(new BasicNameValuePair("fromEmail", ""));
        pairs2.add(new BasicNameValuePair("csrfToken", csrfToken));
        // The client_* values below are fixed literals replayed on every login.
        pairs2.add(new BasicNameValuePair("sourceAlias", "0_7r5yezRXCiA_H0CRD8sf6DhOjTKUNps5xGTqeX8EEoi"));
        pairs2.add(new BasicNameValuePair("client_ts", "1413507675390"));
        pairs2.add(new BasicNameValuePair("client_r", "a@gmail.com:812661382:422199706:736472965"));
        pairs2.add(new BasicNameValuePair("client_output", "-1850142"));
        pairs2.add(new BasicNameValuePair("client_n", "812661382:422199706:736472965"));
        pairs2.add(new BasicNameValuePair("client_v", "1.0.1"));

        // Encode explicitly as UTF-8; the single-argument constructor falls back to
        // ISO-8859-1, which mangles credentials containing non-Latin characters.
        UrlEncodedFormEntity entity2 = new UrlEncodedFormEntity(pairs2, "UTF-8");
        post.setEntity(entity2);

        // Consume the login response so the underlying connection is handed back to the
        // connection manager instead of staying leased.
        HttpResponse postResponse = this.httpClient.execute(post);
        EntityUtils.consume(postResponse.getEntity());

    }

    /**
     * Runs the parent setup and then attempts the LinkedIn login.
     *
     * <p>A failed login does not abort setup: the failure is logged with its cause and the
     * method returns normally, leaving {@code execute()} to re-attempt the login when it is
     * served a sign-in page.
     */
    @Override
    public void setup() {
        super.setup();
        logInfo(log, "Running setup...");

        try {
            bootstrapLinkedInHttpClient(true);
        } catch (IOException | IllegalStateException | URISyntaxException e) {
            // ClientProtocolException is an IOException, so it is covered here as well.
            log.error("LinkedIn login failed during setup", e);
        }
    }

    /**
     * Evaluates one job entry from the search results and, if it passes every filter, hands
     * its URL to {@code performUnique}.
     *
     * <p>Filters, in order: already applied, already seen in this run, external source
     * domain, action already performed, blacklisted external domains, title, employer,
     * location, on-site apply marker, visa wording, experience wording, score.
     *
     * @param job the {@code job} object of one search result
     * @return {@code false} when the job is already applied to, was already seen in this
     *         run, or the action limit was reached (the channel is paused in that case);
     *         {@code true} otherwise, whether the job was acted on or filtered out
     * @throws JSONException if a required field is missing from {@code job}
     * @throws ClientProtocolException on an HTTP protocol error while fetching the job page
     * @throws IOException if fetching the job page fails
     * @throws URISyntaxException propagated from the HTTP helper
     */
    private boolean handleJob(JSONObject job)
            throws JSONException, ClientProtocolException, IOException, URISyntaxException {
        if (job.getBoolean("isApplied")) {
            return false;
        }

        long jobId = job.getLong("id");
        if (this.openIds.contains(jobId)) {
            return false;
        }
        this.openIds.add(jobId);

        String jobTitle = job.getString("fmt_jobTitle").replaceAll("</?B>", "");
        String sourceDomain = job.has("sourceDomain") ? job.getString("sourceDomain") : null;

        // With external apply disabled, only jobvite and ziprecruiter sourced jobs go on.
        // The "ignored" line is logged only for jobs that are really dropped here.
        if (!this.externalApply && sourceDomain != null && !sourceDomain.contains("jobvite")
                && !sourceDomain.contains("ziprecruiter")) {
            logInfo(log, jobId + " - " + jobTitle + " - " + sourceDomain + " --> ignored [external]");
            return true;
        }

        String jobEmployer = job.getString("fmt_companyName");

        String jobUrl = "https://www.linkedin.com/jobs2/view/" + jobId;
        if (alreadyPerformedAction(jobUrl)) {
            return true;
        }

        String location = "";
        if (job.has("fmt_location")) {
            location = job.getString("fmt_location");
        }
        String headline = jobUrl + " - " + location + " - " + jobTitle + " - " + jobEmployer;

        if (sourceDomain != null && this.externalApply
                && (sourceDomain.contains("empregocerto.uol.com.br") || sourceDomain.contains("jobomas.com")
                        || sourceDomain.contains("curriculum.com.br"))) {
            logInfo(log, "-- Ignored [externalApply - domain " + sourceDomain + "] " + headline);
            addAlreadyPerformedAction(jobUrl);
            return true;
        }

        if (!considerTitle(jobTitle)) {
            logInfo(log, "-- Ignored [title] " + headline);
            addAlreadyPerformedAction(jobUrl);
            return true;
        }

        appendTitleToLocalDb(jobTitle);

        if (!considerEmployer(jobEmployer)) {
            logInfo(log, "-- Ignored [employer] " + headline);
            addAlreadyPerformedAction(jobUrl);
            return true;
        }

        if (!considerLocation(location)) {
            logInfo(log, "-- Ignored [location] " + headline);
            addAlreadyPerformedAction(jobUrl);
            return true;
        }

        HttpEntity jobEntity = TaskerboxHttpBox.getInstance().getEntityForURL(jobUrl);
        String jobResult = TaskerboxHttpBox.getInstance().readResponseFromEntity(jobEntity);
        Document jobDocument = Jsoup.parse(jobResult);
        Elements elDescription = jobDocument.select("div.description-section").select("div.rich-text");
        Elements elSkills = jobDocument.select("div.skills-section").select("div.rich-text");

        if (!this.externalApply && !jobResult.contains("onsite-apply")) {
            logInfo(log, "-- Ignored [onsite apply] " + headline);
            addAlreadyPerformedAction(jobUrl);
            pauseBetweenJobRequests();
            return true;
        }

        String descriptionHtml = elDescription.html();
        String skillsHtml = elSkills.html();

        if (!considerVisaDescription(descriptionHtml) || !considerVisaDescription(skillsHtml)) {
            logInfo(log, "-- Ignored [visa] " + headline);
            addAlreadyPerformedAction(jobUrl);
            return true;
        }
        if (!considerExperienceDescription(descriptionHtml) || !considerExperienceDescription(skillsHtml)) {
            logInfo(log, "-- Ignored [exp] " + headline);
            addAlreadyPerformedAction(jobUrl);
            return true;
        }

        ScorerResult result = LinkedInJobDBComparer.getScore(descriptionHtml + " - " + skillsHtml);

        if (result.getScore() < this.requiredScore) {
            logInfo(log,
                    "-- Ignored [scorer] " + result.getScore() + " - " + result.getMatches() + " - " + headline);
            addAlreadyPerformedAction(jobUrl);
            return true;
        }

        headline = headline + " - " + result.getMatches();

        logInfo(log, headline);
        logInfo(log, descriptionHtml);

        // Enforce the cap on every call once it has been reached. The previous
        // "actionCount++ == maxCount" test matched exactly once, so later jobs on the same
        // page were still acted on. A negative maxCount still means "no cap", as before.
        if (this.maxCount >= 0 && this.actionCount >= this.maxCount) {
            this.setPaused(true);
            return false;
        }
        this.actionCount++;

        performUnique(jobUrl);

        pauseBetweenJobRequests();

        return true;

    }

    /** Appends the title to the local titles file; best effort, a failure never stops the job. */
    private void appendTitleToLocalDb(String jobTitle) {
        File titlesFile = new File(this.tempDir + "\\job-db\\_titles.txt");
        try (FileWriter out = new FileWriter(titlesFile, true)) {
            out.write(jobTitle + "\r\n");
        } catch (IOException | RuntimeException e) {
            log.debug("Could not append job title to " + titlesFile, e);
        }
    }

    /** Sleeps five seconds between job-page requests, restoring the interrupt flag if interrupted. */
    private void pauseBetweenJobRequests() {
        try {
            Thread.sleep(5000L);
        } catch (InterruptedException e) {
            // Keep the interruption visible to the caller's own sleep / pause checks.
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Builds a cookie bound to the {@code www.linkedin.com} domain and the root path.
     *
     * @param name cookie name
     * @param value cookie value
     * @return the cookie produced by {@link TaskerboxHttpBox#buildCookie}
     */
    @Override
    public BasicClientCookie buildCookie(String name, String value) {
        final String cookieDomain = "www.linkedin.com";
        final String cookiePath = "/";
        BasicClientCookie cookie = TaskerboxHttpBox.buildCookie(name, value, cookieDomain, cookiePath);
        return cookie;
    }

    /**
     * Runs the paged job search for every configured country and feeds each result to
     * {@code handleJob}.
     *
     * <p>For each country, pages {@code 1} to {@code maxPages - 1} are requested. A page on
     * which {@code handleJob} returns {@code true} for no entry counts as a strike; the
     * third strike ends the search for that country. The method returns early whenever the
     * channel is paused and not forced, or when the thread is interrupted.
     *
     * <p>When a sign-in page is served instead of results, the login is repeated and the
     * loop moves on to the next page; the page that triggered the login is not retried.
     *
     * @throws Exception declared by the overridden method; failures inside a country or
     *         page are logged and do not propagate
     */
    @Override
    protected void execute() throws Exception {
        log.info("Running execute...");

        if (this.countries == null || this.search == null) {
            logError(log, "Countries and search keywords must be set before executing");
            return;
        }

        // Encode once with an explicit charset; the one-argument overload is deprecated and
        // depends on the platform default encoding.
        String encodedSearch = URLEncoder.encode(this.search, "UTF-8");

        for (String country : this.countries) {
            try {
                int strikeCount = 0;

                for (int x = 1; x < this.maxPages; x++) {

                    // If channel is paused, stop execution
                    if (this.isPaused() && !this.isForced()) {
                        return;
                    }

                    int uniqueCount = 0;

                    String seekUrl = "https://www.linkedin.com/vsearch/jj?keywords=" + encodedSearch
                            + "&countryCode=" + country.toLowerCase()
                            + "&sortBy=DD&orig=JSHP&distance=100&locationType=I&openFacets=L,C,N&page_num=" + x
                            + "&pt=jobs&f_TP=" + this.dateFacet;
                    if (this.postalCode != null && !this.postalCode.isEmpty()) {
                        // Postal codes may contain spaces, so they are encoded as well.
                        seekUrl += "&postalCode=" + URLEncoder.encode(this.postalCode, "UTF-8");
                    }

                    logInfo(log, "... Seeking " + seekUrl);
                    HttpEntity entity = TaskerboxHttpBox.getInstance().getEntityForURL(seekUrl);
                    String result = TaskerboxHttpBox.getInstance().readResponseFromEntity(entity);
                    if (result.contains("<title>Sign Up | LinkedIn</title>")
                            || result.contains("<title>LinkedIn | LinkedIn</title>")
                            || result.contains("<p class=\"signin-link\">Already have an account?")) {
                        logError(log, "Solicitado login... Saindo.");

                        // Log in again and carry on with the next page.
                        this.bootstrapLinkedInHttpClient(true);
                        continue;
                    }

                    try {
                        JSONArray jobs = new JSONObject(result).getJSONObject("content").getJSONObject("page")
                                .getJSONObject("voltron_unified_search_json").getJSONObject("search")
                                .getJSONArray("results");

                        for (int j = 0; j < jobs.length(); j++) {
                            // handleJob pauses the channel once the action limit is hit;
                            // stop right away instead of working through the rest of the page.
                            if (this.isPaused() && !this.isForced()) {
                                logInfo(log, "Channel is paused, interrupting [job loop]...");
                                return;
                            }

                            try {
                                JSONObject idxObject = jobs.getJSONObject(j);
                                if (!idxObject.has("job")) {
                                    continue;
                                }

                                JSONObject job = idxObject.getJSONObject("job");

                                if (handleJob(job)) {
                                    uniqueCount++;
                                }

                            } catch (Exception e) {
                                // opt() never throws, so reporting the bad entry cannot
                                // itself abort the rest of the page the way get() could.
                                logError(log, "Exception reading --> " + jobs.opt(j));
                                log.error("Failed to handle search result at index " + j, e);
                            }

                        }

                        if (uniqueCount == 0) {
                            logInfo(log, "Zero unique count. Striking...");

                            strikeCount++;
                            if (strikeCount > 2) {
                                logInfo(log, "BREAK -- ZERO UNIQUE COUNT! STRIKES!");
                                break;
                            }
                        }

                        try {
                            Thread.sleep(10000L);
                        } catch (InterruptedException e) {
                            // Restore the flag so the caller can see the interruption.
                            Thread.currentThread().interrupt();
                            log.error("Interrupted while waiting between result pages", e);
                            return;
                        }

                        // If channel is paused, stop execution
                        if (this.isPaused() && !this.isForced()) {
                            logInfo(log, "Channel is paused, interrupting [2]...");
                            return;
                        }

                    } catch (Exception e) {
                        log.error("Failed to process result page " + x + " for country " + country, e);
                    }
                }
            } catch (Exception e) {
                log.error("Job search failed for country " + country, e);
            }
        }
    }

}