Java tutorial
/** * Eventbrite.com Crawler v2.0 * Copyright 19.06.2016 by Jigyasa Grover, @jig08 * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program in the file lgpl21.txt * If not, see <http://www.gnu.org/licenses/>. */ package org.loklak.api.search; import java.io.File; import java.io.FileWriter; import java.io.IOException; import org.json.JSONArray; import org.json.JSONObject; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import org.loklak.server.APIException; import org.loklak.server.APIHandler; import org.loklak.server.AbstractAPIHandler; import org.loklak.server.Authorization; import org.loklak.server.BaseUserRole; import org.loklak.server.Query; import org.loklak.susi.SusiThought; import org.loklak.tools.storage.JSONObjectWithDefault; import javax.servlet.http.HttpServletResponse; public class EventBriteCrawlerService extends AbstractAPIHandler implements APIHandler { private static final long serialVersionUID = 7850249510419661716L; @Override public String getAPIPath() { return "/api/eventbritecrawler.json"; } @Override public BaseUserRole getMinimalBaseUserRole() { return BaseUserRole.ANONYMOUS; } @Override public JSONObject getDefaultPermissions(BaseUserRole baseUserRole) { return null; } @Override public JSONObject serviceImpl(Query call, HttpServletResponse response, Authorization rights, JSONObjectWithDefault permissions) throws APIException { String url = call.get("url", ""); return crawlEventBrite(url); } public static SusiThought crawlEventBrite(String url) { Document htmlPage = null; try { htmlPage = Jsoup.connect(url).get(); } catch (Exception e) { e.printStackTrace(); } String eventID = null; String eventName = null; String eventDescription = null; // TODO Fetch Event Color String eventColor = null; String imageLink = null; String eventLocation = null; String startingTime = null; String endingTime = null; String ticketURL = null; Elements tagSection = null; Elements tagSpan = null; String[][] tags = new String[5][2]; String topic = null; // By default String closingDateTime = null; String schedulePublishedOn = null; JSONObject creator = new JSONObject(); String email = null; Float latitude = null; Float longitude = null; String privacy = "public"; // By Default String state = "completed"; // By Default String eventType = ""; String temp; Elements t; eventID = htmlPage.getElementsByTag("body").attr("data-event-id"); eventName = htmlPage.getElementsByClass("listing-hero-body").text(); eventDescription = htmlPage.select("div.js-xd-read-more-toggle-view.read-more__toggle-view").text(); eventColor = null; imageLink = htmlPage.getElementsByTag("picture").attr("content"); eventLocation = htmlPage.select("p.listing-map-card-street-address.text-default").text(); temp = htmlPage.getElementsByAttributeValue("property", "event:start_time").attr("content"); if (temp.length() >= 20) { startingTime = htmlPage.getElementsByAttributeValue("property", "event:start_time").attr("content") .substring(0, 19); } else { startingTime = htmlPage.getElementsByAttributeValue("property", "event:start_time").attr("content"); } temp = htmlPage.getElementsByAttributeValue("property", "event:end_time").attr("content"); if (temp.length() >= 20) { endingTime = htmlPage.getElementsByAttributeValue("property", "event:end_time").attr("content") .substring(0, 19); } else { endingTime = htmlPage.getElementsByAttributeValue("property", "event:end_time").attr("content"); } ticketURL = url + "#tickets"; // TODO Tags to be modified to fit in the format of Open Event "topic" tagSection = htmlPage.getElementsByAttributeValue("data-automation", "ListingsBreadcrumbs"); tagSpan = tagSection.select("span"); topic = ""; int iterator = 0, k = 0; for (Element e : tagSpan) { if (iterator % 2 == 0) { tags[k][1] = "www.eventbrite.com" + e.select("a.js-d-track-link.badge.badge--tag.l-mar-top-2").attr("href"); } else { tags[k][0] = e.text(); k++; } iterator++; } creator.put("email", ""); creator.put("id", "1"); // By Default temp = htmlPage.getElementsByAttributeValue("property", "event:location:latitude").attr("content"); if (temp.length() > 0) { latitude = Float.valueOf( htmlPage.getElementsByAttributeValue("property", "event:location:latitude").attr("content")); } temp = htmlPage.getElementsByAttributeValue("property", "event:location:longitude").attr("content"); if (temp.length() > 0) { longitude = Float.valueOf( htmlPage.getElementsByAttributeValue("property", "event:location:longitude").attr("content")); } // TODO This returns: "events.event" which is not supported by Open // Event Generator // eventType = htmlPage.getElementsByAttributeValue("property", // "og:type").attr("content"); String organizerName = null; String organizerLink = null; String organizerProfileLink = null; String organizerWebsite = null; String organizerContactInfo = null; String organizerDescription = null; String organizerFacebookFeedLink = null; String organizerTwitterFeedLink = null; String organizerFacebookAccountLink = null; String organizerTwitterAccountLink = null; temp = htmlPage.select("a.js-d-scroll-to.listing-organizer-name.text-default").text(); if (temp.length() >= 5) { organizerName = htmlPage.select("a.js-d-scroll-to.listing-organizer-name.text-default").text() .substring(4); } else { organizerName = ""; } organizerLink = url + "#listing-organizer"; organizerProfileLink = htmlPage .getElementsByAttributeValue("class", "js-follow js-follow-target follow-me fx--fade-in is-hidden") .attr("href"); organizerContactInfo = url + "#lightbox_contact"; Document orgProfilePage = null; try { orgProfilePage = Jsoup.connect(organizerProfileLink).get(); } catch (Exception e) { e.printStackTrace(); } if (orgProfilePage != null) { t = orgProfilePage.getElementsByAttributeValue("class", "l-pad-vert-1 organizer-website"); if (t != null) { organizerWebsite = orgProfilePage .getElementsByAttributeValue("class", "l-pad-vert-1 organizer-website").text(); } else { organizerWebsite = ""; } t = orgProfilePage.select("div.js-long-text.organizer-description"); if (t != null) { organizerDescription = orgProfilePage.select("div.js-long-text.organizer-description").text(); } else { organizerDescription = ""; } organizerFacebookFeedLink = organizerProfileLink + "#facebook_feed"; organizerTwitterFeedLink = organizerProfileLink + "#twitter_feed"; t = orgProfilePage.getElementsByAttributeValue("class", "fb-page"); if (t != null) { organizerFacebookAccountLink = orgProfilePage.getElementsByAttributeValue("class", "fb-page") .attr("data-href"); } else { organizerFacebookAccountLink = ""; } t = orgProfilePage.getElementsByAttributeValue("class", "twitter-timeline"); if (t != null) { organizerTwitterAccountLink = orgProfilePage .getElementsByAttributeValue("class", "twitter-timeline").attr("href"); } else { organizerTwitterAccountLink = ""; } } JSONArray socialLinks = new JSONArray(); JSONObject fb = new JSONObject(); fb.put("id", "1"); fb.put("name", "Facebook"); fb.put("link", organizerFacebookAccountLink); socialLinks.put(fb); JSONObject tw = new JSONObject(); tw.put("id", "2"); tw.put("name", "Twitter"); tw.put("link", organizerTwitterAccountLink); socialLinks.put(tw); JSONArray jsonArray = new JSONArray(); JSONObject event = new JSONObject(); event.put("event_url", url); event.put("id", eventID); event.put("name", eventName); event.put("description", eventDescription); event.put("color", eventColor); event.put("background_url", imageLink); event.put("closing_datetime", closingDateTime); event.put("creator", creator); event.put("email", email); event.put("location_name", eventLocation); event.put("latitude", latitude); event.put("longitude", longitude); event.put("start_time", startingTime); event.put("end_time", endingTime); event.put("logo", imageLink); event.put("organizer_description", organizerDescription); event.put("organizer_name", organizerName); event.put("privacy", privacy); event.put("schedule_published_on", schedulePublishedOn); event.put("state", state); event.put("type", eventType); event.put("ticket_url", ticketURL); event.put("social_links", socialLinks); event.put("topic", topic); jsonArray.put(event); JSONObject org = new JSONObject(); org.put("organizer_name", organizerName); org.put("organizer_link", organizerLink); org.put("organizer_profile_link", organizerProfileLink); org.put("organizer_website", organizerWebsite); org.put("organizer_contact_info", organizerContactInfo); org.put("organizer_description", organizerDescription); org.put("organizer_facebook_feed_link", organizerFacebookFeedLink); org.put("organizer_twitter_feed_link", organizerTwitterFeedLink); org.put("organizer_facebook_account_link", organizerFacebookAccountLink); org.put("organizer_twitter_account_link", organizerTwitterAccountLink); jsonArray.put(org); JSONArray microlocations = new JSONArray(); jsonArray.put(new JSONObject().put("microlocations", microlocations)); JSONArray customForms = new JSONArray(); jsonArray.put(new JSONObject().put("customForms", customForms)); JSONArray sessionTypes = new JSONArray(); jsonArray.put(new JSONObject().put("sessionTypes", sessionTypes)); JSONArray sessions = new JSONArray(); jsonArray.put(new JSONObject().put("sessions", sessions)); JSONArray sponsors = new JSONArray(); jsonArray.put(new JSONObject().put("sponsors", sponsors)); JSONArray speakers = new JSONArray(); jsonArray.put(new JSONObject().put("speakers", speakers)); JSONArray tracks = new JSONArray(); jsonArray.put(new JSONObject().put("tracks", tracks)); String userHome = System.getProperty("user.home"); String path = userHome + "/Downloads/EventBriteInfo"; new File(path).mkdir(); try (FileWriter file = new FileWriter(path + "/event.json")) { file.write(event.toString()); } catch (IOException e1) { e1.printStackTrace(); } try (FileWriter file = new FileWriter(path + "/org.json")) { file.write(org.toString()); } catch (IOException e1) { e1.printStackTrace(); } try (FileWriter file = new FileWriter(path + "/social_links.json")) { file.write(socialLinks.toString()); } catch (IOException e1) { e1.printStackTrace(); } try (FileWriter file = new FileWriter(path + "/microlocations.json")) { file.write(microlocations.toString()); } catch (IOException e1) { e1.printStackTrace(); } try (FileWriter file = new FileWriter(path + "/custom_forms.json")) { file.write(customForms.toString()); } catch (IOException e1) { e1.printStackTrace(); } try (FileWriter file = new FileWriter(path + "/session_types.json")) { file.write(sessionTypes.toString()); } catch (IOException e1) { e1.printStackTrace(); } try (FileWriter file = new FileWriter(path + "/sessions.json")) { file.write(sessions.toString()); } catch (IOException e1) { e1.printStackTrace(); } try (FileWriter file = new FileWriter(path + "/sponsors.json")) { file.write(sponsors.toString()); } catch (IOException e1) { e1.printStackTrace(); } try (FileWriter file = new FileWriter(path + "/speakers.json")) { file.write(speakers.toString()); } catch (IOException e1) { e1.printStackTrace(); } try (FileWriter file = new FileWriter(path + "/tracks.json")) { file.write(tracks.toString()); } catch (IOException e1) { e1.printStackTrace(); } SusiThought json = new SusiThought(); json.setData(jsonArray); return json; } }