Example usage for org.jsoup.nodes Element html

List of usage examples for org.jsoup.nodes Element html

Introduction

This page collects example usages of the org.jsoup.nodes.Element.html() method.

Prototype

public String html() 

Source Link

Document

Retrieves the element's inner HTML.

Usage

From source file:module.entities.NameFinder.RegexNameFinder.java

/**
 * Runs the regex-based name finder over all consultations.
 * <p>
 * For every open and every completed consultation body the paragraphs are
 * extracted (paragraphs containing {@code <br>} are split into one paragraph
 * per line), a signature of the form {@code name#role} is looked up with
 * {@code getSignatureFromParagraphs}, and the names and roles that were found
 * are handed to {@code DB.insertDemocracitConsultationMinister}. Consultations
 * without a signature are reported on {@code System.err}.
 *
 * @param args the command line arguments; a single argument is used as the
 *             configuration file path
 * @throws SQLException declared for the database calls made through {@code DB}
 * @throws IOException  declared for the helpers invoked during the run
 */
public static void main(String[] args) throws SQLException, IOException {

    if (args.length == 1) {
        Config.configFile = args[0];
    }
    long lStartTime = System.currentTimeMillis();
    Timestamp startTime = new Timestamp(lStartTime);
    System.out.println("Regex Name Finder process started at: " + startTime);
    DB.initPostgres();
    regexerId = DB.LogRegexFinder(lStartTime);
    initLexicons();
    JSONObject obj = new JSONObject();

    // Open consultations.
    TreeMap<Integer, String> consultations = DB.getDemocracitConsultationBody();
    TreeMap<Integer, String> consFoundNames = new TreeMap<>();
    TreeMap<Integer, String> consFoundRoles = new TreeMap<>();
    collectSignatures(consultations, consFoundNames, consFoundRoles, "--");
    DB.insertDemocracitConsultationMinister(consFoundNames, consFoundRoles);

    // Completed consultations. They get their own maps: previously the names
    // were written into the open-consultation map, so the map passed to the
    // insert below was always empty.
    TreeMap<Integer, String> consultationsCompletedText = DB.getDemocracitCompletedConsultationBody();
    TreeMap<Integer, String> complConsFoundNames = new TreeMap<>();
    TreeMap<Integer, String> complConsFoundRoles = new TreeMap<>();
    collectSignatures(consultationsCompletedText, complConsFoundNames, complConsFoundRoles, "++");
    DB.insertDemocracitConsultationMinister(complConsFoundNames, complConsFoundRoles);

    long lEndTime = System.currentTimeMillis();
    // Report the actual end time (the start timestamp used to be printed here).
    System.out.println("Regex Name Finder process finished at: " + new Timestamp(lEndTime));
    obj.put("message", "Regex Name Finder finished with no errors");
    obj.put("details", "");
    DB.UpdateLogRegexFinder(lEndTime, regexerId, obj);
    DB.close();
}

/**
 * Looks up the signature of every consultation body and records the name and
 * role parts; ids without a signature are printed to {@code System.err}
 * prefixed with {@code missPrefix}.
 */
private static void collectSignatures(TreeMap<Integer, String> bodies, TreeMap<Integer, String> foundNames,
        TreeMap<Integer, String> foundRoles, String missPrefix) {
    for (int consId : bodies.keySet()) {
        Elements allPars = splitParagraphsOnLineBreaks(Jsoup.parse(bodies.get(consId)));
        String signature = getSignatureFromParagraphs(allPars);
        if (!signature.contains("#")) {
            System.err.println(missPrefix + consId);
            continue;
        }
        // A negative limit keeps trailing empty tokens, so a signature such as
        // "#" yields two empty tokens instead of an empty array.
        String[] signTokens = signature.split("#", -1);
        String signName = signTokens[0];
        String roleName = signTokens.length > 1 ? signTokens[1] : "";
        foundNames.put(consId, signName.trim());
        foundRoles.put(consId, roleName.trim());
    }
}

/**
 * Returns all {@code <p>} elements of the document, with every paragraph that
 * contains {@code <br>} replaced by one paragraph per line.
 */
private static Elements splitParagraphsOnLineBreaks(Document doc) {
    Elements allPars = new Elements();
    for (Element par : doc.select("p")) {
        String inner = par.html();
        if (inner.contains("<br>")) {
            // Turn each line break into a paragraph boundary and re-parse.
            String out = "<p>" + inner.replace("<br>", "</p><p>") + "</p>";
            allPars.addAll(Jsoup.parse(out).select("p"));
        } else {
            allPars.add(par);
        }
    }
    return allPars;
}

From source file:com.mycollab.core.utils.StringUtils.java

/**
 * Sanitizes a rich-text HTML fragment and returns it as a single line.
 * <p>
 * The input is cleaned with the relaxed whitelist extended by {@code img}
 * (http/https sources only), parsed, passed through {@code replaceHtml}, and
 * the resulting body HTML is returned with all newline characters removed.
 *
 * @param value the HTML fragment to format
 * @return the formatted HTML, or an empty string when {@code value} is blank
 */
public static String formatRichText(String value) {
    if (isBlank(value)) {
        return "";
    }

    String sanitized = Jsoup.clean(value,
            relaxed().addTags("img")
                    .addAttributes("img", "align", "alt", "height", "src", "title", "width", "style")
                    .addProtocols("img", "src", "http", "https"));
    Element body = Jsoup.parse(sanitized).body();
    replaceHtml(body);
    return body.html().replace("\n", "");
}

From source file:com.geecko.QuickLyric.lyrics.LyricWiki.java

/**
 * Downloads a lyrics page and converts the content of its
 * {@code div.lyricBox} element into a {@code Lyrics} result.
 *
 * @param url    the page URL; URLs ending in {@code action=edit} yield a
 *               {@code NO_RESULT}
 * @param artist the artist name, or {@code null} to derive it from the URL
 * @param song   the song title, or {@code null} to derive it from the URL
 * @return a {@code Lyrics} object flagged {@code POSITIVE_RESULT},
 *         {@code NEGATIVE_RESULT}, {@code NO_RESULT} or {@code ERROR}
 */
public static Lyrics fromURL(String url, String artist, String song) {
    if (url.endsWith("action=edit")) {
        return new Lyrics(NO_RESULT);
    }

    String text;
    try {
        Document page = Jsoup.connect(url).get();
        Element box = page.select("div.lyricBox").get(0);
        box.after(box.childNode(0));
        String boxHtml = box.html();
        // Keep only what precedes the first HTML comment; when there is no
        // comment, substring throws and the catch below reports ERROR.
        int commentStart = boxHtml.indexOf("<!--");
        String withoutTags = boxHtml.substring(0, commentStart).replaceAll("<.*?>", "");
        text = withoutTags.replaceAll("\n", "<br />");
        if (text.contains("&#")) {
            text = Parser.unescapeEntities(text, true);
        }
    } catch (IndexOutOfBoundsException | IOException e) {
        e.printStackTrace();
        return new Lyrics(ERROR);
    }

    if (artist == null || song == null) {
        // NOTE(review): assumes the "Artist:Song" page title starts at
        // character 24 of the URL — confirm against the callers.
        String[] titleParts = url.substring(24).replace("Gracenote:", "").split(":", 2);
        if (artist == null) {
            artist = titleParts[0].replace('_', ' ');
        }
        if (song == null) {
            song = titleParts[1].replace('_', ' ');
        }
    }

    try {
        artist = URLDecoder.decode(artist, "UTF-8");
        song = URLDecoder.decode(song, "UTF-8");
    } catch (UnsupportedEncodingException e) {
        e.printStackTrace();
    }

    boolean unlicensed = text.contains(
            "Unfortunately, we are not licensed to display the full lyrics for this song at the moment.");
    if (unlicensed || text.equals("Instrumental <br />")) {
        Lyrics negative = new Lyrics(NEGATIVE_RESULT);
        negative.setArtist(artist);
        negative.setTitle(song);
        return negative;
    }

    // Fewer than three characters (including none at all) counts as no result.
    if (text.length() < 3) {
        return new Lyrics(NO_RESULT);
    }

    Lyrics found = new Lyrics(POSITIVE_RESULT);
    found.setArtist(artist);
    found.setTitle(song);
    found.setText(text);
    found.setSource("LyricsWiki");
    found.setURL(url);
    return found;
}

From source file:io.jari.geenstijl.API.API.java

/**
 * Builds an {@code Artikel} from an article element.
 * <p>
 * Reads id, summary flag, title, optional image, optional embed URL and the
 * footer data (date, author, comment count, link), then strips those parts
 * from the element and stores the remaining inner HTML as the article body.
 * The passed element is modified in the process.
 *
 * @param artikel_el the article element to parse
 * @param context    used to read the {@code show_images} preference
 * @return the populated article
 * @throws ParseException if the footer's {@code datetime} attribute cannot be parsed
 */
private static Artikel parseArtikel(Element artikel_el, Context context) throws ParseException {
    Artikel result = new Artikel();

    // id: the element id attribute without its first character
    result.id = Integer.parseInt(artikel_el.attr("id").substring(1));

    // summary: true when the article has a "more" link
    result.summary = !artikel_el.select("a.more").isEmpty();

    // title
    result.titel = artikel_el.select("h1").text();

    // image
    boolean showImages = PreferenceManager.getDefaultSharedPreferences(context).getBoolean("show_images", true);
    Element image = showImages ? artikel_el.select("img").first() : null;
    if (image != null) {
        try {
            String url = image.attr("src");
            Log.d(TAG, "Downloading " + url);
            result.plaatje = readBytes((InputStream) new URL(url).getContent());
            result.groot_plaatje = image.hasClass("groot");
            // Any explicit size other than 100x100 is treated as a big image.
            boolean hasSize = image.hasAttr("width") && image.hasAttr("height");
            if (hasSize && !(image.attr("width").equals("100") && image.attr("height").equals("100"))) {
                result.groot_plaatje = true;
            }
            Log.i(TAG, result.groot_plaatje ? "    Done. Big image." : "    Done.");
        } catch (Exception ex) {
            Log.w(TAG, "Unable to download image, Falling back... Reason: " + ex.getMessage());
            result.plaatje = null;
        }
    }

    // embed: at the moment only iframes are supported
    if (!artikel_el.select("div.embed").isEmpty()) {
        Element frame = artikel_el.select("div.embed>iframe").first();
        if (frame != null) {
            result.embed = frame.attr("src");
        }
    }

    // embed (geenstijl.tv): extract the url from the script
    if (!domain.equals("www.geenstijl.nl")) {
        Element scriptEl = artikel_el.select("script").first();
        if (scriptEl != null) {
            Matcher matcher = Pattern.compile("'(.*)', fall").matcher(scriptEl.html());
            if (matcher.find() && matcher.groupCount() == 1) {
                result.embed = matcher.group(1);
            }
        }
    }

    // footer: date, author, number of comments and link
    Element footer = artikel_el.select("footer").first();
    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm", Locale.US);
    result.datum = dateFormat.parse(footer.select("time").first().attr("datetime"));

    // the author is the first '|'-separated item of the footer text
    result.auteur = new StringTokenizer(footer.text(), "|").nextToken().trim();

    String commentCount = footer.select("a.comments").text().replace(" reacties", "");
    result.reacties = Integer.parseInt(commentCount);

    result.link = footer.select("a").first().attr("href");

    // clean up: remove everything that was extracted above
    for (String selector : new String[] { "h1", ".embed", "img", "footer", "a.more", "script" }) {
        artikel_el.select(selector).remove();
    }

    // content: whatever is left of the article element
    result.inhoud = artikel_el.html();

    return result;
}

From source file:dsll.pinterest.crawler.Reduce.java

/**
 * Downloads a pin page, extracts its data into a JSON object, stores that
 * object in {@code pinsCollection} and returns it as text.
 * <p>
 * Extracted fields: {@code ID}, {@code image}, {@code name}, {@code source},
 * {@code credit}, {@code comments}, {@code board} and {@code related_pins}.
 * Page parts that are missing leave the corresponding values empty; comments
 * lacking either a creator link or a content element are skipped.
 *
 * @param url            the pin URL; must contain {@code pin/<id>}
 * @param pinsCollection the collection the parsed pin is inserted into
 * @return the pin JSON as text, or the text {@code "HTTP connection failed..."}
 *         when the page could not be fetched
 * @throws JSONException if a value cannot be added to a JSON object
 */
private static Text getPinContent(String url, DBCollection pinsCollection) throws JSONException {
    Document html;
    try {
        html = Jsoup.connect(url).get();
    } catch (Exception e) {
        return new Text("HTTP connection failed...");
    }
    JSONObject pin = new JSONObject();

    // Gather major pins data
    Element doc = html.select("body").first();

    // Pin ID: the path segment that follows "pin/"
    String id = url.split("pin/")[1].split("/")[0];
    pin.append("ID", id);

    // Pin image
    String imageURL = "";
    Element imageWrapper = doc.select("div[class=pinImageSourceWrapper]").first();
    if (imageWrapper != null) {
        Element image = imageWrapper.select("div[class=imageContainer]").select("img").first();
        if (image != null) {
            imageURL = image.attr("src");
        }
    }
    pin.append("image", imageURL);

    // Pin name
    Element nameEle = doc.select("h2[itemprop=name]").first();
    String name = nameEle != null ? nameEle.text().trim() : "";
    pin.append("name", name);

    // Pin source
    Element sourceCont = doc.select("div[class=sourceFlagWrapper]").first();
    JSONObject source = new JSONObject();
    if (sourceCont != null) {
        // A source block without a link used to throw a NullPointerException.
        Element sourceLink = sourceCont.select("a").first();
        source.append("title", sourceCont.text().trim());
        source.append("src", sourceLink != null ? sourceLink.attr("href") : "");
    }
    pin.append("source", source);

    // Pin credit
    JSONObject pinCredit = new JSONObject();
    Element credit = doc.select("div[class=pinCredits]").first();
    String creditName = "", creditTitle = "", creditSource = "";
    if (credit != null) {
        creditName = credit.select("div[class=creditName]").text().trim();
        creditTitle = credit.select("div[class=creditTitle]").text().trim();
        creditSource = credit.select("a").attr("href");
    }
    pinCredit.append("name", creditName);
    pinCredit.append("title", creditTitle);
    pinCredit.append("src", creditSource);
    pin.append("credit", pinCredit);

    // Comments
    JSONArray comments = new JSONArray();
    for (Element commentCont : doc.select("div[class=commenterNameCommentText]")) {
        Element creatorEle = commentCont.select("div[class=commenterWrapper] a").first();
        Element commentContent = commentCont.select(".commentDescriptionContent").first();
        // Skip incomplete comments. A missing creator link used to abort the
        // whole pin with a NullPointerException; a missing content element was
        // already skipped (through a swallowed exception).
        if (creatorEle == null || commentContent == null) {
            continue;
        }
        JSONObject comment = new JSONObject();
        comment.append("creator", creatorEle.text().trim());
        comment.append("creator_url", creatorEle.attr("href"));
        comment.append("content", commentContent.text().trim());
        comment.append("content_raw", commentContent.html());
        comments.put(comment);
    }
    pin.append("comments", comments);

    // Pin board
    Element sidebar = doc.select("div[class=Module CloseupSidebar]").first();
    JSONArray board = new JSONArray();
    if (sidebar != null) {
        Element boardEle = sidebar.select("div[class=boardHeader]").first();
        String boardName = "";
        String boardSrc = "";
        if (boardEle != null) {
            boardName = boardEle.select("h3[class=title]").text().trim();
            boardSrc = "https://www.pinterest.com" + boardEle.select("a").attr("href").trim();
        }
        JSONObject b = new JSONObject();
        b.append("name", boardName);
        b.append("src", boardSrc);
        board.put(b);
    }
    pin.append("board", board); //CAUTION: what if a pin shows up in different boards?

    // Related pins
    Element relatedWrapper = doc
            .select("div[class=closeupBottom] div[class=Module CloseupBottom] div[class=relatedPinsWrapper]")
            .first();
    JSONArray relatedPins = new JSONArray();
    if (relatedWrapper != null) {
        for (Element relatedPinsCont : relatedWrapper.select("div[class=pinWrapper]")) {
            JSONObject relatedPin = new JSONObject();
            relatedPin.append("src", "https://www.pinterest.com"
                    + relatedPinsCont.select("div[class=pinHolder] > a").attr("href"));
            relatedPins.put(relatedPin);
        }
    }
    pin.append("related_pins", relatedPins);

    // Push data to database
    BasicDBObject dbObject = (BasicDBObject) JSON.parse(pin.toString());
    pinsCollection.insert(dbObject);
    return new Text(pin.toString());
}

From source file:com.ferasinfotech.gwreader.ScreenSlidePageFragment.java

/**
 * Alternate Factory method for this fragment class. Constructs a new fragment for the given page number,
 * and HTML story element./*  w ww .j av a  2s.co  m*/
 */
public static ScreenSlidePageFragment create(int pageNumber, int numPages, org.jsoup.nodes.Element story) {
    int story_id = -1;
    String name = "";
    String summary = "";
    String headline = "";
    String cover_photo_url = "";
    String story_string = "";
    long createdAt;

    ScreenSlidePageFragment fragment = new ScreenSlidePageFragment();
    Bundle args = new Bundle();
    if (pageNumber == 0) {
        story_id = 0;
        name = "Grasswire Help";
        headline = "Usage Instructions";
        cover_photo_url = "android.resource://com.ferasinfotech.gwreader/" + R.drawable.gw_logo;
        summary = "Swipe right and left to read each story.\n\n"
                + "Scroll down to read facts and associated news items (tweets and links) for each story.\n\n"
                + "Tap on a news items within a story and you'll be able to follow web links, view tweets via the Twitter app, or watch videos.\n\n"
                + "A long press on a story's cover photo will launch the device browser to view or edit the story on the Grasswire mobile site.\n\n"
                + "A long press on the image above will launch the Grasswire main page.\n\n" + "App Version: "
                + BuildConfig.VERSION_NAME + "\n\n";
    } else {

        // doing a story page, Element 'story' is the story data

        Elements e_list;
        org.jsoup.nodes.Element tag;

        story_id = Integer.valueOf(story.attr("data-story-id"));
        e_list = story.getElementsByClass("feature__tag");
        tag = e_list.get(0);
        name = tag.text() + " (" + pageNumber + "/" + numPages + ")";
        e_list = story.getElementsByClass("story__summary");
        tag = e_list.get(0);
        summary = tag.html().replace("<br />", "\r");
        e_list = story.getElementsByClass("feature__text");
        tag = e_list.get(0);
        headline = tag.text();
        e_list = story.getElementsByClass("feature__image");
        tag = e_list.get(0);
        cover_photo_url = tag.attr("src");
        story_string = story.toString();

    }

    args.putInt(ARG_PAGE, pageNumber);
    args.putInt(ARG_STORY_ID, story_id);
    args.putString(ARG_TITLE, name);
    args.putString(ARG_SUMMARY, summary);
    args.putString(ARG_HEADLINE, headline);
    args.putString(ARG_COVER_PHOTO, cover_photo_url);
    args.putString(ARG_STORY_STRING, "<html><head></head><body>" + story_string + "</body></html>");
    fragment.setArguments(args);
    return fragment;
}

From source file:com.kantenkugel.discordbot.jdocparser.JDocParser.java

/**
 * Selects the only element below {@code root} that matches {@code query}.
 *
 * @param root  the element to search in
 * @param query the CSS query to evaluate
 * @return the matching element, or {@code null} when nothing matches
 * @throws RuntimeException if more than one element matches; the message
 *         contains the match count, the query, the root's tag and class
 *         names, and the root's inner HTML
 */
private static Element getSingleElementByQuery(Element root, String query) {
    Elements matches = root.select(query);
    int matchCount = matches.size();
    if (matchCount <= 1) {
        return matches.first();
    }

    StringBuilder message = new StringBuilder();
    message.append("Found ").append(matchCount);
    message.append(" elements matching query \"").append(query);
    message.append("\" inside of ").append(root.tagName());
    message.append("-").append(root.className());
    message.append(root.html());
    throw new RuntimeException(message.toString());
}

From source file:com.nineash.hutsync.client.NetworkUtilities.java

/**
 * Downloads the rota page from the server and mirrors the shifts it lists
 * into the account's local event calendar.
 * <p>
 * The cookies serialized in {@code authtoken} are attached to the request.
 * Shifts are read from the {@code rota_this_week} and {@code rota_next_week}
 * tables, consecutive shifts (one ending exactly when the next starts) are
 * merged into a single event, and the events are written through the content
 * resolver in batches of at most 50 operations. Events whose id matches a
 * local {@code _SYNC_ID} are updated; the others are passed on with a raw id
 * of -1.
 * <p>
 * The method returns silently when the calendar cannot be obtained or the
 * auth token cannot be expanded.
 *
 * @param context         used for the HTTP client, content resolver and account manager
 * @param account         the account being synced
 * @param authtoken       the serialized cookie list stored for this account
 * @param serverSyncState not used by this method
 * @throws IOException    if the request fails or the server does not answer with HTTP 200
 * @throws JSONException, ParseException, AuthenticationException declared for callers;
 *         none of them is thrown directly by this method's own code
 */
public static void syncCalendar(Context context, Account account, String authtoken, long serverSyncState)
        throws JSONException, ParseException, IOException, AuthenticationException {
    ArrayList<SerializableCookie> myCookies;
    CookieStore cookieStore = new BasicCookieStore();
    DefaultHttpClient hClient = getHttpClient(context);
    mContentResolver = context.getContentResolver();
    final String[] weeknames = { "rota_this_week", "rota_next_week" };

    long calendar_id = getCalendar(account);
    if (calendar_id == -1) {
        Log.e("CalendarSyncAdapter", "Unable to create HutSync event calendar");
        return;
    }

    try {
        myCookies = (ArrayList<SerializableCookie>) fromString(authtoken);
    } catch (final IOException e) {
        Log.e(TAG, "IOException when expanding authtoken", e);
        return;
    } catch (final ClassNotFoundException e) {
        Log.e(TAG, "ClassNotFoundException when expanding authtoken", e);
        return;
    }

    for (SerializableCookie cur_cookie : myCookies) {
        cookieStore.addCookie(cur_cookie.getCookie());
    }

    hClient.setCookieStore(cookieStore);
    Log.i(TAG, "Syncing to: " + SYNC_CONTACTS_URI);
    HttpGet httpget = new HttpGet(SYNC_CONTACTS_URI);
    final HttpResponse resp = hClient.execute(httpget);
    final String response = EntityUtils.toString(resp.getEntity());
    HashMap<Long, SyncEntry> localEvents = new HashMap<Long, SyncEntry>();
    ArrayList<Event> events = new ArrayList<Event>();
    // Background colour of a single shift cell.
    Pattern p = Pattern.compile("background-color:(#[[a-f][A-F][0-9]]{6})");
    // Style-sheet rule of the calendar key: CSS class name -> colour.
    Pattern ps = Pattern
            .compile(".calendar-key span.(\\S+) \\{ background-color:(#[[a-f][A-F][0-9]]{6}); color:#fff; \\}");

    if (resp.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
        Map<String, String> shift_types = new HashMap<String, String>();
        Document doc = Jsoup.parse(response);
        String full_name = doc.select("a[href*=" + account.name + "/profile]").first().text();

        AccountManager mAccountManager = AccountManager.get(context);
        Account[] the_accounts = mAccountManager.getAccountsByType(Constants.ACCOUNT_TYPE);
        boolean multiple_accounts = (the_accounts.length > 1);

        // Build the colour -> shift type lookup from the page's style sheets.
        Elements the_styles = doc.select("style");
        for (Element the_style : the_styles) {
            String st_txt = the_style.html();
            Matcher ms = ps.matcher(st_txt);
            while (ms.find()) {
                String cname = ms.group(1);
                String ccol = ms.group(2);
                String rname = doc.select("span." + cname).first().text();
                Log.i(TAG, "LOOK: " + cname + ", " + ccol + ", " + rname);
                shift_types.put(ccol, rname);
            }
        }

        for (int w = 0; w < weeknames.length; w++) {

            Elements the_dates = doc.select("div.homepage div.accord-content table[id=" + weeknames[w]
                    + "] tr.heading th:not(.skipStyles)");
            Element the_date = the_dates.first(); //figure out the year for the Monday.
            String str_v = the_date.text();
            String[] str_sub = str_v.split(" ");
            str_sub[1] = str_sub[1].trim();
            String[] date_split = str_sub[1].split("/");
            Calendar c = Calendar.getInstance();
            int this_month = c.get(Calendar.MONTH) + 1;
            int monday_month = Integer.parseInt(date_split[1]);
            int this_year = c.get(Calendar.YEAR);
            int monday_year = this_year;
            // The heading carries no year. Only a jump across the turn of the
            // year changes it: a Monday many months "behind" lies in the next
            // year, one many months "ahead" in the previous year. (Shifting the
            // year whenever the months merely differed mis-dated every week
            // that straddles an ordinary month boundary.)
            int month_delta = monday_month - this_month;
            if (month_delta < -6) {
                monday_year++;
            } else if (month_delta > 6) {
                monday_year--;
            }

            SimpleDateFormat format = new SimpleDateFormat("dd/MM/yyyy");
            Date date = new Date();
            if (str_v != null && !str_v.isEmpty()) {
                String this_date = str_sub[1] + "/" + monday_year;

                try {
                    date = format.parse(this_date);
                } catch (Exception e) {
                    // Keep the current date as fallback when the heading cannot be parsed.
                    e.printStackTrace();
                }
                Log.i(TAG, "Dates: " + this_date + " - " + date);
            }

            for (int i = 1; i < 8; ++i) { //1 is monday, 7 is sunday
                Elements hiddens = doc.select("div.homepage div.accord-content table[id=" + weeknames[w]
                        + "] td:eq(" + Integer.toString(i) + "):not(.skipStyles) div.timeElem");
                int add_days = i - 1;
                for (Element hidden : hiddens) {
                    String str = hidden.text();
                    if (str != null && !str.isEmpty()) {
                        String style = hidden.attr("style");
                        String bg_col = "";
                        Matcher m = p.matcher(style);
                        if (m.find()) {
                            bg_col = m.group(1);
                        }

                        Log.i(TAG, "Time: " + str + "(" + bg_col + ")");
                        String ev_description = ""; //Location too?
                        if (multiple_accounts)
                            ev_description += full_name + "\n\n";
                        // The cell text has the form "HH:mm - HH:mm".
                        String[] times = str.split(" - ");
                        String[] start_time = times[0].split(":");
                        String[] end_time = times[1].split(":");
                        int add_start_hours = Integer.parseInt(start_time[0]);
                        int add_start_minutes = Integer.parseInt(start_time[1]);
                        int add_finish_hours = Integer.parseInt(end_time[0]);
                        int add_finish_minutes = Integer.parseInt(end_time[1]);
                        String ev_shiftType = "";
                        if (bg_col != null && !bg_col.isEmpty()) {
                            ev_shiftType = (String) shift_types.get(bg_col);
                        } else {
                            ev_shiftType = "Other";
                        }
                        String ev_title = ev_shiftType + " Shift";

                        c.setTime(date);
                        c.add(Calendar.DATE, add_days);
                        c.add(Calendar.HOUR_OF_DAY, add_start_hours);
                        c.add(Calendar.MINUTE, add_start_minutes);
                        Date startDate = c.getTime();
                        // The start instant doubles as the event id.
                        long ev_id = startDate.getTime();

                        c.setTime(date);
                        c.add(Calendar.DATE, add_days);
                        if (add_finish_hours < add_start_hours) { //shift rolls to next day
                            // NOTE(review): the event ends at midnight of the next day and
                            // the real finish time goes into the description — confirm
                            // this is intended.
                            c.add(Calendar.HOUR_OF_DAY, 24);
                            ev_description += "Shift finishes at " + times[1] + " on the next day\n\n";
                        } else {
                            c.add(Calendar.HOUR_OF_DAY, add_finish_hours);
                            c.add(Calendar.MINUTE, add_finish_minutes);
                        }
                        Date endDate = c.getTime();

                        Event ev = new Event(ev_id, ev_title, startDate, endDate, ev_description, ev_shiftType);
                        events.add(ev);
                        Log.i(TAG, "Event: " + ev);
                    }
                }
            }
        }

        //next merge adjacent shifts
        SimpleDateFormat timeFormat = new SimpleDateFormat("HH:mm");
        Event prev_event = null;
        for (Iterator<Event> it = events.iterator(); it.hasNext();) {
            Event cur_event = it.next();
            if (prev_event != null && prev_event.getEndDate().compareTo(cur_event.getStartDate()) == 0) {
                prev_event.setDescription(prev_event.getDescription() + "Merged consecutive shifts:\n"
                        + timeFormat.format(prev_event.getStartDate()) + " to "
                        + timeFormat.format(prev_event.getEndDate()) + " (" + prev_event.getShiftType()
                        + ")\n" + timeFormat.format(cur_event.getStartDate()) + " to "
                        + timeFormat.format(cur_event.getEndDate()) + " (" + cur_event.getShiftType()
                        + ")\n\n");
                prev_event.setEndDate(cur_event.getEndDate()); //TODO: only merge if other + FOH/BOH, note times in new description
                it.remove();
                // Keep prev_event pointing at the surviving event so a further
                // consecutive shift is merged into it and not into the event
                // that was just removed from the list.
                continue;
            }
            prev_event = cur_event;
        }

        //next, load local events
        Cursor c1 = mContentResolver.query(
                Events.CONTENT_URI.buildUpon().appendQueryParameter(Events.ACCOUNT_NAME, account.name)
                        .appendQueryParameter(Events.ACCOUNT_TYPE, account.type).build(),
                new String[] { Events._ID, Events._SYNC_ID }, Events.CALENDAR_ID + "=?",
                new String[] { String.valueOf(calendar_id) }, null);
        // The query result may be null; close the cursor only when there is
        // one, and close it even if reading a row fails.
        if (c1 != null) {
            try {
                while (c1.moveToNext()) {
                    SyncEntry entry = new SyncEntry();
                    entry.raw_id = c1.getLong(0);
                    localEvents.put(c1.getLong(1), entry);
                }
            } finally {
                c1.close();
            }
        }
        try {
            ArrayList<ContentProviderOperation> operationList = new ArrayList<ContentProviderOperation>();
            for (Event event : events) {

                if (localEvents.containsKey(Long.valueOf(event.getId()))) {
                    SyncEntry entry = localEvents.get(Long.valueOf(event.getId()));
                    operationList.add(updateEvent(calendar_id, account, event, entry.raw_id));
                } else {
                    operationList.add(updateEvent(calendar_id, account, event, -1));
                }

                // Flush in batches of 50 operations.
                if (operationList.size() >= 50) {
                    try {
                        mContentResolver.applyBatch(CalendarContract.AUTHORITY, operationList);
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                    operationList.clear();
                }
            }

            if (operationList.size() > 0) {
                try {
                    mContentResolver.applyBatch(CalendarContract.AUTHORITY, operationList);
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        } catch (Exception e1) {
            e1.printStackTrace();
            return;
        }

    } else {
        Log.e(TAG, "Server error in sending dirty contacts: " + resp.getStatusLine());
        throw new IOException("Server error while syncing calendar: " + resp.getStatusLine());
    }
}

From source file:io.seldon.importer.articles.FileItemAttributesImporter.java

/**
 * Fetches the article at {@code url} and extracts its item attributes using the
 * configured CSS selectors (image, title, lead text, publish date, link, content,
 * tags, category, sub-category) and, optionally, the URL's domain.
 *
 * <p>Requests are throttled: if less than {@code minFetchGapMsecs} has elapsed since
 * {@code lastUrlFetchTime}, the calling thread sleeps for the remainder before fetching.
 *
 * <p>The outcome (status, missing required attributes, error text) is always reported
 * through {@code AttributesImporterUtils.logResult}.
 *
 * @param url              address of the article to fetch; also used as the client item id
 * @param existingCategory not used by this method
 * @return the extracted attributes, or {@code null} when a required attribute (title, and
 *         depending on configuration image, category or domain) is blank, or when any
 *         exception occurred during fetching/extraction
 */
public static Map<String, String> getAttributes(String url, String existingCategory) {
    ItemProcessResult itemProcessResult = new ItemProcessResult();
    itemProcessResult.client_item_id = url;
    // Pessimistic default; flipped to EXTRACTION_SUCCEEDED only when all required fields are present.
    itemProcessResult.extraction_status = "EXTRACTION_FAILED";

    logger.info("Trying to get attributes for " + url);
    Map<String, String> attributes = null;
    String title = "";
    String category = "";
    String subCategory = "";
    String img_url = "";
    String description = "";
    String tags = "";
    String leadtext = "";
    String link = "";
    String publishDate = "";
    String domain = "";
    try {
        // Throttle: keep at least minFetchGapMsecs between consecutive fetches.
        long now = System.currentTimeMillis();
        long timeSinceLastRequest = now - lastUrlFetchTime;
        if (timeSinceLastRequest < minFetchGapMsecs) {
            long timeToSleep = minFetchGapMsecs - timeSinceLastRequest;
            logger.info(
                    "Sleeping " + timeToSleep + "msecs as time since last fetch is " + timeSinceLastRequest);
            Thread.sleep(timeToSleep);
        }
        Document articleDoc = Jsoup.connect(url).userAgent("SeldonBot/1.0").timeout(httpGetTimeout).get();
        lastUrlFetchTime = System.currentTimeMillis();

        //get IMAGE URL - first non-blank of the "content", "src" and "href" attributes
        if (StringUtils.isNotBlank(imageCssSelector)) {
            Element imageElement = articleDoc.select(imageCssSelector).first();
            if (imageElement != null) {
                if (imageElement.attr("content") != null) {
                    img_url = imageElement.attr("content");
                }
                if (StringUtils.isBlank(img_url) && imageElement.attr("src") != null) {
                    img_url = imageElement.attr("src");
                }
                if (StringUtils.isBlank(img_url) && imageElement.attr("href") != null) {
                    img_url = imageElement.attr("href");
                }
            }
        }
        if (StringUtils.isBlank(img_url) && StringUtils.isNotBlank(defImageUrl)) {
            logger.info("Setting image to default: " + defImageUrl);
            img_url = defImageUrl;
        }
        img_url = StringUtils.strip(img_url);

        //get TITLE
        if (StringUtils.isNotBlank(titleCssSelector)) {
            Element titleElement = articleDoc.select(titleCssSelector).first();
            if (titleElement != null && titleElement.attr("content") != null) {
                title = titleElement.attr("content");
            }
        }

        //get Lead Text
        if (StringUtils.isNotBlank(leadTextCssSelector)) {
            Element leadElement = articleDoc.select(leadTextCssSelector).first();
            if (leadElement != null && leadElement.attr("content") != null) {
                leadtext = leadElement.attr("content");
            }
        }

        //get publish date
        if (StringUtils.isNotBlank(publishDateCssSelector)) {
            //2013-01-21T10:40:55Z
            Element pubElement = articleDoc.select(publishDateCssSelector).first();
            if (pubElement != null && pubElement.attr("content") != null) {
                String pubtext = pubElement.attr("content");
                SimpleDateFormat dateFormatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
                Date result = null;
                try {
                    result = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.ENGLISH).parse(pubtext);
                } catch (ParseException e) {
                    logger.info("Failed to parse date withUTC format " + pubtext);
                }
                // Fall back to the simpler format only if the first one did not match;
                // retrying unconditionally would log a bogus failure for valid dates.
                if (result == null) {
                    try {
                        result = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ENGLISH).parse(pubtext);
                    } catch (ParseException e) {
                        logger.info("Failed to parse date " + pubtext);
                    }
                }

                if (result != null) {
                    publishDate = dateFormatter.format(result);
                } else {
                    logger.error("Failed to parse date " + pubtext);
                }
            }
        }

        //get Link
        if (StringUtils.isNotBlank(linkCssSelector)) {
            Element linkElement = articleDoc.select(linkCssSelector).first();
            if (linkElement != null && linkElement.attr("content") != null) {
                link = linkElement.attr("content");
            }
        }

        //get CONTENT - inner HTML of the matched element reduced to plain text
        if (StringUtils.isNotBlank(textCssSelector)) {
            Element descriptionElement = articleDoc.select(textCssSelector).first();
            if (descriptionElement != null) {
                description = Jsoup.parse(descriptionElement.html()).text();
            }
        }

        //get TAGS
        Set<String> tagSet = AttributesImporterUtils.getTags(articleDoc, tagsCssSelector, title);

        if (tagSet.size() > 0) {
            tags = CollectionTools.join(tagSet, ",");
        }

        //get CATEGORY - client specific
        if (StringUtils.isNotBlank(categoryCssSelector)) {
            Element categoryElement = articleDoc.select(categoryCssSelector).first();
            if (categoryElement != null && categoryElement.attr("content") != null) {
                category = categoryElement.attr("content");
                if (StringUtils.isNotBlank(category)) {
                    category = category.toUpperCase();
                }
            }
        } else if (StringUtils.isNotBlank(categoryClassPrefix)) {
            // No selector configured: load a client-specific extractor class by naming convention.
            String className = "io.seldon.importer.articles.category." + categoryClassPrefix
                    + "CategoryExtractor";
            Class<?> clazz = Class.forName(className);
            Constructor<?> ctor = clazz.getConstructor();
            CategoryExtractor extractor = (CategoryExtractor) ctor.newInstance();
            category = extractor.getCategory(url, articleDoc);
        }

        //get Sub CATEGORY - client specific
        if (StringUtils.isNotBlank(subCategoryCssSelector)) {
            Element subCategoryElement = articleDoc.select(subCategoryCssSelector).first();
            if (subCategoryElement != null && subCategoryElement.attr("content") != null) {
                subCategory = subCategoryElement.attr("content");
                if (StringUtils.isNotBlank(subCategory)) {
                    // Upper-case the sub-category itself (previously this copied the category).
                    subCategory = subCategory.toUpperCase();
                }
            }
        } else if (StringUtils.isNotBlank(subCategoryClassPrefix)) {
            String className = "io.seldon.importer.articles.category." + subCategoryClassPrefix
                    + "SubCategoryExtractor";
            Class<?> clazz = Class.forName(className);
            Constructor<?> ctor = clazz.getConstructor();
            CategoryExtractor extractor = (CategoryExtractor) ctor.newInstance();
            subCategory = extractor.getCategory(url, articleDoc);
        }

        // Get domain
        if (domainIsNeeded) {
            domain = getDomain(url);
        }

        // Title is always required; image, category and domain only when configured as needed.
        if (StringUtils.isNotBlank(title) && (imageNotNeeded || StringUtils.isNotBlank(img_url))
                && (categoryNotNeeded || StringUtils.isNotBlank(category))
                && (!domainIsNeeded || StringUtils.isNotBlank(domain))) {
            attributes = new HashMap<String, String>();
            attributes.put(TITLE, title);
            if (StringUtils.isNotBlank(category)) {
                attributes.put(CATEGORY, category);
            }
            if (StringUtils.isNotBlank(subCategory)) {
                attributes.put(SUBCATEGORY, subCategory);
            }
            if (StringUtils.isNotBlank(link)) {
                attributes.put(LINK, link);
            }
            if (StringUtils.isNotBlank(leadtext)) {
                attributes.put(LEAD_TEXT, leadtext);
            }
            if (StringUtils.isNotBlank(img_url)) {
                attributes.put(IMG_URL, img_url);
            }
            if (StringUtils.isNotBlank(tags)) {
                attributes.put(TAGS, tags);
            }
            attributes.put(CONTENT_TYPE, VERIFIED_CONTENT_TYPE);
            if (StringUtils.isNotBlank(description)) {
                attributes.put(DESCRIPTION, description);
            }
            if (StringUtils.isNotBlank(publishDate)) {
                attributes.put(PUBLISH_DATE, publishDate);
            }
            if (StringUtils.isNotBlank(domain)) {
                attributes.put(DOMAIN, domain);
            }
            System.out.println("Item: " + url + "; Category: " + category);
            itemProcessResult.extraction_status = "EXTRACTION_SUCCEEDED";
        } else {
            logger.warn("Failed to get title for article " + url);
            logger.warn("[title=" + title + ", img_url=" + img_url + ", category=" + category + ", domain="
                    + domain + "]");
        }

        // Record which required attributes were missing, as a comma-separated list for the result log.
        if (StringUtils.isBlank(title)) {
            itemProcessResult.attrib_failure_list = itemProcessResult.attrib_failure_list
                    + ((StringUtils.isBlank(itemProcessResult.attrib_failure_list)) ? "" : ",") + "title";
        }
        if (!imageNotNeeded && StringUtils.isBlank(img_url)) {
            itemProcessResult.attrib_failure_list = itemProcessResult.attrib_failure_list
                    + ((StringUtils.isBlank(itemProcessResult.attrib_failure_list)) ? "" : ",") + "img_url";
        }
        if (!categoryNotNeeded && StringUtils.isBlank(category)) {
            itemProcessResult.attrib_failure_list = itemProcessResult.attrib_failure_list
                    + ((StringUtils.isBlank(itemProcessResult.attrib_failure_list)) ? "" : ",")
                    + "category";
        }
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // The throttle sleep was interrupted: keep the interrupt visible to the caller.
            Thread.currentThread().interrupt();
        }
        logger.warn("Article: " + url + ". Attributes import FAILED", e);
        itemProcessResult.error = e.toString();
    }

    AttributesImporterUtils.logResult(logger, itemProcessResult);

    return attributes;
}

From source file:com.kantenkugel.discordbot.jdocparser.JDocParser.java

/**
 * Looks up elements by CSS class via {@code root.getElementsByClass(className)} and
 * returns the match when there is exactly one.
 *
 * @param root      element on which the class lookup is performed
 * @param className CSS class name to search for
 * @return the single matching element
 * @throws RuntimeException if the lookup yields zero or more than one element; the message
 *         contains the match count, the class name, the root's tag and class, and the
 *         root's inner HTML
 */
private static Element getSingleElementByClass(Element root, String className) {
    Elements matches = root.getElementsByClass(className);
    if (matches.size() == 1) {
        return matches.first();
    }

    // Anything other than exactly one match is treated as a structural error in the document.
    StringBuilder message = new StringBuilder();
    message.append("Found ").append(matches.size());
    message.append(" elements with class ").append(className);
    message.append(" inside of ").append(root.tagName());
    message.append("-").append(root.className());
    message.append(root.html());
    throw new RuntimeException(message.toString());
}