at.yawk.fiction.impl.ao3.SearchPageParser.java Source code

Java tutorial

Introduction

Here is the source code for at.yawk.fiction.impl.ao3.SearchPageParser.java

Source

/*
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

package at.yawk.fiction.impl.ao3;

import at.yawk.fiction.HtmlText;
import at.yawk.fiction.Pageable;
import at.yawk.fiction.impl.PageParser;
import com.google.common.base.Function;
import com.google.common.collect.Lists;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import org.joda.time.LocalDate;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Parses a work listing page into a {@link Pageable.Page} of {@link Ao3Story} entries and derives
 * the pagination state from the page's {@code .pagination} list.
 *
 * <p>Each {@code #main > .work.index > li} element yields one story. Markup that is missing from
 * an individual entry leaves the corresponding field unset (or {@code 0} for counters) instead of
 * aborting the whole page.
 *
 * @author yawkat
 */
class SearchPageParser extends PageParser<Pageable.Page<Ao3Story>> {
    /**
     * Format of the date shown on each entry ({@code dd MMM yyyy}). The locale is pinned to English
     * so the month abbreviation is parsed the same way regardless of the JVM default locale.
     */
    private static final DateTimeFormatter DATE_PATTERN =
            DateTimeFormat.forPattern("dd MMM yyyy").withLocale(Locale.ENGLISH);

    @Override
    protected Pageable.Page<Ao3Story> create() {
        return new Pageable.Page<>();
    }

    /**
     * Fills {@code target} with the stories found under {@code root} and with the page count and
     * "last page" flag read from the pagination list.
     *
     * @param root   root element of the listing document
     * @param target page object that receives entries, page count and last-page flag
     * @throws Exception if an entry cannot be parsed
     */
    @Override
    protected void parse(Element root, Pageable.Page<Ao3Story> target) throws Exception {
        List<Ao3Story> stories = new ArrayList<>();
        for (Element element : root.select("#main > .work.index > li")) {
            stories.add(parseStory(element));
        }

        // Default to a single page; overwritten below when a pagination list is present.
        target.setPageCount(1);
        target.setEntries(stories);
        for (Element pageLink : root.select(".pagination > li")) {
            if (!pageLink.hasClass("next")) {
                // Every non-"next" item overwrites the count, so the last such item wins.
                target.setPageCount(parseIntLenient(pageLink.text()));
            } else if (!pageLink.select(".disabled").isEmpty()) {
                // A disabled "next" item means there is no following page.
                target.setLast(true);
            }
        }
        if (target.getPageCount() == 1) {
            target.setLast(true);
        }
    }

    /**
     * Builds one story from a single listing entry.
     *
     * <p>Declared to throw {@code Exception} like {@link #parse}, because the inherited
     * {@code parseIntLenient} helper's signature is not visible in this file.
     *
     * @param element the {@code li} element of the entry
     * @return the populated story
     */
    private Ao3Story parseStory(Element element) throws Exception {
        Ao3Story story = new Ao3Story();
        story.setId(parseIntLenient(element.attr("id")));
        story.setUri(URI.create("https://archiveofourown.org/works/" + story.getId()));

        // First link in the heading is the title, the second (if any) is the author.
        Elements titleAndAuthor = element.select("> div > h4 > a");
        if (!titleAndAuthor.isEmpty()) {
            story.setTitle(titleAndAuthor.first().text());
        }
        if (titleAndAuthor.size() > 1) {
            Ao3Author author = new Ao3Author();
            author.setName(titleAndAuthor.get(1).text());
            story.setAuthor(author);
        }

        Element date = element.select(".datetime").first();
        if (date != null) {
            story.setUpdateTime(LocalDate.parse(date.text(), DATE_PATTERN)
                    .toDateTimeAtStartOfDay().toInstant());
        }

        story.setRequiredTags(map(element.select(".required-tags > li .text"), Element::text));
        story.setWarnings(map(element.select(".tags > .warnings"), Element::text));
        story.setRelationships(map(element.select(".tags > .relationships"), Element::text));
        story.setCharacters(map(element.select(".tags > .characters"), Element::text));
        story.setFreeforms(map(element.select(".tags > .freeforms"), Element::text));

        Element summary = element.select(".summary").first();
        if (summary != null) {
            HtmlText description = new HtmlText();
            description.setHtml(summary.html());
            story.setDescription(description);
        }

        story.setLanguage(element.select(".stats dd.language").text());
        story.setWords(countOrZero(element, ".stats dd.words"));
        applyChapters(story, element.select(".stats dd.chapters").first());
        story.setCommentCount(countOrZero(element, ".stats dd.comments"));
        story.setKudoCount(countOrZero(element, ".stats dd.kudos"));
        story.setBookmarkCount(countOrZero(element, ".stats dd.bookmarks"));
        story.setHitCount(countOrZero(element, ".stats dd.hits"));
        return story;
    }

    /**
     * Applies the {@code written/goal} chapter stat to {@code story}: pads the chapter list with
     * empty chapters up to the written count and sets the chapter goal ({@code null} when the stat
     * is absent, has no goal part, or the goal is {@code ?}).
     *
     * @param story    story to update
     * @param chapters the chapters stat element, or {@code null} if the entry has none
     */
    private void applyChapters(Ao3Story story, Element chapters) throws Exception {
        if (chapters == null) {
            story.setChapterGoal(null);
            return;
        }

        String[] parts = chapters.text().split("/");
        List<Ao3Chapter> chapterList;
        if (story.getChapters() == null) {
            chapterList = new ArrayList<>();
            story.setChapters(chapterList);
        } else {
            //noinspection unchecked
            chapterList = (List<Ao3Chapter>) story.getChapters();
        }

        int chapterCount = parseIntLenient(parts[0]);
        while (chapterList.size() < chapterCount) {
            chapterList.add(new Ao3Chapter());
        }

        // Guard against a stat without a "/" so a malformed entry does not fail the whole page.
        boolean goalKnown = parts.length > 1 && !parts[1].equals("?");
        story.setChapterGoal(goalKnown ? parseIntLenient(parts[1]) : null);
    }

    /**
     * Reads a numeric stat from the first element matching {@code selector}.
     *
     * @param element  entry element to search in
     * @param selector CSS selector of the stat element
     * @return the leniently parsed number, or {@code 0} if no element matches
     */
    private int countOrZero(Element element, String selector) throws Exception {
        Element stat = element.select(selector).first();
        return stat == null ? 0 : parseIntLenient(stat.text());
    }

    /**
     * Eagerly maps {@code list} through {@code function} into a new, independent {@link ArrayList}
     * (copying the lazy view returned by {@link Lists#transform}).
     */
    private static <I, O> List<O> map(List<I> list, Function<I, O> function) {
        return new ArrayList<>(Lists.transform(list, function));
    }
}