de.dplatz.padersprinter.control.TripService.java Source code

Java tutorial

Introduction

Here is the source code for de.dplatz.padersprinter.control.TripService.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package de.dplatz.padersprinter.control;

import de.dplatz.padersprinter.entity.Leg;
import de.dplatz.padersprinter.entity.Transport;
import de.dplatz.padersprinter.entity.Trip;
import de.dplatz.padersprinter.entity.TripQuery;
import java.time.LocalTime;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Optional;
import javax.ws.rs.client.Client;
import javax.ws.rs.client.ClientBuilder;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.DomSerializer;
import org.htmlcleaner.HtmlCleaner;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import rx.Observable;

/**
 *
 * @author daniel.platz
 */
public class TripService {

    private static final Logger logger = Logger.getLogger(TripService.class.getName());

    private static final String TRIP_NODES_XPATH = "//div[contains(@class, 'efa-result')]/div[contains(@class, 'col-md-12')]/div[contains(@class, 'panel panel-default')]";

    HttpClient httpClient = new HttpClient();

    public Observable<Trip> query(TripQuery query) {
        final HtmlCleaner htmlCleaner = new HtmlCleaner();
        final DomSerializer domSerializer = new DomSerializer(new CleanerProperties());
        final XPath xpath = XPathFactory.newInstance().newXPath();

        return httpClient.get(query).map(htmlCleaner::clean).flatMap(tagNode -> {
            try {
                return Observable.just(domSerializer.createDOM(tagNode));
            } catch (ParserConfigurationException pce) {
                return Observable.error(pce);
            }
        }).flatMap(doc -> {
            try {
                return Observable.just((NodeList) xpath.evaluate(TRIP_NODES_XPATH, doc, XPathConstants.NODESET));
            } catch (XPathExpressionException xee) {
                return Observable.error(xee);
            }
        }).flatMap(nodeList -> {
            List<Node> nodes = new LinkedList<>();
            for (int i = 0; i < nodeList.getLength(); i++) {
                nodes.add(nodeList.item(i));
            }
            logger.info("HTML contains " + nodes.size() + " result-panels.");
            return Observable.from(nodes);
        }).flatMap(tripNode -> parseTrip(tripNode).map(Observable::just).orElseGet(Observable::empty));
    }

    static class HttpClient {

        public Observable<String> get(TripQuery query) {
            return Observable.fromCallable(() -> {
                final Client client = ClientBuilder.newClient();
                final long startTime = System.currentTimeMillis();
                final String result = query.toWebTarget(client).request().get(String.class);
                final long duration = System.currentTimeMillis() - startTime;
                logger.info("get: " + duration + "ms");
                return result;
            });
        }
    }

    Optional<Trip> parseTrip(Node node) {
        final XPath xpath = XPathFactory.newInstance().newXPath();
        final LocalTime begin;
        final LocalTime end;
        final String duration;
        final int transferCount;
        try {
            begin = LocalTime.parse(parseStringNode(node,
                    "./div[contains(@class, 'panel-heading')]/table[contains(@class, 'tripTable')]//tbody/tr/td[2]/text()",
                    xpath));
            end = LocalTime.parse(parseStringNode(node,
                    "./div[contains(@class, 'panel-heading')]/table[contains(@class, 'tripTable')]//tbody/tr/td[3]/text()",
                    xpath));
            duration = parseStringNode(node,
                    "./div[contains(@class, 'panel-heading')]/table[contains(@class, 'tripTable')]//tbody/tr/td[4]/text()",
                    xpath);
            transferCount = parseIntegerNode(node,
                    "./div[contains(@class, 'panel-heading')]/table[contains(@class, 'tripTable')]//tbody/tr/td[5]/text()",
                    xpath);
        } catch (Exception ex) {
            logger.log(Level.ERROR, null, ex);
            return Optional.empty();
        }

        Optional<List<Leg>> legs = parseLegs(node);
        if (legs.isPresent()) {
            Trip t = new Trip(begin, end, duration, transferCount, legs.get());
            logger.debug("Parsed trip: " + t);
            return Optional.of(t);
        } else {
            return Optional.empty();
        }
    }

    Optional<List<Leg>> parseLegs(Node node) {
        XPath xpath = XPathFactory.newInstance().newXPath();

        try {
            NodeList legNodes = (NodeList) xpath.evaluate(".//table[contains(@class, 'legTable')]", node,
                    XPathConstants.NODESET);

            final List<Leg> legs = new LinkedList<>();

            logger.debug("Number of legs indentified: " + legNodes.getLength());
            for (int i = 0; i < legNodes.getLength(); i++) {
                Optional<Leg> leg = parseLeg(legNodes.item(i), xpath);

                if (!leg.isPresent()) {
                    logger.info("At least one leg could not be parsed. Ignoring trip.");
                    return Optional.empty();
                }

                legs.add(leg.get());
            }
            return Optional.of(legs);
        } catch (Exception ex) {
            logger.log(Level.ERROR, null, ex);
            return Optional.empty();
        }
    }

    Optional<Leg> parseLeg(Node legNode, XPath xpath) throws XPathExpressionException {
        final Node startNode = getNode(legNode, "./tbody/tr[./td[text() = 'ab']]", xpath);
        final Node endNode = getNode(legNode, "./tbody/tr[./td[text() = 'an']]", xpath);

        LocalTime startTime = LocalTime.parse(parseStringNode(startNode, "./td[1]", xpath));
        String startLocation = parseStringNode(startNode, "./td[4]", xpath);

        LocalTime endTime = LocalTime.parse(parseStringNode(endNode, "./td[1]", xpath));
        String endLocation = parseStringNode(endNode, "./td[4]", xpath);

        if (isNodePresent(legNode, ".//i[contains(@class, 'icon-pedestrian')]", xpath)) {
            final String id = parseStringNode(legNode,
                    "./tbody/tr[.//i[contains(@class, 'icon-pedestrian')]]/td[4]", xpath);
            Leg l = new Leg(Transport.walk(id), startTime, startLocation, endTime, endLocation);
            return Optional.of(l);
        } else if (isNodePresent(legNode, ".//i[contains(@class, 'fa-bus')]", xpath)) {
            final String id = parseStringNode(legNode, "./tbody/tr[.//i[contains(@class, 'fa-bus')]]/td[4]", xpath);
            Leg l = new Leg(Transport.bus(id), startTime, startLocation, endTime, endLocation);
            return Optional.of(l);
        } else {
            logger.debug("Unknown leg-type: " + legNode);
            return Optional.empty();
        }
    }

    static String parseStringNode(Node node, String expr, XPath xpath) throws XPathExpressionException {
        String val = (String) xpath.evaluate(expr, node, XPathConstants.STRING);
        val = StringEscapeUtils.unescapeHtml4(val);
        return val;
    }

    static int parseIntegerNode(Node node, String expr, XPath xpath) throws XPathExpressionException {
        return Integer.parseInt((String) xpath.evaluate(expr, node, XPathConstants.STRING));
    }

    static boolean isNodePresent(Node node, String expr, XPath xpath) throws XPathExpressionException {
        return xpath.evaluate(expr, node, XPathConstants.NODE) != null;
    }

    static Node getNode(Node node, String expr, XPath xpath) throws XPathExpressionException {
        return (Node) xpath.evaluate(expr, node, XPathConstants.NODE);
    }
}