org.apache.apex.malhar.stream.sample.complete.TrafficRoutes.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.apex.malhar.stream.sample.complete.TrafficRoutes.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.apex.malhar.stream.sample.complete;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;

import javax.annotation.Nullable;

import org.joda.time.Duration;

import org.apache.apex.malhar.lib.function.Function;
import org.apache.apex.malhar.lib.util.KeyValPair;
import org.apache.apex.malhar.lib.window.TriggerOption;
import org.apache.apex.malhar.lib.window.Tuple;
import org.apache.apex.malhar.lib.window.WindowOption;
import org.apache.apex.malhar.lib.window.accumulation.Group;
import org.apache.apex.malhar.stream.api.ApexStream;
import org.apache.apex.malhar.stream.api.CompositeStreamTransform;
import org.apache.apex.malhar.stream.api.WindowedStream;
import org.apache.apex.malhar.stream.api.impl.StreamFactory;
import org.apache.hadoop.conf.Configuration;

import com.google.common.collect.Lists;

import com.datatorrent.api.Context;
import com.datatorrent.api.DAG;
import com.datatorrent.api.DefaultInputPort;
import com.datatorrent.api.DefaultOutputPort;
import com.datatorrent.api.InputOperator;
import com.datatorrent.api.StreamingApplication;
import com.datatorrent.api.annotation.ApplicationAnnotation;
import com.datatorrent.common.util.BaseOperator;

import static org.apache.apex.malhar.stream.api.Option.Options.name;

/**
 * Beam's TrafficRoutes example.
 *
 * @since 3.5.0
 */
@ApplicationAnnotation(name = "TrafficRoutes")
public class TrafficRoutes implements StreamingApplication {
    // Lookup from sensor station ID to the name of the route that station belongs to;
    // filled once, at class initialization, by buildStationInfo().
    static Map<String, String> sdStations = buildStationInfo();
    static final int WINDOW_DURATION = 3; // Default sliding window duration in minutes
    static final int WINDOW_SLIDE_EVERY = 1; // Default window 'slide every' setting in minutes

    /**
     * Holds one average-speed reading reported by a traffic sensor station.
     *
     * <p>Readings order themselves by {@link #timestamp}. A reading whose timestamp is
     * {@code null} sorts before every reading that has one, and two readings without a
     * timestamp compare as equal, so sorting never fails on partially populated instances.
     */
    public static class StationSpeed implements Comparable<StationSpeed> {
        @Nullable
        String stationId;
        @Nullable
        Double avgSpeed;
        @Nullable
        Long timestamp;

        /**
         * Creates a reading with every field unset ({@code null}).
         */
        public StationSpeed() {
        }

        /**
         * Creates a fully populated reading.
         *
         * @param stationId identifier of the reporting station
         * @param avgSpeed  average speed reported by the station
         * @param timestamp time of the reading
         */
        public StationSpeed(String stationId, Double avgSpeed, Long timestamp) {
            this.stationId = stationId;
            this.avgSpeed = avgSpeed;
            this.timestamp = timestamp;
        }

        public void setAvgSpeed(@Nullable Double avgSpeed) {
            this.avgSpeed = avgSpeed;
        }

        public void setStationId(@Nullable String stationId) {
            this.stationId = stationId;
        }

        public void setTimestamp(@Nullable Long timestamp) {
            this.timestamp = timestamp;
        }

        @Nullable
        public Long getTimestamp() {
            return timestamp;
        }

        public String getStationId() {
            return this.stationId;
        }

        public Double getAvgSpeed() {
            return this.avgSpeed;
        }

        /**
         * Orders readings by timestamp, oldest first.
         *
         * <p>The timestamps are nullable boxed values; comparing them through
         * {@code Long.compare} would auto-unbox and throw on {@code null}, so the
         * {@code null} cases are resolved explicitly (nulls first).
         *
         * @param other the reading to compare against; must not be {@code null}
         * @return a negative, zero or positive value as this reading is older than,
         *         as old as, or newer than {@code other}
         */
        @Override
        public int compareTo(StationSpeed other) {
            Long mine = this.timestamp;
            Long theirs = other.timestamp;
            if (mine == null) {
                return theirs == null ? 0 : -1;
            }
            if (theirs == null) {
                return 1;
            }
            return mine.compareTo(theirs);
        }
    }

    /**
     * Value holder describing one route: its name, the average speed measured on it
     * and whether the readings were classified as a slowdown.
     */
    static class RouteInfo {
        @Nullable
        String route;
        @Nullable
        Double avgSpeed;
        @Nullable
        Boolean slowdownEvent;

        /**
         * Creates an instance with every field unset ({@code null}).
         */
        public RouteInfo() {
        }

        /**
         * Creates a fully populated instance.
         *
         * @param route         name of the route
         * @param avgSpeed      average speed computed for the route
         * @param slowdownEvent whether the route is considered to be slowing down
         */
        public RouteInfo(String route, Double avgSpeed, Boolean slowdownEvent) {
            this.route = route;
            this.avgSpeed = avgSpeed;
            this.slowdownEvent = slowdownEvent;
        }

        /** @return the route name */
        public String getRoute() {
            return route;
        }

        /** @return the average speed on the route */
        public Double getAvgSpeed() {
            return avgSpeed;
        }

        /** @return the slowdown flag */
        public Boolean getSlowdownEvent() {
            return slowdownEvent;
        }
    }

    /**
     * Wraps each raw input line in a {@link Tuple.TimestampedTuple} whose timestamp is the
     * first comma-separated field of that line, parsed as a {@code long}. The line itself is
     * carried along unchanged as the tuple value.
     */
    static class ExtractTimestamps implements Function.MapFunction<String, Tuple.TimestampedTuple<String>> {

        @Override
        public Tuple.TimestampedTuple<String> f(String input) {
            // Long.parseLong rejects a missing or non-numeric field with NumberFormatException.
            long eventTime = Long.parseLong(tryParseTimestamp(input.split(",")));
            return new Tuple.TimestampedTuple<>(eventTime, input);
        }
    }

    /**
     * Turns a timestamped input line into at most one (route, station speed) pair.
     * A pair is produced only for 'main line' ("ML") stations that belong to one of the
     * predefined routes in {@code sdStations} and whose speed field parses; every other
     * line yields an empty result.
     */
    static class ExtractStationSpeedFn
            implements Function.FlatMapFunction<Tuple.TimestampedTuple<String>, KeyValPair<String, StationSpeed>> {

        @Override
        public Iterable<KeyValPair<String, StationSpeed>> f(Tuple.TimestampedTuple<String> input) {
            ArrayList<KeyValPair<String, StationSpeed>> matches = new ArrayList<>();
            String[] fields = input.getValue().split(",");

            // For this analysis, use only 'main line' station types.
            if (!"ML".equals(tryParseStationType(fields))) {
                return matches;
            }

            Double speed = tryParseAvgSpeed(fields);
            String station = tryParseStationId(fields);
            // For this simple example, drop incomplete readings and everything outside the hardwired routes.
            if (speed == null || station == null || !sdStations.containsKey(station)) {
                return matches;
            }

            StationSpeed reading = new StationSpeed(station, speed, input.getTimestamp());
            // The output key is the route name that 'sdStations' associates with the station.
            matches.add(new KeyValPair<>(sdStations.get(station), reading));
            return matches;
        }
    }

    /**
     * Summarises the readings collected for one route in one window.
     *
     * <p>The readings are sorted by timestamp and the non-null speeds are averaged. While
     * walking the sorted readings, each speed is compared with the previous speed seen for
     * the same station: a strictly higher speed counts as a speedup, anything else (lower or
     * equal) counts as a slowdown. The route is flagged as a slowdown when
     * {@code slowdowns >= 2 * speedups}, which also holds when no comparison could be made.
     * If no reading carries a speed, nothing is emitted.
     * Note: these calculations are for example purposes only, and are unrealistic and oversimplified.
     */
    static class GatherStats implements
            Function.FlatMapFunction<Tuple.WindowedTuple<KeyValPair<String, List<StationSpeed>>>, Tuple.TimestampedTuple<KeyValPair<String, RouteInfo>>> {
        @Override
        public Iterable<Tuple.TimestampedTuple<KeyValPair<String, RouteInfo>>> f(
                Tuple.WindowedTuple<KeyValPair<String, List<StationSpeed>>> input) {
            ArrayList<Tuple.TimestampedTuple<KeyValPair<String, RouteInfo>>> emitted = new ArrayList<>();
            String routeName = input.getValue().getKey();

            // Work on a copy so the sort does not touch the list held by the input tuple.
            List<StationSpeed> readings = Lists.newArrayList(input.getValue().getValue());
            // StationSpeeds sort by embedded timestamp.
            Collections.sort(readings);

            double total = 0.0;
            int samples = 0;
            int faster = 0;
            int notFaster = 0;
            Map<String, Double> latestByStation = new HashMap<>();
            for (StationSpeed reading : readings) {
                Double current = reading.getAvgSpeed();
                if (current == null) {
                    // Readings without a speed take no part in the average or the trend.
                    continue;
                }
                total += current;
                samples++;
                // put() hands back the speed previously stored for this station, if any.
                Double previous = latestByStation.put(reading.getStationId(), current);
                if (previous == null) {
                    continue;
                }
                if (previous < current) {
                    faster++;
                } else {
                    notFaster++;
                }
            }

            if (samples == 0) {
                // No average to compute.
                return emitted;
            }

            RouteInfo summary = new RouteInfo(routeName, total / samples, notFaster >= 2 * faster);
            emitted.add(new Tuple.TimestampedTuple<>(input.getTimestamp(), new KeyValPair<>(routeName, summary)));
            return emitted;
        }
    }

    /**
     * Flat, bean-style result record: the route key, its average speed, the slowdown flag
     * and the timestamp the result belongs to.
     */
    static class OutputPojo {
        private Double avgSpeed;
        private Boolean slowdownEvent;
        private String key;
        private Long timestamp;

        /**
         * Creates a record with every property unset ({@code null}).
         */
        public OutputPojo() {
        }

        /**
         * Creates a fully populated record.
         *
         * @param avgSpeed      average speed
         * @param slowdownEvent slowdown flag
         * @param key           route key
         * @param timestamp     timestamp of the result
         */
        public OutputPojo(Double avgSpeed, Boolean slowdownEvent, String key, Long timestamp) {
            this.avgSpeed = avgSpeed;
            this.slowdownEvent = slowdownEvent;
            this.key = key;
            this.timestamp = timestamp;
        }

        public String getKey() {
            return this.key;
        }

        public void setKey(String key) {
            this.key = key;
        }

        public Double getAvgSpeed() {
            return this.avgSpeed;
        }

        public void setAvgSpeed(Double avgSpeed) {
            this.avgSpeed = avgSpeed;
        }

        public Boolean getSlowdownEvent() {
            return this.slowdownEvent;
        }

        public void setSlowdownEvent(Boolean slowdownEvent) {
            this.slowdownEvent = slowdownEvent;
        }

        public Long getTimestamp() {
            return this.timestamp;
        }

        public void setTimestamp(Long timestamp) {
            this.timestamp = timestamp;
        }

        /**
         * Renders the record as {@code key + avgSpeed + slowdownEvent + timestamp}, with the
         * literal separator {@code " + "} between the values; unset values print as "null".
         */
        @Override
        public String toString() {
            StringBuilder text = new StringBuilder();
            text.append(key).append(" + ");
            text.append(avgSpeed).append(" + ");
            text.append(slowdownEvent).append(" + ");
            text.append(timestamp);
            return text.toString();
        }

    }

    /**
     * Operator that stores every {@link OutputPojo} it receives in a static map, keyed by
     * (timestamp, route key) and holding (average speed, slowdown flag) as the value.
     * The map is static, so it is shared by all instances of this class and is readable
     * through {@link #getResult()}.
     */
    public static class Collector extends BaseOperator {
        private static Map<KeyValPair<Long, String>, KeyValPair<Double, Boolean>> result = new HashMap<>();

        public final transient DefaultInputPort<OutputPojo> input = new DefaultInputPort<OutputPojo>() {
            @Override
            public void process(OutputPojo tuple) {
                KeyValPair<Long, String> timeAndRoute = new KeyValPair<>(tuple.getTimestamp(), tuple.getKey());
                KeyValPair<Double, Boolean> speedAndFlag =
                        new KeyValPair<>(tuple.getAvgSpeed(), tuple.getSlowdownEvent());
                result.put(timeAndRoute, speedAndFlag);
            }
        };

        /**
         * @return the live static map of collected results (not a copy)
         */
        public static Map<KeyValPair<Long, String>, KeyValPair<Double, Boolean>> getResult() {
            return result;
        }
    }

    /**
     * Flattens a timestamped (route, {@link RouteInfo}) pair into an {@link OutputPojo}
     * carrying the average speed, the slowdown flag, the pair's key and the tuple timestamp.
     */
    static class FormatStatsFn
            implements Function.MapFunction<Tuple.TimestampedTuple<KeyValPair<String, RouteInfo>>, OutputPojo> {
        @Override
        public OutputPojo f(Tuple.TimestampedTuple<KeyValPair<String, RouteInfo>> input) {
            KeyValPair<String, RouteInfo> keyedStats = input.getValue();
            RouteInfo stats = keyedStats.getValue();
            return new OutputPojo(stats.getAvgSpeed(), stats.getSlowdownEvent(), keyedStats.getKey(),
                    input.getTimestamp());
        }
    }

    /**
     * Composite transformation over (route, station speed) pairs. It groups the readings
     * by route, runs {@link GatherStats} on each group and finally maps the statistics to
     * {@link OutputPojo} instances with {@link FormatStatsFn}.
     */
    static class TrackSpeed extends
            CompositeStreamTransform<WindowedStream<KeyValPair<String, StationSpeed>>, WindowedStream<OutputPojo>> {
        @Override
        public WindowedStream<OutputPojo> compose(WindowedStream<KeyValPair<String, StationSpeed>> inputStream) {
            // Each pair is passed on unchanged, stamped with the timestamp of the reading it carries.
            Function.ToKeyValue<KeyValPair<String, StationSpeed>, String, StationSpeed> stampWithReadingTime =
                    new Function.ToKeyValue<KeyValPair<String, StationSpeed>, String, StationSpeed>() {
                        @Override
                        public Tuple<KeyValPair<String, StationSpeed>> f(KeyValPair<String, StationSpeed> input) {
                            return new Tuple.TimestampedTuple<>(input.getValue().getTimestamp(), input);
                        }
                    };

            // GroupByKey step: collect a list of all station readings for a given route.
            WindowedStream<Tuple.WindowedTuple<KeyValPair<String, List<StationSpeed>>>> readingsPerRoute =
                    inputStream.accumulateByKey(new Group<StationSpeed>(), stampWithReadingTime,
                            name("GroupByKey"));

            // Analyze 'slowdown' over the route readings.
            WindowedStream<Tuple.TimestampedTuple<KeyValPair<String, RouteInfo>>> routeStats =
                    readingsPerRoute.flatMap(new GatherStats(), name("GatherStats"));

            // Convert the statistics into output records.
            WindowedStream<OutputPojo> formatted = routeStats.map(new FormatStatsFn(), name("FormatStatsFn"));
            return formatted;
        }
    }

    /**
     * Parses the average-speed column (index 3) of a split input record.
     *
     * <p>Absent values are detected with explicit checks rather than by catching
     * {@code NullPointerException}, so only a genuinely malformed number is handled
     * through the exception path.
     *
     * @param inputItems the fields of one reading; a {@code null} array yields {@code null}
     * @return the parsed speed, or {@code null} when the field is absent or is not a valid double
     */
    private static Double tryParseAvgSpeed(String[] inputItems) {
        if (inputItems == null) {
            return null;
        }
        String rawSpeed = tryParseString(inputItems, 3);
        if (rawSpeed == null) {
            return null;
        }
        try {
            return Double.parseDouble(rawSpeed);
        } catch (NumberFormatException e) {
            // Not a number: treat the reading as having no speed.
            return null;
        }
    }

    /**
     * Reads the station-type column of a split input record.
     *
     * @param inputItems the fields of one reading
     * @return whatever {@code tryParseString} yields for column 2
     */
    private static String tryParseStationType(String[] inputItems) {
        final int stationTypeColumn = 2;
        return tryParseString(inputItems, stationTypeColumn);
    }

    /**
     * Reads the station-ID column of a split input record.
     *
     * @param inputItems the fields of one reading
     * @return whatever {@code tryParseString} yields for column 1
     */
    private static String tryParseStationId(String[] inputItems) {
        final int stationIdColumn = 1;
        return tryParseString(inputItems, stationIdColumn);
    }

    /**
     * Reads the timestamp column of a split input record.
     *
     * @param inputItems the fields of one reading
     * @return whatever {@code tryParseString} yields for column 0
     */
    private static String tryParseTimestamp(String[] inputItems) {
        final int timestampColumn = 0;
        return tryParseString(inputItems, timestampColumn);
    }

    /**
     * Returns the field at the given position, or {@code null} when the record has no such field.
     *
     * <p>The bound check is strict ({@code index < length}): a record with exactly
     * {@code index} fields has no element at {@code index}, so it must yield {@code null}
     * rather than an {@code ArrayIndexOutOfBoundsException}.
     *
     * @param inputItems the fields of one reading; must not be {@code null}
     * @param index      zero-based position of the wanted field
     * @return the field at {@code index}, or {@code null} if {@code index} is outside the array
     */
    private static String tryParseString(String[] inputItems, int index) {
        return index >= 0 && index < inputItems.length ? inputItems[index] : null;
    }

    /**
     * Builds the hard-wired lookup from San Diego sensor station ID to the name of the
     * 'route' that station is tracked under.
     *
     * @return a new table holding the three known stations
     */
    private static Map<String, String> buildStationInfo() {
        Map<String, String> routeByStation = new Hashtable<>();
        // from freeway 805 S
        routeByStation.put("1108413", "SDRoute1");
        // from freeway 78 E
        for (String stationId : new String[] { "1108699", "1108702" }) {
            routeByStation.put(stationId, "SDRoute2");
        }
        return routeByStation;
    }

    /**
     * A dummy generator to generate some traffic information.
     *
     * <p>Each call to {@link #emitTuples()} emits one line per (station type, station ID)
     * combination in the form {@code time,stationID,stationType,speed}, pausing 50 ms after
     * each line. The number of emitted lines is tracked in a static counter that
     * {@link #setup(Context.OperatorContext)} resets.
     */
    public static class InfoGen extends BaseOperator implements InputOperator {
        public transient DefaultOutputPort<String> output = new DefaultOutputPort<>();

        private String[] stationTypes = new String[] { "ML", "BL", "GL" };
        private int[] stationIDs = new int[] { 1108413, 1108699, 1108702 };
        // Lower bound of the generated speeds; a random value in [0, 20) is added on top.
        private double ave = 55.0;
        // Base time captured in setup(); generated timestamps are offsets from it.
        private long timestamp;
        // Width of the interval after the base time from which timestamps are drawn.
        private static final Duration RAND_RANGE = Duration.standardMinutes(10);
        private static int tupleCount = 0;

        /**
         * @return the number of lines emitted since the last {@code setup} call
         */
        public static int getTupleCount() {
            return tupleCount;
        }

        @Override
        public void setup(Context.OperatorContext context) {
            tupleCount = 0;
            timestamp = System.currentTimeMillis();
        }

        /**
         * Emits one generated reading for every station type / station ID combination.
         * If the thread is interrupted while pausing between readings, the interrupt flag
         * is restored and the method returns without emitting the remaining readings.
         */
        @Override
        public void emitTuples() {
            for (String stationType : stationTypes) {
                for (int stationID : stationIDs) {
                    double speed = Math.random() * 20 + ave;
                    long time = (long) (Math.random() * RAND_RANGE.getMillis()) + timestamp;
                    try {
                        output.emit(time + "," + stationID + "," + stationType + "," + speed);
                        tupleCount++;

                        Thread.sleep(50);
                    } catch (InterruptedException e) {
                        // Thread.sleep clears the interrupt status when it throws; set it again so
                        // the code running this operator can still observe the interruption.
                        Thread.currentThread().interrupt();
                        return;
                    } catch (Exception e) {
                        // Best effort: a reading that fails to emit is skipped on purpose.
                    }
                }
            }
        }
    }

    /**
     * Assembles the application: generated readings are timestamped, reduced to
     * (route, station speed) pairs, windowed with sliding time windows, analysed by
     * {@link TrackSpeed}, printed to the console and finally handed to a {@link Collector}.
     *
     * @param dag  the DAG to populate
     * @param conf the configuration passed in by the caller (not read here)
     */
    @Override
    public void populateDAG(DAG dag, Configuration conf) {
        InfoGen infoGen = new InfoGen();
        Collector collector = new Collector();

        // Sliding windows of WINDOW_DURATION minutes, starting every WINDOW_SLIDE_EVERY minutes.
        WindowOption.SlidingTimeWindows slidingWindows = new WindowOption.SlidingTimeWindows(
                Duration.standardMinutes(WINDOW_DURATION), Duration.standardMinutes(WINDOW_SLIDE_EVERY));
        // Early firings every 5000 ms, with fired panes accumulating.
        TriggerOption earlyFirings = new TriggerOption()
                .withEarlyFiringsAtEvery(Duration.millis(5000))
                .accumulatingFiredPanes();

        // Create a stream from the input operator, then extract the timestamp from each
        // input line and wrap the line into a TimestampedTuple.
        ApexStream<Tuple.TimestampedTuple<String>> stream = StreamFactory
                .fromInput(infoGen, infoGen.output, name("infoGen"))
                .map(new ExtractTimestamps(), name("ExtractTimestamps"));

        stream
                // Extract the average speed of a station.
                .flatMap(new ExtractStationSpeedFn(), name("ExtractStationSpeedFn"))
                // Apply window and trigger option.
                .window(slidingWindows, earlyFirings)
                // Apply TrackSpeed composite transformation to compute the route information.
                .addCompositeStreams(new TrackSpeed())
                // Print the result to console.
                .print(name("console"))
                // Hand the results to the collector and build the DAG.
                .endWith(collector, collector.input, name("Collector"))
                .populateDag(dag);
    }
}