Java tutorial
/*
 * Copyright (C) 2015 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.google.cloud.dataflow.sdk.transforms.windowing;

import com.google.cloud.dataflow.sdk.annotations.Experimental;
import com.google.cloud.dataflow.sdk.annotations.Experimental.Kind;
import com.google.cloud.dataflow.sdk.coders.Coder;
import com.google.cloud.dataflow.sdk.transforms.display.DisplayData;

import org.joda.time.Duration;
import org.joda.time.Instant;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Objects;

/**
 * A {@link WindowFn} that windows values into possibly overlapping fixed-size
 * timestamp-based windows.
 *
 * <p>For example, in order to window data into 10 minute windows that
 * update every minute:
 * <pre> {@code
 * PCollection<Integer> items = ...;
 * PCollection<Integer> windowedItems = items.apply(
 *   Window.<Integer>into(SlidingWindows.of(Duration.standardMinutes(10))));
 * } </pre>
 */
public class SlidingWindows extends NonMergingWindowFn<Object, IntervalWindow> {

  /**
   * Amount of time between generated windows.
   */
  private final Duration period;

  /**
   * Size of the generated windows.
   */
  private final Duration size;

  /**
   * Offset of the generated windows.
   * Windows start at time N * period + offset, where 0 is the epoch.
   */
  private final Duration offset;

  /**
   * Assigns timestamps into half-open intervals of the form
   * [N * period, N * period + size), where 0 is the epoch.
   *
   * <p>If {@link SlidingWindows#every} is not called, the period defaults
   * to the largest time unit smaller than the given duration.  For example,
   * specifying a size of 5 seconds will result in a default period of 1 second.
   *
   * @param size the length of each generated window; must be longer than zero
   * @throws IllegalArgumentException if {@code size} is not longer than zero
   */
  public static SlidingWindows of(Duration size) {
    return new SlidingWindows(getDefaultPeriod(size), size, Duration.ZERO);
  }

  /**
   * Returns a new {@code SlidingWindows} with the original size, that assigns
   * timestamps into half-open intervals of the form
   * [N * period, N * period + size), where 0 is the epoch.
   *
   * @param period the amount of time between the starts of consecutive windows
   * @throws IllegalArgumentException if the current offset is not in [0, period)
   */
  public SlidingWindows every(Duration period) {
    return new SlidingWindows(period, size, offset);
  }

  /**
   * Assigns timestamps into half-open intervals of the form
   * [N * period + offset, N * period + offset + size).
   *
   * @param offset the shift applied to every window start
   * @throws IllegalArgumentException if offset is not in [0, period)
   */
  public SlidingWindows withOffset(Duration offset) {
    return new SlidingWindows(period, size, offset);
  }

  /**
   * Creates the window function after validating its parameters.
   *
   * <p>Because {@code offset} must lie in [0, period), a valid instance always
   * has a strictly positive period.
   *
   * @throws IllegalArgumentException if offset is not in [0, period) or size is
   *     not longer than zero
   */
  private SlidingWindows(Duration period, Duration size, Duration offset) {
    if (offset.isShorterThan(Duration.ZERO)
        || !offset.isShorterThan(period)
        || !size.isLongerThan(Duration.ZERO)) {
      throw new IllegalArgumentException(
          "SlidingWindows WindowingStrategies must have 0 <= offset < period and 0 < size");
    }
    this.period = period;
    this.size = size;
    this.offset = offset;
  }

  @Override
  public Coder<IntervalWindow> windowCoder() {
    return IntervalWindow.getCoder();
  }

  /**
   * Returns every sliding window that contains the timestamp of the element
   * being assigned, ordered from the latest-starting window to the earliest.
   */
  @Override
  public Collection<IntervalWindow> assignWindows(AssignContext c) {
    long periodMillis = period.getMillis();
    List<IntervalWindow> windows =
        new ArrayList<>((int) (size.getMillis() / periodMillis));
    Instant timestamp = c.timestamp();
    // A window [start, start + size) contains the timestamp only when
    // start > timestamp - size, so this is the exclusive lower bound on starts.
    long earliestStartExclusive = timestamp.minus(size).getMillis();
    for (long start = lastStartFor(timestamp);
        start > earliestStartExclusive;
        start -= periodMillis) {
      windows.add(new IntervalWindow(new Instant(start), size));
    }
    return windows;
  }

  /**
   * Return the earliest window that contains the end of the main-input window.
   *
   * @throws IllegalArgumentException if {@code window} is a {@link GlobalWindow}
   */
  @Override
  public IntervalWindow getSideInputWindow(final BoundedWindow window) {
    if (window instanceof GlobalWindow) {
      throw new IllegalArgumentException(
          "Attempted to get side input window for GlobalWindow from non-global WindowFn");
    }
    // The last window start at or before (maxTimestamp - size) ends too early to
    // contain maxTimestamp; the next start, one period later, is the earliest that does.
    long lastStart = lastStartFor(window.maxTimestamp().minus(size));
    return new IntervalWindow(new Instant(lastStart + period.getMillis()), size);
  }

  @Override
  public boolean isCompatible(WindowFn<?, ?> other) {
    return equals(other);
  }

  @Override
  public void populateDisplayData(DisplayData.Builder builder) {
    super.populateDisplayData(builder);
    builder
        .add(DisplayData.item("size", size).withLabel("Window Size"))
        .add(DisplayData.item("period", period).withLabel("Window Period"))
        .add(DisplayData.item("offset", offset).withLabel("Window Start Offset"));
  }

  /**
   * Return the last start of a sliding window that contains the timestamp,
   * i.e. the largest value of the form N * period + offset that is not after
   * the timestamp.
   */
  private long lastStartFor(Instant timestamp) {
    long periodMillis = period.getMillis();
    // How far the timestamp lies past the most recent window start.
    long sinceLastStart = timestamp.minus(offset).getMillis() % periodMillis;
    if (sinceLastStart < 0) {
      // Java's % takes the sign of the dividend, so timestamps before the
      // epoch yield a negative remainder; shift it back into [0, period) so
      // the returned start is never after the timestamp.
      sinceLastStart += periodMillis;
    }
    return timestamp.getMillis() - sinceLastStart;
  }

  /**
   * Picks the default period for a window size: one hour, one minute or one
   * second for sizes strictly longer than that unit (largest unit first),
   * otherwise one millisecond.
   */
  static Duration getDefaultPeriod(Duration size) {
    if (size.isLongerThan(Duration.standardHours(1))) {
      return Duration.standardHours(1);
    }
    if (size.isLongerThan(Duration.standardMinutes(1))) {
      return Duration.standardMinutes(1);
    }
    if (size.isLongerThan(Duration.standardSeconds(1))) {
      return Duration.standardSeconds(1);
    }
    return Duration.millis(1);
  }

  /** Returns the amount of time between generated windows. */
  public Duration getPeriod() {
    return period;
  }

  /** Returns the size of the generated windows. */
  public Duration getSize() {
    return size;
  }

  /** Returns the offset of the generated windows. */
  public Duration getOffset() {
    return offset;
  }

  /**
   * Ensures that later sliding windows have an output time that is past the end of earlier windows.
   *
   * <p>If this is the earliest sliding window containing {@code inputTimestamp}, that's fine.
   * Otherwise, we pick the earliest time that doesn't overlap with earlier windows.
   */
  @Experimental(Kind.OUTPUT_TIME)
  @Override
  public OutputTimeFn<? super IntervalWindow> getOutputTimeFn() {
    return new OutputTimeFn.Defaults<BoundedWindow>() {
      @Override
      public Instant assignOutputTime(Instant inputTimestamp, BoundedWindow window) {
        // One period before the window's max timestamp; anything after this
        // instant lies in the window's final period-long segment.
        Instant startOfLastSegment = window.maxTimestamp().minus(period);
        return startOfLastSegment.isBefore(inputTimestamp)
            ? inputTimestamp
            : startOfLastSegment.plus(1);
      }

      @Override
      public boolean dependsOnlyOnEarliestInputTimestamp() {
        return true;
      }
    };
  }

  /**
   * Two {@code SlidingWindows} are equal when their offset, size and period
   * are all equal.
   */
  @Override
  public boolean equals(Object object) {
    if (!(object instanceof SlidingWindows)) {
      return false;
    }
    SlidingWindows other = (SlidingWindows) object;
    return getOffset().equals(other.getOffset())
        && getSize().equals(other.getSize())
        && getPeriod().equals(other.getPeriod());
  }

  @Override
  public int hashCode() {
    return Objects.hash(size, offset, period);
  }
}