List of usage examples for org.joda.time Instant isAfter
public boolean isAfter(long instant)
From source file:com.google.cloud.dataflow.sdk.testing.WindowFnTestUtils.java
License:Apache License
/** * Assigns the given {@code timestamp} to windows using the specified {@code windowFn}, and * verifies that result of {@code windowFn.getOutputTimestamp} for each window is within the * proper bound./*from w w w . j a v a 2s. c o m*/ */ public static <T, W extends BoundedWindow> void validateNonInterferingOutputTimes(WindowFn<T, W> windowFn, long timestamp) throws Exception { Collection<W> windows = WindowFnTestUtils.<T, W>assignedWindows(windowFn, timestamp); Instant instant = new Instant(timestamp); for (W window : windows) { Instant outputTimestamp = windowFn.getOutputTimeFn().assignOutputTime(instant, window); assertFalse("getOutputTime must be greater than or equal to input timestamp", outputTimestamp.isBefore(instant)); assertFalse("getOutputTime must be less than or equal to the max timestamp", outputTimestamp.isAfter(window.maxTimestamp())); } }
From source file:com.google.cloud.dataflow.sdk.testing.WindowFnTestUtils.java
License:Apache License
/** * Assigns the given {@code timestamp} to windows using the specified {@code windowFn}, and * verifies that result of {@link WindowFn#getOutputTime windowFn.getOutputTime} for later windows * (as defined by {@code maxTimestamp} won't prevent the watermark from passing the end of earlier * windows./*from ww w .ja v a 2s .c o m*/ * * <p>This verifies that overlapping windows don't interfere at all. Depending on the * {@code windowFn} this may be stricter than desired. */ public static <T, W extends BoundedWindow> void validateGetOutputTimestamp(WindowFn<T, W> windowFn, long timestamp) throws Exception { Collection<W> windows = WindowFnTestUtils.<T, W>assignedWindows(windowFn, timestamp); List<W> sortedWindows = new ArrayList<>(windows); Collections.sort(sortedWindows, new Comparator<BoundedWindow>() { @Override public int compare(BoundedWindow o1, BoundedWindow o2) { return o1.maxTimestamp().compareTo(o2.maxTimestamp()); } }); Instant instant = new Instant(timestamp); Instant endOfPrevious = null; for (W window : sortedWindows) { Instant outputTimestamp = windowFn.getOutputTimeFn().assignOutputTime(instant, window); if (endOfPrevious == null) { // If this is the first window, the output timestamp can be anything, as long as it is in // the valid range. assertFalse("getOutputTime must be greater than or equal to input timestamp", outputTimestamp.isBefore(instant)); assertFalse("getOutputTime must be less than or equal to the max timestamp", outputTimestamp.isAfter(window.maxTimestamp())); } else { // If this is a later window, the output timestamp must be after the end of the previous // window assertTrue("getOutputTime must be greater than the end of the previous window", outputTimestamp.isAfter(endOfPrevious)); assertFalse("getOutputTime must be less than or equal to the max timestamp", outputTimestamp.isAfter(window.maxTimestamp())); } endOfPrevious = window.maxTimestamp(); } }
From source file:com.google.cloud.dataflow.sdk.transforms.windowing.AfterFirst.java
License:Apache License
@Override public Instant getWatermarkThatGuaranteesFiring(W window) { // This trigger will fire after the earliest of its sub-triggers. Instant deadline = BoundedWindow.TIMESTAMP_MAX_VALUE; for (Trigger<W> subTrigger : subTriggers) { Instant subDeadline = subTrigger.getWatermarkThatGuaranteesFiring(window); if (deadline.isAfter(subDeadline)) { deadline = subDeadline;/* w w w . j a v a 2 s . co m*/ } } return deadline; }
From source file:com.google.cloud.dataflow.sdk.util.BatchTimerInternals.java
License:Apache License
private void advance(ReduceFnRunner<?, ?, ?, ?> runner, Instant newTime, TimeDomain domain) throws Exception { PriorityQueue<TimerData> timers = queue(domain); boolean shouldFire = false; do {/*from ww w.j a v a 2 s .co m*/ TimerData timer = timers.peek(); // Timers fire if the new time is ahead of the timer shouldFire = timer != null && newTime.isAfter(timer.getTimestamp()); if (shouldFire) { // Remove before firing, so that if the trigger adds another identical // timer we don't remove it. timers.remove(); runner.onTimer(timer); } } while (shouldFire); }
From source file:com.google.cloud.dataflow.sdk.util.MonitoringUtil.java
License:Apache License
/** * Return job messages sorted in ascending order by timestamp. * @param jobId The id of the job to get the messages for. * @param startTimestampMs Return only those messages with a * timestamp greater than this value./*from www . ja v a 2 s . co m*/ * @return collection of messages * @throws IOException */ public ArrayList<JobMessage> getJobMessages(String jobId, long startTimestampMs) throws IOException { // TODO: Allow filtering messages by importance Instant startTimestamp = new Instant(startTimestampMs); ArrayList<JobMessage> allMessages = new ArrayList<>(); String pageToken = null; while (true) { Messages.List listRequest = messagesClient.list(projectId, jobId); if (pageToken != null) { listRequest.setPageToken(pageToken); } ListJobMessagesResponse response = listRequest.execute(); if (response == null || response.getJobMessages() == null) { return allMessages; } for (JobMessage m : response.getJobMessages()) { @Nullable Instant timestamp = fromCloudTime(m.getTime()); if (timestamp == null) { continue; } if (timestamp.isAfter(startTimestamp)) { allMessages.add(m); } } if (response.getNextPageToken() == null) { break; } else { pageToken = response.getNextPageToken(); } } Collections.sort(allMessages, new TimeStampComparator()); return allMessages; }
From source file:com.google.cloud.dataflow.sdk.util.PaneInfoTracker.java
License:Apache License
private <W> PaneInfo describePane(Object key, Instant windowMaxTimestamp, PaneInfo previousPane, boolean isFinal) { boolean isFirst = previousPane == null; Timing previousTiming = isFirst ? null : previousPane.getTiming(); long index = isFirst ? 0 : previousPane.getIndex() + 1; long nonSpeculativeIndex = isFirst ? 0 : previousPane.getNonSpeculativeIndex() + 1; Instant outputWM = timerInternals.currentOutputWatermarkTime(); Instant inputWM = timerInternals.currentInputWatermarkTime(); // True if it is not possible to assign the element representing this pane a timestamp // which will make an ON_TIME pane for any following computation. // Ie true if the element's latest possible timestamp is before the current output watermark. boolean isLateForOutput = outputWM != null && windowMaxTimestamp.isBefore(outputWM); // True if all emitted panes (if any) were EARLY panes. // Once the ON_TIME pane has fired, all following panes must be considered LATE even // if the output watermark is behind the end of the window. boolean onlyEarlyPanesSoFar = previousTiming == null || previousTiming == Timing.EARLY; // True is the input watermark hasn't passed the window's max timestamp. boolean isEarlyForInput = !inputWM.isAfter(windowMaxTimestamp); Timing timing;//from www. ja va 2 s . c om if (isLateForOutput || !onlyEarlyPanesSoFar) { // The output watermark has already passed the end of this window, or we have already // emitted a non-EARLY pane. Irrespective of how this pane was triggered we must // consider this pane LATE. timing = Timing.LATE; } else if (isEarlyForInput) { // This is an EARLY firing. timing = Timing.EARLY; nonSpeculativeIndex = -1; } else { // This is the unique ON_TIME firing for the window. timing = Timing.ON_TIME; } WindowTracing.debug( "describePane: {} pane (prev was {}) for key:{}; windowMaxTimestamp:{}; " + "inputWatermark:{}; outputWatermark:{}; isLateForOutput:{}", timing, previousTiming, key, windowMaxTimestamp, inputWM, outputWM, isLateForOutput); if (previousPane != null) { // Timing transitions should follow EARLY* ON_TIME? LATE* switch (previousTiming) { case EARLY: Preconditions.checkState( timing == Timing.EARLY || timing == Timing.ON_TIME || timing == Timing.LATE, "EARLY cannot transition to %s", timing); break; case ON_TIME: Preconditions.checkState(timing == Timing.LATE, "ON_TIME cannot transition to %s", timing); break; case LATE: Preconditions.checkState(timing == Timing.LATE, "LATE cannot transtion to %s", timing); break; case UNKNOWN: break; } Preconditions.checkState(!previousPane.isLast(), "Last pane was not last after all."); } return PaneInfo.createPane(isFirst, isFinal, timing, index, nonSpeculativeIndex); }
From source file:com.google.cloud.dataflow.sdk.util.ReduceFnRunner.java
License:Apache License
/** * Process an element./*from www . ja va2 s . c om*/ * * @param value the value being processed * @return the set of windows in which the element was actually processed */ private Collection<W> processElement(Map<W, W> windowToMergeResult, WindowedValue<InputT> value) throws Exception { // Redirect element windows to the ACTIVE windows they have been merged into. // The compressed representation (value, {window1, window2, ...}) actually represents // distinct elements (value, window1), (value, window2), ... // so if window1 and window2 merge, the resulting window will contain both copies // of the value. Collection<W> windows = new ArrayList<>(); for (BoundedWindow untypedWindow : value.getWindows()) { @SuppressWarnings("unchecked") W window = (W) untypedWindow; W mergeResult = windowToMergeResult.get(window); if (mergeResult == null) { mergeResult = window; } windows.add(mergeResult); } // Prefetch in each of the windows if we're going to need to process triggers for (W window : windows) { ReduceFn<K, InputT, OutputT, W>.ProcessValueContext directContext = contextFactory.forValue(window, value.getValue(), value.getTimestamp(), StateStyle.DIRECT); triggerRunner.prefetchForValue(window, directContext.state()); } // Process the element for each (mergeResultWindow, not closed) window it belongs to. List<W> triggerableWindows = new ArrayList<>(windows.size()); for (W window : windows) { ReduceFn<K, InputT, OutputT, W>.ProcessValueContext directContext = contextFactory.forValue(window, value.getValue(), value.getTimestamp(), StateStyle.DIRECT); if (triggerRunner.isClosed(directContext.state())) { // This window has already been closed. droppedDueToClosedWindow.addValue(1L); WindowTracing.debug( "ReduceFnRunner.processElement: Dropping element at {} for key:{}; window:{} " + "since window is no longer active at inputWatermark:{}; outputWatermark:{}", value.getTimestamp(), key, window, timerInternals.currentInputWatermarkTime(), timerInternals.currentOutputWatermarkTime()); continue; } triggerableWindows.add(window); activeWindows.ensureWindowIsActive(window); ReduceFn<K, InputT, OutputT, W>.ProcessValueContext renamedContext = contextFactory.forValue(window, value.getValue(), value.getTimestamp(), StateStyle.RENAMED); nonEmptyPanes.recordContent(renamedContext.state()); // Make sure we've scheduled the end-of-window or garbage collection timer for this window. Instant timer = scheduleEndOfWindowOrGarbageCollectionTimer(directContext); // Hold back progress of the output watermark until we have processed the pane this // element will be included within. If the element is too late for that, place a hold at // the end-of-window or garbage collection time to allow empty panes to contribute elements // which won't be dropped due to lateness by a following computation (assuming the following // computation uses the same allowed lateness value...) @Nullable Instant hold = watermarkHold.addHolds(renamedContext); if (hold != null) { // Assert that holds have a proximate timer. boolean holdInWindow = !hold.isAfter(window.maxTimestamp()); boolean timerInWindow = !timer.isAfter(window.maxTimestamp()); Preconditions.checkState(holdInWindow == timerInWindow, "set a hold at %s, a timer at %s, which disagree as to whether they are in window %s", hold, timer, directContext.window()); } // Execute the reduceFn, which will buffer the value as appropriate reduceFn.processValue(renamedContext); // Run the trigger to update its state triggerRunner.processValue(directContext.window(), directContext.timestamp(), directContext.timers(), directContext.state()); // At this point, if triggerRunner.shouldFire before the processValue then // triggerRunner.shouldFire after the processValue. In other words adding values // cannot take a trigger state from firing to non-firing. // (We don't actually assert this since it is too slow.) } return triggerableWindows; }
From source file:com.google.cloud.dataflow.sdk.util.ReduceFnRunner.java
License:Apache License
/** * Called when an end-of-window, garbage collection, or trigger-specific timer fires. *//* w ww . j a va 2s.co m*/ public void onTimer(TimerData timer) throws Exception { // Which window is the timer for? Preconditions.checkArgument(timer.getNamespace() instanceof WindowNamespace, "Expected timer to be in WindowNamespace, but was in %s", timer.getNamespace()); @SuppressWarnings("unchecked") WindowNamespace<W> windowNamespace = (WindowNamespace<W>) timer.getNamespace(); W window = windowNamespace.getWindow(); ReduceFn<K, InputT, OutputT, W>.Context directContext = contextFactory.base(window, StateStyle.DIRECT); ReduceFn<K, InputT, OutputT, W>.Context renamedContext = contextFactory.base(window, StateStyle.RENAMED); // Has this window had its trigger finish? // - The trigger may implement isClosed as constant false. // - If the window function does not support windowing then all windows will be considered // active. // So we must take conjunction of activeWindows and triggerRunner state. boolean windowIsActiveAndOpen = activeWindows.isActive(window) && !triggerRunner.isClosed(directContext.state()); if (!windowIsActiveAndOpen) { WindowTracing.debug("ReduceFnRunner.onTimer: Note that timer {} is for non-ACTIVE window {}", timer, window); } // If this is an end-of-window timer then we may need to set a garbage collection timer // if allowed lateness is non-zero. boolean isEndOfWindow = TimeDomain.EVENT_TIME == timer.getDomain() && timer.getTimestamp().equals(window.maxTimestamp()); // If this is a garbage collection timer then we should trigger and garbage collect the window. // We'll consider any timer at or after the end-of-window time to be a signal to garbage // collect. Instant cleanupTime = garbageCollectionTime(window); boolean isGarbageCollection = TimeDomain.EVENT_TIME == timer.getDomain() && !timer.getTimestamp().isBefore(cleanupTime); if (isGarbageCollection) { WindowTracing.debug( "ReduceFnRunner.onTimer: Cleaning up for key:{}; window:{} at {} with " + "inputWatermark:{}; outputWatermark:{}", key, window, timer.getTimestamp(), timerInternals.currentInputWatermarkTime(), timerInternals.currentOutputWatermarkTime()); if (windowIsActiveAndOpen) { // We need to call onTrigger to emit the final pane if required. // The final pane *may* be ON_TIME if no prior ON_TIME pane has been emitted, // and the watermark has passed the end of the window. @Nullable Instant newHold = onTrigger(directContext, renamedContext, true/* isFinished */, isEndOfWindow); Preconditions.checkState(newHold == null, "Hold placed at %s despite isFinished being true.", newHold); } // Cleanup flavor B: Clear all the remaining state for this window since we'll never // see elements for it again. clearAllState(directContext, renamedContext, windowIsActiveAndOpen); } else { WindowTracing.debug( "ReduceFnRunner.onTimer: Triggering for key:{}; window:{} at {} with " + "inputWatermark:{}; outputWatermark:{}", key, window, timer.getTimestamp(), timerInternals.currentInputWatermarkTime(), timerInternals.currentOutputWatermarkTime()); if (windowIsActiveAndOpen) { emitIfAppropriate(directContext, renamedContext); } if (isEndOfWindow) { // If the window strategy trigger includes a watermark trigger then at this point // there should be no data holds, either because we'd already cleared them on an // earlier onTrigger, or because we just cleared them on the above emitIfAppropriate. // We could assert this but it is very expensive. // Since we are processing an on-time firing we should schedule the garbage collection // timer. (If getAllowedLateness is zero then the timer event will be considered a // cleanup event and handled by the above). // Note we must do this even if the trigger is finished so that we are sure to cleanup // any final trigger finished bits. Preconditions.checkState(windowingStrategy.getAllowedLateness().isLongerThan(Duration.ZERO), "Unexpected zero getAllowedLateness"); WindowTracing.debug( "ReduceFnRunner.onTimer: Scheduling cleanup timer for key:{}; window:{} at {} with " + "inputWatermark:{}; outputWatermark:{}", key, directContext.window(), cleanupTime, timerInternals.currentInputWatermarkTime(), timerInternals.currentOutputWatermarkTime()); Preconditions.checkState(!cleanupTime.isAfter(BoundedWindow.TIMESTAMP_MAX_VALUE), "Cleanup time %s is beyond end-of-time", cleanupTime); directContext.timers().setTimer(cleanupTime, TimeDomain.EVENT_TIME); } } }
From source file:com.google.cloud.dataflow.sdk.util.ReduceFnRunner.java
License:Apache License
/** * Run the {@link ReduceFn#onTrigger} method and produce any necessary output. * * @return output watermark hold added, or {@literal null} if none. *//*w w w.java2 s . c o m*/ @Nullable private Instant onTrigger(final ReduceFn<K, InputT, OutputT, W>.Context directContext, ReduceFn<K, InputT, OutputT, W>.Context renamedContext, boolean isFinished, boolean isEndOfWindow) throws Exception { Instant inputWM = timerInternals.currentInputWatermarkTime(); // Prefetch necessary states ReadableState<WatermarkHold.OldAndNewHolds> outputTimestampFuture = watermarkHold .extractAndRelease(renamedContext, isFinished).readLater(); ReadableState<PaneInfo> paneFuture = paneInfoTracker.getNextPaneInfo(directContext, isFinished).readLater(); ReadableState<Boolean> isEmptyFuture = nonEmptyPanes.isEmpty(renamedContext.state()).readLater(); reduceFn.prefetchOnTrigger(directContext.state()); triggerRunner.prefetchOnFire(directContext.window(), directContext.state()); // Calculate the pane info. final PaneInfo pane = paneFuture.read(); // Extract the window hold, and as a side effect clear it. WatermarkHold.OldAndNewHolds pair = outputTimestampFuture.read(); final Instant outputTimestamp = pair.oldHold; @Nullable Instant newHold = pair.newHold; if (newHold != null) { // We can't be finished yet. Preconditions.checkState(!isFinished, "new hold at %s but finished %s", newHold, directContext.window()); // The hold cannot be behind the input watermark. Preconditions.checkState(!newHold.isBefore(inputWM), "new hold %s is before input watermark %s", newHold, inputWM); if (newHold.isAfter(directContext.window().maxTimestamp())) { // The hold must be for garbage collection, which can't have happened yet. Preconditions.checkState(newHold.isEqual(garbageCollectionTime(directContext.window())), "new hold %s should be at garbage collection for window %s plus %s", newHold, directContext.window(), windowingStrategy.getAllowedLateness()); } else { // The hold must be for the end-of-window, which can't have happened yet. Preconditions.checkState(newHold.isEqual(directContext.window().maxTimestamp()), "new hold %s should be at end of window %s", newHold, directContext.window()); Preconditions.checkState(!isEndOfWindow, "new hold at %s for %s but this is the watermark trigger", newHold, directContext.window()); } } // Only emit a pane if it has data or empty panes are observable. if (needToEmit(isEmptyFuture.read(), isFinished, pane.getTiming())) { // Run reduceFn.onTrigger method. final List<W> windows = Collections.singletonList(directContext.window()); ReduceFn<K, InputT, OutputT, W>.OnTriggerContext renamedTriggerContext = contextFactory.forTrigger( directContext.window(), paneFuture, StateStyle.RENAMED, new OnTriggerCallbacks<OutputT>() { @Override public void output(OutputT toOutput) { // We're going to output panes, so commit the (now used) PaneInfo. // TODO: This is unnecessary if the trigger isFinished since the saved // state will be immediately deleted. paneInfoTracker.storeCurrentPaneInfo(directContext, pane); // Output the actual value. outputter.outputWindowedValue(KV.of(key, toOutput), outputTimestamp, windows, pane); } }); reduceFn.onTrigger(renamedTriggerContext); } return newHold; }
From source file:com.google.cloud.dataflow.sdk.util.ReduceFnRunner.java
License:Apache License
/** * Make sure we'll eventually have a timer fire which will tell us to garbage collect * the window state. For efficiency we may need to do this in two steps rather * than one. Return the time at which the timer will fire. * * <ul>/*w w w . ja va2s . c o m*/ * <li>If allowedLateness is zero then we'll garbage collect at the end of the window. * For simplicity we'll set our own timer for this situation even though an * {@link AfterWatermark} trigger may have also set an end-of-window timer. * ({@code setTimer} is idempotent.) * <li>If allowedLateness is non-zero then we could just always set a timer for the garbage * collection time. However if the windows are large (eg hourly) and the allowedLateness is small * (eg seconds) then we'll end up with nearly twice the number of timers in-flight. So we * instead set an end-of-window timer and then roll that forward to a garbage collection timer * when it fires. We use the input watermark to distinguish those cases. * </ul> */ private Instant scheduleEndOfWindowOrGarbageCollectionTimer(ReduceFn<?, ?, ?, W>.Context directContext) { Instant inputWM = timerInternals.currentInputWatermarkTime(); Instant endOfWindow = directContext.window().maxTimestamp(); String which; Instant timer; if (endOfWindow.isBefore(inputWM)) { timer = garbageCollectionTime(directContext.window()); which = "garbage collection"; } else { timer = endOfWindow; which = "end-of-window"; } WindowTracing.trace( "ReduceFnRunner.scheduleEndOfWindowOrGarbageCollectionTimer: Scheduling {} timer at {} for " + "key:{}; window:{} where inputWatermark:{}; outputWatermark:{}", which, timer, key, directContext.window(), inputWM, timerInternals.currentOutputWatermarkTime()); Preconditions.checkState(!timer.isAfter(BoundedWindow.TIMESTAMP_MAX_VALUE), "Timer %s is beyond end-of-time", timer); directContext.timers().setTimer(timer, TimeDomain.EVENT_TIME); return timer; }