Usage examples for org.apache.hadoop.hdfs.inotify.EventBatch#getEvents()
public Event[] getEvents()
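The examples below all follow the same pattern: obtain a DFSInotifyEventInputStream, read an EventBatch from it, and iterate over getEvents(). A minimal, self-contained sketch of that pattern (the NameNode URI and the one-second timeout here are illustrative placeholders, not taken from any of the sources below):

import java.net.URI;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSInotifyEventInputStream;
import org.apache.hadoop.hdfs.client.HdfsAdmin;
import org.apache.hadoop.hdfs.inotify.Event;
import org.apache.hadoop.hdfs.inotify.EventBatch;

public class GetEventsSketch {
    public static void main(String[] args) throws Exception {
        // "hdfs://namenode:8020" is a placeholder; substitute your NameNode URI.
        HdfsAdmin admin = new HdfsAdmin(URI.create("hdfs://namenode:8020"), new Configuration());
        DFSInotifyEventInputStream stream = admin.getInotifyEventStream();
        // poll(...) returns null when no events arrive within the timeout.
        EventBatch batch = stream.poll(1, TimeUnit.SECONDS);
        if (batch != null) {
            for (Event event : batch.getEvents()) {
                System.out.println(batch.getTxid() + ": " + event.getEventType());
            }
        }
    }
}

Reading from the stream can also throw MissingEventsException when the requested transactions are no longer available from the NameNode, which the examples below handle explicitly.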
From source file: INotifyUtil.java
License: Apache License

/**
 * Poll events and output the details.
 * Ctrl + C to stop polling.
 * @param args the parameter is not used.
 * @throws IOException if configuration error or I/O error happens.
 */
public static void main(String args[]) throws IOException {
    Configuration conf = new HdfsConfiguration();
    DFSClient client = new DFSClient(DFSUtilClient.getNNAddress(conf), conf);
    DFSInotifyEventInputStream iStream = client.getInotifyEventStream();
    while (true) {
        try {
            EventBatch eventBatch = iStream.take();
            for (Event event : eventBatch.getEvents()) {
                System.out.println(event.toString());
            }
        } catch (InterruptedException e) {
            System.out.println("Interrupted. Exiting...");
            return;
        } catch (MissingEventsException e) {
            e.printStackTrace();
            return;
        }
    }
}
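Because iStream.take() blocks until a batch is available, this loop runs until the thread is interrupted or events go missing. The next example uses poll(timeout, unit) instead, which returns null on timeout and so lets the caller interleave other work between reads.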
From source file: alluxio.underfs.hdfs.activesync.SupportedHdfsActiveSyncProvider.java
License: Apache License

/**
 * Fetch and process events.
 * @param eventStream event stream
 */
public void pollEvent(DFSInotifyEventInputStream eventStream) {
    EventBatch batch;
    LOG.debug("Polling thread starting, with timeout {} ms", mActiveUfsPollTimeoutMs);

    int count = 0;
    long start = System.currentTimeMillis();
    long behind = eventStream.getTxidsBehindEstimate();

    while (!Thread.currentThread().isInterrupted()) {
        try {
            batch = eventStream.poll(mActiveUfsPollTimeoutMs, TimeUnit.MILLISECONDS);
            if (batch != null) {
                long txId = batch.getTxid();
                count++;
                for (Event event : batch.getEvents()) {
                    processEvent(event, mUfsUriList, txId);
                }
            }
            long end = System.currentTimeMillis();
            if (end > (start + mActiveUfsSyncEventRateInterval)) {
                long currentlyBehind = eventStream.getTxidsBehindEstimate();
                LOG.info("HDFS generated {} events in {} ms, at a rate of {} rps",
                        count + currentlyBehind - behind, end - start,
                        String.format("%.2f", (count + currentlyBehind - behind) * 1000.0 / (end - start)));
                LOG.info("processed {} events in {} ms, at a rate of {} rps",
                        count, end - start,
                        String.format("%.2f", count * 1000.0 / (end - start)));
                LOG.info("Currently TxidsBehindEstimate by {}", currentlyBehind);
                behind = currentlyBehind;
                start = end;
                count = 0;
            }
        } catch (IOException e) {
            LOG.warn("IOException occurred during polling inotify {}", e);
            if (e.getCause() instanceof InterruptedException) {
                return;
            }
        } catch (MissingEventsException e) {
            LOG.warn("MissingEventsException during polling {}", e);
            mEventMissed = true;
            // need to sync all syncpoints at this point
        } catch (InterruptedException e) {
            LOG.warn("InterruptedException during polling {}", e);
            return;
        }
    }
}
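This example layers rate reporting on top of the same getEvents() loop: getTxidsBehindEstimate() reports how many transactions the reader lags behind the NameNode, so comparing that estimate across an interval, together with the local batch count, yields the event generation and processing rates it logs.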
From source file: org.apache.nifi.processors.hadoop.inotify.GetHDFSEvents.java
License: Apache License

@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final StateManager stateManager = context.getStateManager();
    try {
        StateMap state = stateManager.getState(Scope.CLUSTER);
        String txIdAsString = state.get(LAST_TX_ID);
        if (txIdAsString != null && !"".equals(txIdAsString)) {
            lastTxId = Long.parseLong(txIdAsString);
        }
    } catch (IOException e) {
        getLogger().error("Unable to retrieve last transaction ID. Must retrieve last processed transaction ID before processing can occur.", e);
        context.yield();
        return;
    }

    try {
        final int retries = context.getProperty(NUMBER_OF_RETRIES_FOR_POLL).asInteger();
        final TimeUnit pollDurationTimeUnit = TimeUnit.MICROSECONDS;
        final long pollDuration = context.getProperty(POLL_DURATION).asTimePeriod(pollDurationTimeUnit);
        final DFSInotifyEventInputStream eventStream = lastTxId == -1L
                ? getHdfsAdmin().getInotifyEventStream()
                : getHdfsAdmin().getInotifyEventStream(lastTxId);
        final EventBatch eventBatch = getEventBatch(eventStream, pollDuration, pollDurationTimeUnit, retries);

        if (eventBatch != null && eventBatch.getEvents() != null) {
            if (eventBatch.getEvents().length > 0) {
                List<FlowFile> flowFiles = new ArrayList<>(eventBatch.getEvents().length);
                for (Event e : eventBatch.getEvents()) {
                    if (toProcessEvent(context, e)) {
                        getLogger().debug("Creating flow file for event: {}.", new Object[] { e });
                        final String path = getPath(e);

                        FlowFile flowFile = session.create();
                        flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), "application/json");
                        flowFile = session.putAttribute(flowFile, EventAttributes.EVENT_TYPE, e.getEventType().name());
                        flowFile = session.putAttribute(flowFile, EventAttributes.EVENT_PATH, path);
                        flowFile = session.write(flowFile, new OutputStreamCallback() {
                            @Override
                            public void process(OutputStream out) throws IOException {
                                out.write(OBJECT_MAPPER.writeValueAsBytes(e));
                            }
                        });

                        flowFiles.add(flowFile);
                    }
                }

                for (FlowFile flowFile : flowFiles) {
                    final String path = flowFile.getAttribute(EventAttributes.EVENT_PATH);
                    final String transitUri = path.startsWith("/") ? "hdfs:/" + path : "hdfs://" + path;
                    getLogger().debug("Transferring flow file {} and creating provenance event with URI {}.",
                            new Object[] { flowFile, transitUri });
                    session.transfer(flowFile, REL_SUCCESS);
                    session.getProvenanceReporter().receive(flowFile, transitUri);
                }
            }

            lastTxId = eventBatch.getTxid();
        }
    } catch (IOException | InterruptedException e) {
        getLogger().error("Unable to get notification information: {}", new Object[] { e });
        context.yield();
        return;
    } catch (MissingEventsException e) {
        // Set lastTxId to -1 and update state. This may cause events not to be processed. The reason this
        // exception is thrown is described in the org.apache.hadoop.hdfs.client.HdfsAdmin#getInotifyEventStream
        // API docs, which suggest tuning a couple of parameters if this API is used.
        lastTxId = -1L;
        getLogger().error("Unable to get notification information. Setting transaction id to -1. This may cause some events to get missed. "
                + "Please see javadoc for org.apache.hadoop.hdfs.client.HdfsAdmin#getInotifyEventStream: {}", new Object[] { e });
    }

    updateClusterStateForTxId(stateManager);
}
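The processor persists eventBatch.getTxid() to NiFi cluster state after each successful poll, so a restarted node resumes from the last processed transaction via getInotifyEventStream(lastTxId) rather than from the current end of the edit log; on MissingEventsException it deliberately resets to -1 and accepts a possible gap.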
From source file: org.apache.nifi.processors.hadoop.inotify.TestGetHDFSEvents.java
License: Apache License

@Test
public void onTriggerShouldProperlyHandleAnEmptyEventBatch() throws Exception {
    EventBatch eventBatch = mock(EventBatch.class);
    when(eventBatch.getEvents()).thenReturn(new Event[] {});
    when(inotifyEventInputStream.poll(1000000L, TimeUnit.MICROSECONDS)).thenReturn(eventBatch);
    when(hdfsAdmin.getInotifyEventStream()).thenReturn(inotifyEventInputStream);
    when(eventBatch.getTxid()).thenReturn(100L);

    GetHDFSEvents processor = new TestableGetHDFSEvents(kerberosProperties, hdfsAdmin);
    TestRunner runner = TestRunners.newTestRunner(processor);
    runner.setProperty(GetHDFSEvents.POLL_DURATION, "1 second");
    runner.setProperty(GetHDFSEvents.HDFS_PATH_TO_WATCH, "/some/path");
    runner.setProperty(GetHDFSEvents.NUMBER_OF_RETRIES_FOR_POLL, "5");
    runner.run();

    List<MockFlowFile> successfulFlowFiles = runner.getFlowFilesForRelationship(GetHDFSEvents.REL_SUCCESS);
    assertEquals(0, successfulFlowFiles.size());
    verify(eventBatch).getTxid();
    assertEquals("100", runner.getProcessContext().getStateManager().getState(Scope.CLUSTER).get("last.tx.id"));
}
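Note that even for an empty getEvents() array the batch's transaction id is still recorded (last.tx.id becomes "100"), so an idle stream keeps advancing the processor's resume point.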
From source file: org.apache.nifi.processors.hadoop.inotify.TestGetHDFSEvents.java
License: Apache License

@Test
public void makeSureHappyPathForProcessingEventsSendsFlowFilesToCorrectRelationship() throws Exception {
    Event[] events = getEvents();
    EventBatch eventBatch = mock(EventBatch.class);
    when(eventBatch.getEvents()).thenReturn(events);
    when(inotifyEventInputStream.poll(1000000L, TimeUnit.MICROSECONDS)).thenReturn(eventBatch);
    when(hdfsAdmin.getInotifyEventStream()).thenReturn(inotifyEventInputStream);
    when(eventBatch.getTxid()).thenReturn(100L);

    GetHDFSEvents processor = new TestableGetHDFSEvents(kerberosProperties, hdfsAdmin);
    TestRunner runner = TestRunners.newTestRunner(processor);
    runner.setProperty(GetHDFSEvents.POLL_DURATION, "1 second");
    runner.setProperty(GetHDFSEvents.HDFS_PATH_TO_WATCH, "/some/path(/)?.*");
    runner.run();

    List<MockFlowFile> successfulFlowFiles = runner.getFlowFilesForRelationship(GetHDFSEvents.REL_SUCCESS);
    assertEquals(3, successfulFlowFiles.size());
    verify(eventBatch).getTxid();
    assertEquals("100", runner.getProcessContext().getStateManager().getState(Scope.CLUSTER).get("last.tx.id"));
}
From source file: org.apache.nifi.processors.hadoop.inotify.TestGetHDFSEvents.java
License: Apache License

@Test
public void onTriggerShouldOnlyProcessEventsWithSpecificPath() throws Exception {
    Event[] events = getEvents();
    EventBatch eventBatch = mock(EventBatch.class);
    when(eventBatch.getEvents()).thenReturn(events);
    when(inotifyEventInputStream.poll(1000000L, TimeUnit.MICROSECONDS)).thenReturn(eventBatch);
    when(hdfsAdmin.getInotifyEventStream()).thenReturn(inotifyEventInputStream);
    when(eventBatch.getTxid()).thenReturn(100L);

    GetHDFSEvents processor = new TestableGetHDFSEvents(kerberosProperties, hdfsAdmin);
    TestRunner runner = TestRunners.newTestRunner(processor);
    runner.setProperty(GetHDFSEvents.HDFS_PATH_TO_WATCH, "/some/path/create(/)?");
    runner.run();

    List<MockFlowFile> successfulFlowFiles = runner.getFlowFilesForRelationship(GetHDFSEvents.REL_SUCCESS);
    assertEquals(1, successfulFlowFiles.size());
    verify(eventBatch).getTxid();
    assertEquals("100", runner.getProcessContext().getStateManager().getState(Scope.CLUSTER).get("last.tx.id"));
}
From source file: org.apache.nifi.processors.hadoop.inotify.TestGetHDFSEvents.java
License: Apache License

@Test
public void eventsProcessorShouldProperlyFilterEventTypes() throws Exception {
    Event[] events = getEvents();
    EventBatch eventBatch = mock(EventBatch.class);
    when(eventBatch.getEvents()).thenReturn(events);
    when(inotifyEventInputStream.poll(1000000L, TimeUnit.MICROSECONDS)).thenReturn(eventBatch);
    when(hdfsAdmin.getInotifyEventStream()).thenReturn(inotifyEventInputStream);
    when(eventBatch.getTxid()).thenReturn(100L);

    GetHDFSEvents processor = new TestableGetHDFSEvents(kerberosProperties, hdfsAdmin);
    TestRunner runner = TestRunners.newTestRunner(processor);
    runner.setProperty(GetHDFSEvents.HDFS_PATH_TO_WATCH, "/some/path(/.*)?");
    runner.setProperty(GetHDFSEvents.EVENT_TYPES, "create, metadata");
    runner.run();

    List<MockFlowFile> successfulFlowFiles = runner.getFlowFilesForRelationship(GetHDFSEvents.REL_SUCCESS);
    assertEquals(2, successfulFlowFiles.size());

    List<String> expectedEventTypes = Arrays.asList("CREATE", "METADATA");
    for (MockFlowFile f : successfulFlowFiles) {
        String eventType = f.getAttribute(EventAttributes.EVENT_TYPE);
        assertTrue(expectedEventTypes.contains(eventType));
    }

    verify(eventBatch).getTxid();
    assertEquals("100", runner.getProcessContext().getStateManager().getState(Scope.CLUSTER).get("last.tx.id"));
}
From source file: org.apache.nifi.processors.hadoop.inotify.TestGetHDFSEvents.java
License: Apache License

@Test
public void makeSureExpressionLanguageIsWorkingProperlyWithinTheHdfsPathToWatch() throws Exception {
    Event[] events = new Event[] {
            new Event.CreateEvent.Builder().path("/some/path/1/2/3/t.txt").build(),
            new Event.CreateEvent.Builder().path("/some/path/1/2/4/t.txt").build(),
            new Event.CreateEvent.Builder().path("/some/path/1/2/3/.t.txt").build() };

    EventBatch eventBatch = mock(EventBatch.class);
    when(eventBatch.getEvents()).thenReturn(events);
    when(inotifyEventInputStream.poll(1000000L, TimeUnit.MICROSECONDS)).thenReturn(eventBatch);
    when(hdfsAdmin.getInotifyEventStream()).thenReturn(inotifyEventInputStream);
    when(eventBatch.getTxid()).thenReturn(100L);

    GetHDFSEvents processor = new TestableGetHDFSEvents(kerberosProperties, hdfsAdmin);
    TestRunner runner = TestRunners.newTestRunner(processor);
    runner.setProperty(GetHDFSEvents.HDFS_PATH_TO_WATCH, "/some/path/${literal(1)}/${literal(2)}/${literal(3)}/.*.txt");
    runner.setProperty(GetHDFSEvents.EVENT_TYPES, "create");
    runner.setProperty(GetHDFSEvents.IGNORE_HIDDEN_FILES, "true");
    runner.run();

    List<MockFlowFile> successfulFlowFiles = runner.getFlowFilesForRelationship(GetHDFSEvents.REL_SUCCESS);
    assertEquals(1, successfulFlowFiles.size());

    for (MockFlowFile f : successfulFlowFiles) {
        String eventType = f.getAttribute(EventAttributes.EVENT_TYPE);
        assertTrue(eventType.equals("CREATE"));
    }

    verify(eventBatch).getTxid();
    assertEquals("100", runner.getProcessContext().getStateManager().getState(Scope.CLUSTER).get("last.tx.id"));
}