Example usage for org.apache.hadoop.hdfs.inotify.EventBatch.getEvents

List of usage examples for org.apache.hadoop.hdfs.inotify.EventBatch.getEvents

Introduction

On this page you can find example usages of org.apache.hadoop.hdfs.inotify.EventBatch.getEvents().

Prototype

public Event[] getEvents() 
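
Before the harvested examples below, here is a minimal, self-contained sketch of the typical pattern: open an inotify event stream, take a batch, and iterate the events returned by getEvents(). The HdfsAdmin entry point and the NameNode URI are assumptions for illustration; adjust them for your cluster.

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSInotifyEventInputStream;
import org.apache.hadoop.hdfs.client.HdfsAdmin;
import org.apache.hadoop.hdfs.inotify.Event;
import org.apache.hadoop.hdfs.inotify.EventBatch;
import org.apache.hadoop.hdfs.inotify.MissingEventsException;

public class GetEventsSketch {
    public static void main(String[] args) throws IOException, InterruptedException, MissingEventsException {
        // Assumption: replace with your NameNode URI and site configuration.
        HdfsAdmin admin = new HdfsAdmin(URI.create("hdfs://namenode:8020"), new Configuration());
        DFSInotifyEventInputStream stream = admin.getInotifyEventStream();

        // take() blocks until a batch is available; all events in a batch share one transaction id.
        EventBatch batch = stream.take();
        for (Event event : batch.getEvents()) {
            System.out.println(event.getEventType() + ": " + event);
        }
    }
}

The examples under Usage show the same pattern with the non-blocking poll(long, TimeUnit) variant, retry handling, and per-event processing.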

Usage

From source file: INotifyUtil.java

License: Apache License
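
This command-line utility connects a DFSClient to the NameNode, opens an inotify event stream, and prints every event from each batch returned by take() until the process is interrupted.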

/**
 * Poll events and output the details.
 * Ctrl + C to stop polling.
 * @param args the parameter is not used.
 * @throws IOException if configuration error or I/O error happens.
 */
public static void main(String args[]) throws IOException {
    Configuration conf = new HdfsConfiguration();
    DFSClient client = new DFSClient(DFSUtilClient.getNNAddress(conf), conf);
    DFSInotifyEventInputStream iStream = client.getInotifyEventStream();
    while (true) {
        try {
            EventBatch eventBatch = iStream.take();
            for (Event event : eventBatch.getEvents()) {
                System.out.println(event.toString());
            }
        } catch (InterruptedException e) {
            System.out.println("Interrupted. Exiting...");
            return;
        } catch (MissingEventsException e) {
            e.printStackTrace();
            return;
        }
    }
}

From source file: alluxio.underfs.hdfs.activesync.SupportedHdfsActiveSyncProvider.java

License: Apache License
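
Alluxio's active sync provider polls the stream with a timeout, passes each event in the batch to processEvent() together with the batch's transaction id, and periodically logs the event rate and the estimated number of transactions it is lagging behind.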

/**
 * Fetch and process events.
 * @param eventStream event stream
 */
public void pollEvent(DFSInotifyEventInputStream eventStream) {
    EventBatch batch;
    LOG.debug("Polling thread starting, with timeout {} ms", mActiveUfsPollTimeoutMs);
    int count = 0;
    long start = System.currentTimeMillis();

    long behind = eventStream.getTxidsBehindEstimate();

    while (!Thread.currentThread().isInterrupted()) {
        try {
            batch = eventStream.poll(mActiveUfsPollTimeoutMs, TimeUnit.MILLISECONDS);

            if (batch != null) {
                long txId = batch.getTxid();
                count++;
                for (Event event : batch.getEvents()) {
                    processEvent(event, mUfsUriList, txId);
                }
            }
            long end = System.currentTimeMillis();
            if (end > (start + mActiveUfsSyncEventRateInterval)) {
                long currentlyBehind = eventStream.getTxidsBehindEstimate();
                LOG.info("HDFS generated {} events in {} ms, at a rate of {} rps",
                        count + currentlyBehind - behind, end - start,
                        String.format("%.2f", (count + currentlyBehind - behind) * 1000.0 / (end - start)));
                LOG.info("processed {} events in {} ms, at a rate of {} rps", count, end - start,
                        String.format("%.2f", count * 1000.0 / (end - start)));
                LOG.info("Currently TxidsBehindEstimate by {}", currentlyBehind);
                behind = currentlyBehind;
                start = end;
                count = 0;
            }
        } catch (IOException e) {
            LOG.warn("IOException occured during polling inotify {}", e);
            if (e.getCause() instanceof InterruptedException) {
                return;
            }
        } catch (MissingEventsException e) {
            LOG.warn("MissingEventException during polling {}", e);
            mEventMissed = true;
            // need to sync all syncpoints at this point
        } catch (InterruptedException e) {
            LOG.warn("InterruptedException during polling {}", e);
            return;
        }
    }
}

From source file: org.apache.nifi.processors.hadoop.inotify.GetHDFSEvents.java

License: Apache License
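
The NiFi GetHDFSEvents processor restores the last processed transaction id from cluster state, polls for an event batch, creates one JSON FlowFile per event that matches the configured path and type filters, and finally records the batch's transaction id.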

@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final StateManager stateManager = context.getStateManager();

    try {
        StateMap state = stateManager.getState(Scope.CLUSTER);
        String txIdAsString = state.get(LAST_TX_ID);

        if (txIdAsString != null && !"".equals(txIdAsString)) {
            lastTxId = Long.parseLong(txIdAsString);
        }
    } catch (IOException e) {
        getLogger().error(
                "Unable to retrieve last transaction ID. Must retrieve last processed transaction ID before processing can occur.",
                e);
        context.yield();
        return;
    }

    try {
        final int retries = context.getProperty(NUMBER_OF_RETRIES_FOR_POLL).asInteger();
        final TimeUnit pollDurationTimeUnit = TimeUnit.MICROSECONDS;
        final long pollDuration = context.getProperty(POLL_DURATION).asTimePeriod(pollDurationTimeUnit);
        final DFSInotifyEventInputStream eventStream = lastTxId == -1L ? getHdfsAdmin().getInotifyEventStream()
                : getHdfsAdmin().getInotifyEventStream(lastTxId);
        final EventBatch eventBatch = getEventBatch(eventStream, pollDuration, pollDurationTimeUnit, retries);

        if (eventBatch != null && eventBatch.getEvents() != null) {
            if (eventBatch.getEvents().length > 0) {
                List<FlowFile> flowFiles = new ArrayList<>(eventBatch.getEvents().length);
                for (Event e : eventBatch.getEvents()) {
                    if (toProcessEvent(context, e)) {
                        getLogger().debug("Creating flow file for event: {}.", new Object[] { e });
                        final String path = getPath(e);

                        FlowFile flowFile = session.create();
                        flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(),
                                "application/json");
                        flowFile = session.putAttribute(flowFile, EventAttributes.EVENT_TYPE,
                                e.getEventType().name());
                        flowFile = session.putAttribute(flowFile, EventAttributes.EVENT_PATH, path);
                        flowFile = session.write(flowFile, new OutputStreamCallback() {
                            @Override
                            public void process(OutputStream out) throws IOException {
                                out.write(OBJECT_MAPPER.writeValueAsBytes(e));
                            }
                        });

                        flowFiles.add(flowFile);
                    }
                }

                for (FlowFile flowFile : flowFiles) {
                    final String path = flowFile.getAttribute(EventAttributes.EVENT_PATH);
                    final String transitUri = path.startsWith("/") ? "hdfs:/" + path : "hdfs://" + path;
                    getLogger().debug("Transferring flow file {} and creating provenance event with URI {}.",
                            new Object[] { flowFile, transitUri });
                    session.transfer(flowFile, REL_SUCCESS);
                    session.getProvenanceReporter().receive(flowFile, transitUri);
                }
            }

            lastTxId = eventBatch.getTxid();
        }
    } catch (IOException | InterruptedException e) {
        getLogger().error("Unable to get notification information: {}", new Object[] { e });
        context.yield();
        return;
    } catch (MissingEventsException e) {
        // set lastTxId to -1 and update state. This may cause events not to be processed. The reason this exception is thrown is described in the
        // org.apache.hadoop.hdfs.client.HdfsAdmin#getInotifyEventStream API. It suggests tuning a couple of parameters if this API is used.
        lastTxId = -1L;
        getLogger().error(
                "Unable to get notification information. Setting transaction id to -1. This may cause some events to get missed. "
                        + "Please see javadoc for org.apache.hadoop.hdfs.client.HdfsAdmin#getInotifyEventStream: {}",
                new Object[] { e });
    }

    updateClusterStateForTxId(stateManager);
}

From source file: org.apache.nifi.processors.hadoop.inotify.TestGetHDFSEvents.java

License: Apache License
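
This test verifies that an empty event batch produces no FlowFiles while the batch's transaction id is still persisted to cluster state.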

@Test
public void onTriggerShouldProperlyHandleAnEmptyEventBatch() throws Exception {
    EventBatch eventBatch = mock(EventBatch.class);
    when(eventBatch.getEvents()).thenReturn(new Event[] {});

    when(inotifyEventInputStream.poll(1000000L, TimeUnit.MICROSECONDS)).thenReturn(eventBatch);
    when(hdfsAdmin.getInotifyEventStream()).thenReturn(inotifyEventInputStream);
    when(eventBatch.getTxid()).thenReturn(100L);

    GetHDFSEvents processor = new TestableGetHDFSEvents(kerberosProperties, hdfsAdmin);
    TestRunner runner = TestRunners.newTestRunner(processor);

    runner.setProperty(GetHDFSEvents.POLL_DURATION, "1 second");
    runner.setProperty(GetHDFSEvents.HDFS_PATH_TO_WATCH, "/some/path");
    runner.setProperty(GetHDFSEvents.NUMBER_OF_RETRIES_FOR_POLL, "5");
    runner.run();

    List<MockFlowFile> successfulFlowFiles = runner.getFlowFilesForRelationship(GetHDFSEvents.REL_SUCCESS);
    assertEquals(0, successfulFlowFiles.size());
    verify(eventBatch).getTxid();
    assertEquals("100", runner.getProcessContext().getStateManager().getState(Scope.CLUSTER).get("last.tx.id"));
}

From source file: org.apache.nifi.processors.hadoop.inotify.TestGetHDFSEvents.java

License: Apache License
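
This test feeds a mocked batch of events through the processor and asserts that the matching events are routed to the success relationship.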

@Test
public void makeSureHappyPathForProcessingEventsSendsFlowFilesToCorrectRelationship() throws Exception {
    Event[] events = getEvents();

    EventBatch eventBatch = mock(EventBatch.class);
    when(eventBatch.getEvents()).thenReturn(events);

    when(inotifyEventInputStream.poll(1000000L, TimeUnit.MICROSECONDS)).thenReturn(eventBatch);
    when(hdfsAdmin.getInotifyEventStream()).thenReturn(inotifyEventInputStream);
    when(eventBatch.getTxid()).thenReturn(100L);

    GetHDFSEvents processor = new TestableGetHDFSEvents(kerberosProperties, hdfsAdmin);
    TestRunner runner = TestRunners.newTestRunner(processor);

    runner.setProperty(GetHDFSEvents.POLL_DURATION, "1 second");
    runner.setProperty(GetHDFSEvents.HDFS_PATH_TO_WATCH, "/some/path(/)?.*");
    runner.run();

    List<MockFlowFile> successfulFlowFiles = runner.getFlowFilesForRelationship(GetHDFSEvents.REL_SUCCESS);
    assertEquals(3, successfulFlowFiles.size());
    verify(eventBatch).getTxid();
    assertEquals("100", runner.getProcessContext().getStateManager().getState(Scope.CLUSTER).get("last.tx.id"));
}

From source file: org.apache.nifi.processors.hadoop.inotify.TestGetHDFSEvents.java

License: Apache License
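
This test narrows the watched path so that only one of the mocked events matches and is transferred to success.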

@Test
public void onTriggerShouldOnlyProcessEventsWithSpecificPath() throws Exception {
    Event[] events = getEvents();

    EventBatch eventBatch = mock(EventBatch.class);
    when(eventBatch.getEvents()).thenReturn(events);

    when(inotifyEventInputStream.poll(1000000L, TimeUnit.MICROSECONDS)).thenReturn(eventBatch);
    when(hdfsAdmin.getInotifyEventStream()).thenReturn(inotifyEventInputStream);
    when(eventBatch.getTxid()).thenReturn(100L);

    GetHDFSEvents processor = new TestableGetHDFSEvents(kerberosProperties, hdfsAdmin);
    TestRunner runner = TestRunners.newTestRunner(processor);

    runner.setProperty(GetHDFSEvents.HDFS_PATH_TO_WATCH, "/some/path/create(/)?");
    runner.run();

    List<MockFlowFile> successfulFlowFiles = runner.getFlowFilesForRelationship(GetHDFSEvents.REL_SUCCESS);
    assertEquals(1, successfulFlowFiles.size());
    verify(eventBatch).getTxid();
    assertEquals("100", runner.getProcessContext().getStateManager().getState(Scope.CLUSTER).get("last.tx.id"));
}

From source file: org.apache.nifi.processors.hadoop.inotify.TestGetHDFSEvents.java

License: Apache License
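
This test restricts processing to create and metadata events and checks the EVENT_TYPE attribute on the resulting FlowFiles.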

@Test
public void eventsProcessorShouldProperlyFilterEventTypes() throws Exception {
    Event[] events = getEvents();

    EventBatch eventBatch = mock(EventBatch.class);
    when(eventBatch.getEvents()).thenReturn(events);

    when(inotifyEventInputStream.poll(1000000L, TimeUnit.MICROSECONDS)).thenReturn(eventBatch);
    when(hdfsAdmin.getInotifyEventStream()).thenReturn(inotifyEventInputStream);
    when(eventBatch.getTxid()).thenReturn(100L);

    GetHDFSEvents processor = new TestableGetHDFSEvents(kerberosProperties, hdfsAdmin);
    TestRunner runner = TestRunners.newTestRunner(processor);

    runner.setProperty(GetHDFSEvents.HDFS_PATH_TO_WATCH, "/some/path(/.*)?");
    runner.setProperty(GetHDFSEvents.EVENT_TYPES, "create, metadata");
    runner.run();

    List<MockFlowFile> successfulFlowFiles = runner.getFlowFilesForRelationship(GetHDFSEvents.REL_SUCCESS);
    assertEquals(2, successfulFlowFiles.size());

    List<String> expectedEventTypes = Arrays.asList("CREATE", "METADATA");
    for (MockFlowFile f : successfulFlowFiles) {
        String eventType = f.getAttribute(EventAttributes.EVENT_TYPE);
        assertTrue(expectedEventTypes.contains(eventType));
    }

    verify(eventBatch).getTxid();
    assertEquals("100", runner.getProcessContext().getStateManager().getState(Scope.CLUSTER).get("last.tx.id"));
}

From source file: org.apache.nifi.processors.hadoop.inotify.TestGetHDFSEvents.java

License: Apache License
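
This test uses NiFi Expression Language in the watched path and enables hidden-file filtering, so only the single non-hidden file under /some/path/1/2/3 produces a FlowFile.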

@Test
public void makeSureExpressionLanguageIsWorkingProperlyWithinTheHdfsPathToWatch() throws Exception {
    Event[] events = new Event[] { new Event.CreateEvent.Builder().path("/some/path/1/2/3/t.txt").build(),
            new Event.CreateEvent.Builder().path("/some/path/1/2/4/t.txt").build(),
            new Event.CreateEvent.Builder().path("/some/path/1/2/3/.t.txt").build() };

    EventBatch eventBatch = mock(EventBatch.class);
    when(eventBatch.getEvents()).thenReturn(events);

    when(inotifyEventInputStream.poll(1000000L, TimeUnit.MICROSECONDS)).thenReturn(eventBatch);
    when(hdfsAdmin.getInotifyEventStream()).thenReturn(inotifyEventInputStream);
    when(eventBatch.getTxid()).thenReturn(100L);

    GetHDFSEvents processor = new TestableGetHDFSEvents(kerberosProperties, hdfsAdmin);
    TestRunner runner = TestRunners.newTestRunner(processor);

    runner.setProperty(GetHDFSEvents.HDFS_PATH_TO_WATCH,
            "/some/path/${literal(1)}/${literal(2)}/${literal(3)}/.*.txt");
    runner.setProperty(GetHDFSEvents.EVENT_TYPES, "create");
    runner.setProperty(GetHDFSEvents.IGNORE_HIDDEN_FILES, "true");
    runner.run();

    List<MockFlowFile> successfulFlowFiles = runner.getFlowFilesForRelationship(GetHDFSEvents.REL_SUCCESS);
    assertEquals(1, successfulFlowFiles.size());

    for (MockFlowFile f : successfulFlowFiles) {
        String eventType = f.getAttribute(EventAttributes.EVENT_TYPE);
        assertTrue(eventType.equals("CREATE"));
    }

    verify(eventBatch).getTxid();
    assertEquals("100", runner.getProcessContext().getStateManager().getState(Scope.CLUSTER).get("last.tx.id"));
}