Example usage for org.apache.hadoop.fs FileContext getLocalFSFileContext

Introduction

This page collects example usages of org.apache.hadoop.fs.FileContext.getLocalFSFileContext() from open-source projects.

Prototype

public static FileContext getLocalFSFileContext() throws UnsupportedFileSystemException 
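Before the project examples below, here is a minimal, self-contained sketch of the typical pattern: obtain a FileContext bound to the local file system and recursively clean a working directory. The class name and directory path are placeholders invented for this illustration; they do not come from the projects below.

import java.io.File;

import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Path;

public class LocalFSFileContextSketch {
    public static void main(String[] args) throws Exception {
        // Bind a FileContext to the local file system. This throws
        // UnsupportedFileSystemException if the local file system
        // cannot be instantiated.
        FileContext localFs = FileContext.getLocalFSFileContext();

        // Placeholder working directory for this sketch.
        Path workDir = new Path(new File("target/example-dir").getAbsolutePath());

        // Recursively delete the directory if it exists. This is the
        // cleanup idiom that opens nearly every test shown below.
        if (localFs.util().exists(workDir)) {
            localFs.delete(workDir, true);
        }
    }
}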

Usage

From source file: com.datatorrent.lib.io.fs.AbstractFileInputOperatorTest.java

License: Open Source License

@Test
public void testRecoveryWithCurrentFile() throws Exception {
    FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);
    List<String> allLines = Lists.newArrayList();
    HashSet<String> lines = Sets.newHashSet();
    for (int line = 0; line < 5; line++) {
        lines.add("f0" + "l" + line);
    }
    allLines.addAll(lines);
    File testFile = new File(testMeta.dir, "file0");
    FileUtils.write(testFile, StringUtils.join(lines, '\n'));

    TestFileInputOperator oper = new TestFileInputOperator();
    oper.scanner = null;
    oper.currentFile = testFile.getAbsolutePath();
    oper.offset = 1;

    CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
    @SuppressWarnings({ "unchecked", "rawtypes" })
    CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
    oper.output.setSink(sink);

    oper.setDirectory(testMeta.dir);

    oper.setup(null);
    oper.beginWindow(0);
    oper.emitTuples();
    oper.endWindow();

    oper.teardown();

    Assert.assertEquals("number tuples", 4, queryResults.collectedTuples.size());
    Assert.assertEquals("lines", allLines.subList(1, allLines.size()),
            new ArrayList<String>(queryResults.collectedTuples));
}

From source file: com.datatorrent.lib.io.fs.AbstractFSDirectoryInputOperatorFailureHandlingTest.java

License: Open Source License

@Test
public void testFailureHandling() throws Exception {
    FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);
    HashSet<String> allLines = Sets.newHashSet();
    // Create 10 files with 10 records each (100 records in total).
    for (int file = 0; file < 10; file++) {
        HashSet<String> lines = Sets.newHashSet();
        for (int line = 0; line < 10; line++) {
            lines.add("f" + file + "l" + line);
        }
        allLines.addAll(lines);
        FileUtils.write(new File(testMeta.dir, "file" + file), StringUtils.join(lines, '\n'));
    }

    Thread.sleep(10);

    TestFSDirectoryInputOperator oper = new TestFSDirectoryInputOperator();

    CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
    @SuppressWarnings({ "unchecked", "rawtypes" })
    CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
    oper.output.setSink(sink);

    oper.setDirectory(testMeta.dir);
    oper.getScanner().setFilePatternRegexp(".*file[\\d]");

    oper.setup(null);
    for (long wid = 0; wid < 1000; wid++) {
        oper.beginWindow(wid);
        oper.emitTuples();
        oper.endWindow();
    }
    oper.teardown();

    Assert.assertEquals("number tuples", 100, queryResults.collectedTuples.size());
    Assert.assertEquals("lines", allLines, new HashSet<String>(queryResults.collectedTuples));

}

From source file: com.datatorrent.lib.io.fs.AbstractFSDirectoryInputOperatorTest.java

License: Open Source License

@Test
public void testSinglePartiton() throws Exception {
    FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);
    HashSet<String> allLines = Sets.newHashSet();
    for (int file = 0; file < 2; file++) {
        HashSet<String> lines = Sets.newHashSet();
        for (int line = 0; line < 2; line++) {
            lines.add("f" + file + "l" + line);
        }
        allLines.addAll(lines);
        FileUtils.write(new File(testMeta.dir, "file" + file), StringUtils.join(lines, '\n'));
    }

    TestFSDirectoryInputOperator oper = new TestFSDirectoryInputOperator();

    CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
    @SuppressWarnings({ "unchecked", "rawtypes" })
    CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
    oper.output.setSink(sink);

    oper.setDirectory(testMeta.dir);
    oper.getScanner().setFilePatternRegexp(".*file[\\d]");

    oper.setup(null);
    for (long wid = 0; wid < 3; wid++) {
        oper.beginWindow(wid);
        oper.emitTuples();
        oper.endWindow();
    }
    oper.teardown();

    Assert.assertEquals("number tuples", 4, queryResults.collectedTuples.size());
    Assert.assertEquals("lines", allLines, new HashSet<String>(queryResults.collectedTuples));

}

From source file: com.datatorrent.lib.io.fs.AbstractFSDirectoryInputOperatorTest.java

License: Open Source License

@Test
public void testPartitioning() throws Exception {
    TestFSDirectoryInputOperator oper = new TestFSDirectoryInputOperator();
    oper.getScanner().setFilePatternRegexp(".*partition([\\d]*)");
    oper.setDirectory(new File(testMeta.dir).getAbsolutePath());

    Path path = new Path(new File(testMeta.dir).getAbsolutePath());
    FileContext.getLocalFSFileContext().delete(path, true);
    for (int file = 0; file < 4; file++) {
        FileUtils.write(new File(testMeta.dir, "partition00" + file), "");
    }

    List<Partition<AbstractFSDirectoryInputOperator<String>>> partitions = Lists.newArrayList();
    partitions.add(new DefaultPartition<AbstractFSDirectoryInputOperator<String>>(oper));
    Collection<Partition<AbstractFSDirectoryInputOperator<String>>> newPartitions = oper
            .definePartitions(partitions, 1);
    Assert.assertEquals(2, newPartitions.size());
    Assert.assertEquals(2, oper.getCurrentPartitions());

    for (Partition<AbstractFSDirectoryInputOperator<String>> p : newPartitions) {
        Assert.assertNotSame(oper, p.getPartitionedInstance());
        Assert.assertNotSame(oper.getScanner(), p.getPartitionedInstance().getScanner());
        Set<String> consumed = Sets.newHashSet();
        LinkedHashSet<Path> files = p.getPartitionedInstance().getScanner()
                .scan(FileSystem.getLocal(new Configuration(false)), path, consumed);
        Assert.assertEquals("partition " + files, 2, files.size());
    }
}

From source file: com.datatorrent.lib.io.fs.AbstractFSDirectoryInputOperatorTest.java

License: Open Source License

/**
 * Tests dynamic partitioning.
 * - Create 4 files with 3 records each.
 * - Create a single partition and read all records, populating pending files in the operator.
 * - Split it into two operators.
 * - Try to emit records again; expected result is that no record is emitted, as all files
 *   are already processed.
 * - Create another 4 files with 3 records each.
 * - Try to emit records again; expected result is 4 * 3 = 12 records emitted in total.
 */
@Test
public void testPartitioningStateTransfer() throws Exception {

    TestFSDirectoryInputOperator oper = new TestFSDirectoryInputOperator();
    oper.getScanner().setFilePatternRegexp(".*partition([\\d]*)");
    oper.setDirectory(new File(testMeta.dir).getAbsolutePath());
    oper.setScanIntervalMillis(0);

    TestFSDirectoryInputOperator initialState = new Kryo().copy(oper);

    // Create 4 files with 3 records each.
    Path path = new Path(new File(testMeta.dir).getAbsolutePath());
    FileContext.getLocalFSFileContext().delete(path, true);
    int file = 0;
    for (file = 0; file < 4; file++) {
        FileUtils.write(new File(testMeta.dir, "partition00" + file), "a\nb\nc\n");
    }

    CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
    @SuppressWarnings({ "unchecked", "rawtypes" })
    CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
    oper.output.setSink(sink);

    int wid = 0;

    // Read all records to populate processedList in operator.
    oper.setup(null);
    for (int i = 0; i < 10; i++) {
        oper.beginWindow(wid);
        oper.emitTuples();
        oper.endWindow();
        wid++;
    }
    Assert.assertEquals("All tuples read ", 12, sink.collectedTuples.size());

    Assert.assertEquals(1, initialState.getCurrentPartitions());
    initialState.setPartitionCount(2);
    StatsListener.Response rsp = initialState.processStats(null);
    Assert.assertEquals(true, rsp.repartitionRequired);

    // Create partitions of the operator.
    List<Partition<AbstractFSDirectoryInputOperator<String>>> partitions = Lists.newArrayList();
    partitions.add(new DefaultPartition<AbstractFSDirectoryInputOperator<String>>(oper));
    // incremental capacity is controlled by the partitionCount property
    Collection<Partition<AbstractFSDirectoryInputOperator<String>>> newPartitions = initialState
            .definePartitions(partitions, 0);
    Assert.assertEquals(2, newPartitions.size());
    Assert.assertEquals(1, initialState.getCurrentPartitions());
    Map<Integer, Partition<AbstractFSDirectoryInputOperator<String>>> m = Maps.newHashMap();
    for (Partition<AbstractFSDirectoryInputOperator<String>> p : newPartitions) {
        m.put(m.size(), p);
    }
    initialState.partitioned(m);
    Assert.assertEquals(2, initialState.getCurrentPartitions());

    /* Collect all operators in a list */
    List<AbstractFSDirectoryInputOperator<String>> opers = Lists.newArrayList();
    for (Partition<AbstractFSDirectoryInputOperator<String>> p : newPartitions) {
        TestFSDirectoryInputOperator oi = (TestFSDirectoryInputOperator) p.getPartitionedInstance();
        oi.setup(null);
        oi.output.setSink(sink);
        opers.add(oi);
    }

    sink.clear();
    for (int i = 0; i < 10; i++) {
        for (AbstractFSDirectoryInputOperator<String> o : opers) {
            o.beginWindow(wid);
            o.emitTuples();
            o.endWindow();
        }
        wid++;
    }

    // No record should be read.
    Assert.assertEquals("No new tuples read ", 0, sink.collectedTuples.size());

    // Add four new files with 3 records each.
    for (; file < 8; file++) {
        FileUtils.write(new File(testMeta.dir, "partition00" + file), "a\nb\nc\n");
    }

    for (int i = 0; i < 10; i++) {
        for (AbstractFSDirectoryInputOperator<String> o : opers) {
            o.beginWindow(wid);
            o.emitTuples();
            o.endWindow();
        }
        wid++;
    }

    // If each file is processed exactly once, the number of records
    // emitted should be 12.
    Assert.assertEquals("All tuples read ", 12, sink.collectedTuples.size());
}

From source file: com.datatorrent.lib.io.fs.AbstractFSDirectoryInputOperatorTest.java

License: Open Source License

/**
 * Tests dynamic partitioning.
 * - Create 4 files with 3 records each.
 * - Create a single partition and read some records, populating pending files in the operator.
 * - Split it into two operators.
 * - Try to emit the remaining records.
 */
@Test
public void testPartitioningStateTransferInterrupted() throws Exception {
    TestFSDirectoryInputOperator oper = new TestFSDirectoryInputOperator();
    oper.getScanner().setFilePatternRegexp(".*partition([\\d]*)");
    oper.setDirectory(new File(testMeta.dir).getAbsolutePath());
    oper.setScanIntervalMillis(0);
    oper.setEmitBatchSize(2);

    TestFSDirectoryInputOperator initialState = new Kryo().copy(oper);

    // Create 4 files with 3 records each.
    Path path = new Path(new File(testMeta.dir).getAbsolutePath());
    FileContext.getLocalFSFileContext().delete(path, true);
    int file = 0;
    for (file = 0; file < 4; file++) {
        FileUtils.write(new File(testMeta.dir, "partition00" + file), "a\nb\nc\n");
    }

    CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
    @SuppressWarnings({ "unchecked", "rawtypes" })
    CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
    oper.output.setSink(sink);

    int wid = 0;

    // Read some records
    oper.setup(null);
    for (int i = 0; i < 5; i++) {
        oper.beginWindow(wid);
        oper.emitTuples();
        oper.endWindow();
        wid++;
    }

    Assert.assertEquals("Partial tuples read ", 6, sink.collectedTuples.size());

    Assert.assertEquals(1, initialState.getCurrentPartitions());
    initialState.setPartitionCount(2);
    StatsListener.Response rsp = initialState.processStats(null);
    Assert.assertEquals(true, rsp.repartitionRequired);

    // Create partitions of the operator.
    List<Partition<AbstractFSDirectoryInputOperator<String>>> partitions = Lists.newArrayList();
    partitions.add(new DefaultPartition<AbstractFSDirectoryInputOperator<String>>(oper));
    // incremental capacity is controlled by the partitionCount property
    Collection<Partition<AbstractFSDirectoryInputOperator<String>>> newPartitions = initialState
            .definePartitions(partitions, 0);
    Assert.assertEquals(2, newPartitions.size());
    Assert.assertEquals(1, initialState.getCurrentPartitions());
    Map<Integer, Partition<AbstractFSDirectoryInputOperator<String>>> m = Maps.newHashMap();
    for (Partition<AbstractFSDirectoryInputOperator<String>> p : newPartitions) {
        m.put(m.size(), p);
    }
    initialState.partitioned(m);
    Assert.assertEquals(2, initialState.getCurrentPartitions());

    /* Collect all operators in a list */
    List<AbstractFSDirectoryInputOperator<String>> opers = Lists.newArrayList();
    for (Partition<AbstractFSDirectoryInputOperator<String>> p : newPartitions) {
        TestFSDirectoryInputOperator oi = (TestFSDirectoryInputOperator) p.getPartitionedInstance();
        oi.setup(null);
        oi.output.setSink(sink);
        opers.add(oi);
    }

    sink.clear();
    for (int i = 0; i < 10; i++) {
        for (AbstractFSDirectoryInputOperator<String> o : opers) {
            o.beginWindow(wid);
            o.emitTuples();
            o.endWindow();
        }
        wid++;
    }

    Assert.assertEquals("Remaining tuples read ", 6, sink.collectedTuples.size());
}

From source file: com.datatorrent.lib.io.fs.AbstractFSDirectoryInputOperatorTest.java

License: Open Source License

/**
 * Tests dynamic partitioning that interrupts an ongoing read.
 * - Create 4 files with 3 records each.
 * - Create a single partition and read some records, populating pending files in the operator.
 * - Split it into two operators.
 * - Try to emit the remaining records.
 */
@Test
public void testPartitioningStateTransferFailure() throws Exception {
    TestFSDirectoryInputOperator oper = new TestFSDirectoryInputOperator();
    oper.getScanner().setFilePatternRegexp(".*partition([\\d]*)");
    oper.setDirectory(new File(testMeta.dir).getAbsolutePath());
    oper.setScanIntervalMillis(0);
    oper.setEmitBatchSize(2);

    TestFSDirectoryInputOperator initialState = new Kryo().copy(oper);

    // Create 4 files with 3 records each.
    Path path = new Path(new File(testMeta.dir).getAbsolutePath());
    FileContext.getLocalFSFileContext().delete(path, true);
    int file = 0;
    for (file = 0; file < 4; file++) {
        FileUtils.write(new File(testMeta.dir, "partition00" + file), "a\nb\nc\n");
    }

    CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
    @SuppressWarnings({ "unchecked", "rawtypes" })
    CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
    oper.output.setSink(sink);

    int wid = 0;

    // Read some records
    oper.setup(null);
    for (int i = 0; i < 5; i++) {
        oper.beginWindow(wid);
        oper.emitTuples();
        oper.endWindow();
        wid++;
    }

    Assert.assertEquals("Partial tuples read ", 6, sink.collectedTuples.size());

    Assert.assertEquals(1, initialState.getCurrentPartitions());
    initialState.setPartitionCount(2);
    StatsListener.Response rsp = initialState.processStats(null);
    Assert.assertEquals(true, rsp.repartitionRequired);

    // Create partitions of the operator.
    List<Partition<AbstractFSDirectoryInputOperator<String>>> partitions = Lists.newArrayList();
    partitions.add(new DefaultPartition<AbstractFSDirectoryInputOperator<String>>(oper));
    // incremental capacity is controlled by the partitionCount property
    Collection<Partition<AbstractFSDirectoryInputOperator<String>>> newPartitions = initialState
            .definePartitions(partitions, 0);
    Assert.assertEquals(2, newPartitions.size());
    Assert.assertEquals(1, initialState.getCurrentPartitions());
    Map<Integer, Partition<AbstractFSDirectoryInputOperator<String>>> m = Maps.newHashMap();
    for (Partition<AbstractFSDirectoryInputOperator<String>> p : newPartitions) {
        m.put(m.size(), p);
    }
    initialState.partitioned(m);
    Assert.assertEquals(2, initialState.getCurrentPartitions());

    /* Collect all operators in a list */
    List<AbstractFSDirectoryInputOperator<String>> opers = Lists.newArrayList();
    for (Partition<AbstractFSDirectoryInputOperator<String>> p : newPartitions) {
        TestFSDirectoryInputOperator oi = (TestFSDirectoryInputOperator) p.getPartitionedInstance();
        oi.setup(null);
        oi.output.setSink(sink);
        opers.add(oi);
    }

    sink.clear();
    for (int i = 0; i < 10; i++) {
        for (AbstractFSDirectoryInputOperator<String> o : opers) {
            o.beginWindow(wid);
            o.emitTuples();
            o.endWindow();
        }
        wid++;
    }

    // The remaining records should be read by the new partitions.
    Assert.assertEquals("Remaining tuples read ", 6, sink.collectedTuples.size());
}

From source file: com.datatorrent.lib.io.fs.FileSplitterInputTest.java

License: Apache License

static Set<String> createData(String dataDirectory) throws IOException {
    Set<String> filePaths = Sets.newHashSet();
    FileContext.getLocalFSFileContext().delete(new Path(new File(dataDirectory).getAbsolutePath()), true);
    HashSet<String> allLines = Sets.newHashSet();
    for (int file = 0; file < 12; file++) {
        HashSet<String> lines = Sets.newHashSet();
        for (int line = 0; line < 2; line++) {
            lines.add("f" + file + "l" + line);
        }
        allLines.addAll(lines);
        File created = new File(dataDirectory, "file" + file + ".txt");
        filePaths.add(created.getAbsolutePath());
        FileUtils.write(created, StringUtils.join(lines, '\n'));
    }
    return filePaths;
}

From source file: com.datatorrent.stram.CheckpointTest.java

License: Apache License

/**
 * @throws IOException
 */
@Before
public void setupEachTest() throws IOException {
    try {
        FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);
    } catch (Exception e) {
        throw new RuntimeException("could not cleanup test dir", e);
    }
    //StramChild.eventloop.start();
}

From source file: com.datatorrent.stram.StramLocalCluster.java

License: Apache License

public StramLocalCluster(LogicalPlan dag) throws IOException, ClassNotFoundException {
    dag.validate();
    // ensure plan can be serialized
    cloneLogicalPlan(dag);
    // convert to URI so we always write to local file system,
    // even when the environment has a default HDFS location.
    String pathUri = CLUSTER_WORK_DIR.toURI().toString();
    try {
        FileContext.getLocalFSFileContext().delete(new Path(pathUri/*CLUSTER_WORK_DIR.getAbsolutePath()*/),
                true);
    } catch (IllegalArgumentException e) {
        throw e;
    } catch (IOException e) {
        throw new RuntimeException("could not cleanup test dir", e);
    }

    dag.getAttributes().put(LogicalPlan.APPLICATION_ID, "app_local_" + System.currentTimeMillis());
    if (dag.getAttributes().get(LogicalPlan.APPLICATION_PATH) == null) {
        dag.getAttributes().put(LogicalPlan.APPLICATION_PATH, pathUri);
    }
    if (dag.getAttributes().get(OperatorContext.STORAGE_AGENT) == null) {
        dag.setAttribute(OperatorContext.STORAGE_AGENT,
                new FSStorageAgent(new Path(pathUri, LogicalPlan.SUBDIR_CHECKPOINTS).toString(), null));
    }
    this.dnmgr = new StreamingContainerManager(dag);
    this.umbilical = new UmbilicalProtocolLocalImpl();

    if (!perContainerBufferServer) {
        StreamingContainer.eventloop.start();
        bufferServer = new Server(0, 1024 * 1024, 8);
        bufferServer.setSpoolStorage(new DiskStorage());
        SocketAddress bindAddr = bufferServer.run(StreamingContainer.eventloop);
        this.bufferServerAddress = ((InetSocketAddress) bindAddr);
        LOG.info("Buffer server started: {}", bufferServerAddress);
    }
}