Example usage for org.apache.hadoop.fs FileContext getLocalFSFileContext

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileContext#getLocalFSFileContext. All of the examples below are drawn from the Apache Apex Malhar project's AbstractFileInputOperatorTest.

Prototype

public static FileContext getLocalFSFileContext() throws UnsupportedFileSystemException 
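
A minimal, self-contained sketch of how the method is typically called (the scratch directory below is hypothetical, and the exists() guard is an extra precaution for standalone use; the test code further down calls delete() directly):

import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Path;

public class LocalFSFileContextSketch {
    public static void main(String[] args) throws Exception {
        // Obtain a FileContext bound to the local file system.
        FileContext fc = FileContext.getLocalFSFileContext();

        // Hypothetical scratch directory, used only for illustration.
        Path path = new Path("/tmp/fc-example-scratch");

        // delete() declares FileNotFoundException, so guard with exists()
        // before the recursive delete when the path may be absent.
        if (fc.util().exists(path)) {
            fc.delete(path, true); // true = delete recursively
        }
    }
}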

Usage

From source file: org.apache.apex.malhar.lib.io.fs.AbstractFileInputOperatorTest.java

License: Apache License

@Test
public void testScannerPartitioning() throws Exception {
    DirectoryScanner scanner = new DirectoryScanner();
    scanner.setFilePatternRegexp(".*partition([\\d]*)");

    Path path = new Path(new File(testMeta.dir).getAbsolutePath());
    FileContext.getLocalFSFileContext().delete(path, true);
    for (int file = 0; file < 4; file++) {
        FileUtils.write(new File(testMeta.dir, "partition00" + file), "");
    }

    FileSystem fs = FileSystem.get(FileContext.getLocalFSFileContext().getDefaultFileSystem().getUri(),
            new Configuration());
    List<DirectoryScanner> partitions = scanner.partition(2);
    Set<Path> allFiles = Sets.newHashSet();
    for (DirectoryScanner partition : partitions) {
        Set<Path> files = partition.scan(fs, path, Sets.<String>newHashSet());
        Assert.assertEquals("", 3, files.size());
        allFiles.addAll(files);
    }
    Assert.assertEquals("Found all files " + allFiles, 5, allFiles.size());

}
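
A note on the FileSystem.get(...) call above: it bridges from the FileContext API back to the older FileSystem API by reusing the URI of the context's default (local) file system. A minimal sketch of that idiom in isolation:

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.FileSystem;

public class LocalFSBridgeSketch {
    public static void main(String[] args) throws Exception {
        // The default file system of the local FileContext is typically file:///.
        URI localUri = FileContext.getLocalFSFileContext().getDefaultFileSystem().getUri();

        // Obtain the equivalent FileSystem instance for the same scheme.
        FileSystem fs = FileSystem.get(localUri, new Configuration());
        System.out.println(fs.getUri());
    }
}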

From source file: org.apache.apex.malhar.lib.io.fs.AbstractFileInputOperatorTest.java

License: Apache License

@Test
public void testPartitioning() throws Exception {
    LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
    oper.getScanner().setFilePatternRegexp(".*partition([\\d]*)");
    oper.setDirectory(new File(testMeta.dir).getAbsolutePath());

    Path path = new Path(new File(testMeta.dir).getAbsolutePath());
    FileContext.getLocalFSFileContext().delete(path, true);
    for (int file = 0; file < 4; file++) {
        FileUtils.write(new File(testMeta.dir, "partition00" + file), "");
    }

    List<Partition<AbstractFileInputOperator<String>>> partitions = Lists.newArrayList();
    partitions.add(new DefaultPartition<AbstractFileInputOperator<String>>(oper));
    Collection<Partition<AbstractFileInputOperator<String>>> newPartitions = oper.definePartitions(partitions,
            new PartitioningContextImpl(null, 2));
    Assert.assertEquals(2, newPartitions.size());
    Assert.assertEquals(1, oper.getCurrentPartitions()); // partitioned() wasn't called

    for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
        Assert.assertNotSame(oper, p.getPartitionedInstance());
        Assert.assertNotSame(oper.getScanner(), p.getPartitionedInstance().getScanner());
        Set<String> consumed = Sets.newHashSet();
        LinkedHashSet<Path> files = p.getPartitionedInstance().getScanner()
                .scan(FileSystem.getLocal(new Configuration(false)), path, consumed);
        Assert.assertEquals("partition " + files, 3, files.size());
    }
}

From source file: org.apache.apex.malhar.lib.io.fs.AbstractFileInputOperatorTest.java

License: Apache License

/**
 * Test for dynamic partitioning.
 * - Create 4 files with 3 records each.
 * - Create a single partition, and read all records, populating pending files in operator.
 * - Split it in two operators
 * - Try to emit records again, expected result is no record is emitted, as all files are
 *   processed.
 * - Create another 4 files with 3 records each
 * - Try to emit records again, expected result total record emitted 4 * 3 = 12.
 */
@Test
public void testPartitioningStateTransfer() throws Exception {

    LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
    oper.getScanner().setFilePatternRegexp(".*partition([\\d]*)");
    oper.setDirectory(new File(testMeta.dir).getAbsolutePath());
    oper.setScanIntervalMillis(0);

    LineByLineFileInputOperator initialState = new Kryo().copy(oper);

    // Create 4 files with 3 records each.
    Path path = new Path(new File(testMeta.dir).getAbsolutePath());
    FileContext.getLocalFSFileContext().delete(path, true);
    int file;
    for (file = 0; file < 4; file++) {
        FileUtils.write(new File(testMeta.dir, "partition00" + file), "a\nb\nc\n");
    }

    CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
    @SuppressWarnings({ "unchecked", "rawtypes" })
    CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
    oper.output.setSink(sink);

    int wid = 0;

    // Read all records to populate processedList in operator.
    oper.setup(testMeta.context);
    for (int i = 0; i < 10; i++) {
        oper.beginWindow(wid);
        oper.emitTuples();
        oper.endWindow();
        wid++;
    }
    Assert.assertEquals("All tuples read ", 12, sink.collectedTuples.size());

    Assert.assertEquals(1, initialState.getCurrentPartitions());
    initialState.setPartitionCount(2);
    StatsListener.Response rsp = initialState.processStats(null);
    Assert.assertEquals(true, rsp.repartitionRequired);

    // Create partitions of the operator.
    List<Partition<AbstractFileInputOperator<String>>> partitions = Lists.newArrayList();
    partitions.add(new DefaultPartition<AbstractFileInputOperator<String>>(oper));
    // incremental capacity is controlled by the partitionCount property
    Collection<Partition<AbstractFileInputOperator<String>>> newPartitions = initialState
            .definePartitions(partitions, new PartitioningContextImpl(null, 0));
    Assert.assertEquals(2, newPartitions.size());
    Assert.assertEquals(1, initialState.getCurrentPartitions());
    Map<Integer, Partition<AbstractFileInputOperator<String>>> m = Maps.newHashMap();
    for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
        m.put(m.size(), p);
    }
    initialState.partitioned(m);
    Assert.assertEquals(2, initialState.getCurrentPartitions());

    /* Collect all operators in a list */
    List<AbstractFileInputOperator<String>> opers = Lists.newArrayList();
    for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
        LineByLineFileInputOperator oi = (LineByLineFileInputOperator) p.getPartitionedInstance();
        oi.setup(testMeta.context);
        oi.output.setSink(sink);
        opers.add(oi);
    }

    sink.clear();
    for (int i = 0; i < 10; i++) {
        for (AbstractFileInputOperator<String> o : opers) {
            o.beginWindow(wid);
            o.emitTuples();
            o.endWindow();
        }
        wid++;
    }

    // No record should be read.
    Assert.assertEquals("No new tuples read ", 0, sink.collectedTuples.size());

    // Add four new files with 3 records each.
    for (; file < 8; file++) {
        FileUtils.write(new File(testMeta.dir, "partition00" + file), "a\nb\nc\n");
    }

    for (int i = 0; i < 10; i++) {
        for (AbstractFileInputOperator<String> o : opers) {
            o.beginWindow(wid);
            o.emitTuples();
            o.endWindow();
        }
        wid++;
    }

    // If each file is processed exactly once, the total number of records
    // emitted should be 4 * 3 = 12.
    Assert.assertEquals("All tuples read ", 12, sink.collectedTuples.size());
}
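
A note on the new Kryo().copy(oper) line above: the test snapshots the operator's initial state with a Kryo deep copy before any records are read, standing in for the platform's checkpoint/restore cycle. A minimal sketch of the deep-copy idiom, with a hypothetical state class (setRegistrationRequired(false) is set explicitly so the sketch does not depend on any particular Kryo version's default):

import java.util.ArrayList;
import java.util.List;

import com.esotericsoftware.kryo.Kryo;

public class KryoCopySketch {
    // Hypothetical state-bearing class, used only for illustration.
    static class State {
        List<String> pendingFiles = new ArrayList<>();
    }

    public static void main(String[] args) {
        State original = new State();
        original.pendingFiles.add("partition000");

        Kryo kryo = new Kryo();
        kryo.setRegistrationRequired(false);

        // Deep copy: later mutations of the original do not leak into the copy.
        State snapshot = kryo.copy(original);
        original.pendingFiles.add("partition001");
        System.out.println(snapshot.pendingFiles.size()); // still 1
    }
}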

From source file: org.apache.apex.malhar.lib.io.fs.AbstractFileInputOperatorTest.java

License: Apache License

/**
 * Test for dynamic partitioning.
 * - Create 4 files with 3 records each.
 * - Create a single partition, and read some records, populating pending files in operator.
 * - Split it in two operators
 * - Try to emit the remaining records.
 */
@Test
public void testPartitioningStateTransferInterrupted() throws Exception {
    LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
    oper.getScanner().setFilePatternRegexp(".*partition([\\d]*)");
    oper.setDirectory(new File(testMeta.dir).getAbsolutePath());
    oper.setScanIntervalMillis(0);
    oper.setEmitBatchSize(2);

    LineByLineFileInputOperator initialState = new Kryo().copy(oper);

    // Create 4 files with 3 records each.
    Path path = new Path(new File(testMeta.dir).getAbsolutePath());
    FileContext.getLocalFSFileContext().delete(path, true);
    int file;
    for (file = 0; file < 4; file++) {
        FileUtils.write(new File(testMeta.dir, "partition00" + file), "a\nb\nc\n");
    }

    CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
    @SuppressWarnings({ "unchecked", "rawtypes" })
    CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
    oper.output.setSink(sink);

    int wid = 0;

    // Read some records
    oper.setup(testMeta.context);
    for (int i = 0; i < 5; i++) {
        oper.beginWindow(wid);
        oper.emitTuples();
        oper.endWindow();
        wid++;
    }

    Assert.assertEquals("Partial tuples read ", 6, sink.collectedTuples.size());

    Assert.assertEquals(1, initialState.getCurrentPartitions());
    initialState.setPartitionCount(2);
    StatsListener.Response rsp = initialState.processStats(null);
    Assert.assertEquals(true, rsp.repartitionRequired);

    // Create partitions of the operator.
    List<Partition<AbstractFileInputOperator<String>>> partitions = Lists.newArrayList();
    partitions.add(new DefaultPartition<AbstractFileInputOperator<String>>(oper));
    // incremental capacity is controlled by the partitionCount property
    Collection<Partition<AbstractFileInputOperator<String>>> newPartitions = initialState
            .definePartitions(partitions, new PartitioningContextImpl(null, 0));
    Assert.assertEquals(2, newPartitions.size());
    Assert.assertEquals(1, initialState.getCurrentPartitions());
    Map<Integer, Partition<AbstractFileInputOperator<String>>> m = Maps.newHashMap();
    for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
        m.put(m.size(), p);
    }
    initialState.partitioned(m);
    Assert.assertEquals(2, initialState.getCurrentPartitions());

    /* Collect all operators in a list */
    List<AbstractFileInputOperator<String>> opers = Lists.newArrayList();
    for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
        LineByLineFileInputOperator oi = (LineByLineFileInputOperator) p.getPartitionedInstance();
        oi.setup(testMeta.context);
        oi.output.setSink(sink);
        opers.add(oi);
    }

    sink.clear();
    for (int i = 0; i < 10; i++) {
        for (AbstractFileInputOperator<String> o : opers) {
            o.beginWindow(wid);
            o.emitTuples();
            o.endWindow();
        }
        wid++;
    }

    Assert.assertEquals("Remaining tuples read ", 6, sink.collectedTuples.size());
}

From source file: org.apache.apex.malhar.lib.io.fs.AbstractFileInputOperatorTest.java

License: Apache License

/**
 * Test for dynamic partitioning that interrupts an ongoing read.
 * - Create 4 files with 3 records each.
 * - Create a single partition, and read some records, populating pending files in operator.
 * - Split it in two operators
 * - Try to emit the remaining records.
 */
@Test
public void testPartitioningStateTransferFailure() throws Exception {
    LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
    oper.getScanner().setFilePatternRegexp(".*partition([\\d]*)");
    oper.setDirectory(new File(testMeta.dir).getAbsolutePath());
    oper.setScanIntervalMillis(0);
    oper.setEmitBatchSize(2);

    LineByLineFileInputOperator initialState = new Kryo().copy(oper);

    // Create 4 files with 3 records each.
    Path path = new Path(new File(testMeta.dir).getAbsolutePath());
    FileContext.getLocalFSFileContext().delete(path, true);
    int file;
    for (file = 0; file < 4; file++) {
        FileUtils.write(new File(testMeta.dir, "partition00" + file), "a\nb\nc\n");
    }

    CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
    @SuppressWarnings({ "unchecked", "rawtypes" })
    CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
    oper.output.setSink(sink);

    int wid = 0;

    // Read some records
    oper.setup(testMeta.context);
    for (int i = 0; i < 5; i++) {
        oper.beginWindow(wid);
        oper.emitTuples();
        oper.endWindow();
        wid++;
    }

    Assert.assertEquals("Partial tuples read ", 6, sink.collectedTuples.size());

    Assert.assertEquals(1, initialState.getCurrentPartitions());
    initialState.setPartitionCount(2);
    StatsListener.Response rsp = initialState.processStats(null);
    Assert.assertEquals(true, rsp.repartitionRequired);

    // Create partitions of the operator.
    List<Partition<AbstractFileInputOperator<String>>> partitions = Lists.newArrayList();
    partitions.add(new DefaultPartition<AbstractFileInputOperator<String>>(oper));
    // incremental capacity is controlled by the partitionCount property
    Collection<Partition<AbstractFileInputOperator<String>>> newPartitions = initialState
            .definePartitions(partitions, new PartitioningContextImpl(null, 0));
    Assert.assertEquals(2, newPartitions.size());
    Assert.assertEquals(1, initialState.getCurrentPartitions());
    Map<Integer, Partition<AbstractFileInputOperator<String>>> m = Maps.newHashMap();
    for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
        m.put(m.size(), p);
    }
    initialState.partitioned(m);
    Assert.assertEquals(2, initialState.getCurrentPartitions());

    /* Collect all operators in a list */
    List<AbstractFileInputOperator<String>> opers = Lists.newArrayList();
    for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
        LineByLineFileInputOperator oi = (LineByLineFileInputOperator) p.getPartitionedInstance();
        oi.setup(testMeta.context);
        oi.output.setSink(sink);
        opers.add(oi);
    }

    sink.clear();
    for (int i = 0; i < 10; i++) {
        for (AbstractFileInputOperator<String> o : opers) {
            o.beginWindow(wid);
            o.emitTuples();
            o.endWindow();
        }
        wid++;
    }

    // The remaining records should be read.
    Assert.assertEquals("Remaining tuples read ", 6, sink.collectedTuples.size());
}

From source file: org.apache.apex.malhar.lib.io.fs.AbstractFileInputOperatorTest.java

License: Apache License

@Test
public void testRecoveryWithFailedFile() throws Exception {
    FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);
    List<String> allLines = Lists.newArrayList();
    HashSet<String> lines = Sets.newHashSet();
    for (int line = 0; line < 5; line++) {
        lines.add("f0" + "l" + line);
    }
    allLines.addAll(lines);
    File testFile = new File(testMeta.dir, "file0");
    FileUtils.write(testFile, StringUtils.join(lines, '\n'));

    LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
    oper.scanner = null;
    oper.failedFiles.add(new AbstractFileInputOperator.FailedFile(testFile.getAbsolutePath(), 1));

    CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
    @SuppressWarnings({ "unchecked", "rawtypes" })
    CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
    oper.output.setSink(sink);

    oper.setDirectory(testMeta.dir);

    oper.setup(testMeta.context);
    oper.beginWindow(0);
    oper.emitTuples();
    oper.endWindow();

    oper.teardown();

    Assert.assertEquals("number tuples", 4, queryResults.collectedTuples.size());
    Assert.assertEquals("lines", allLines.subList(1, allLines.size()),
            new ArrayList<String>(queryResults.collectedTuples));
}

From source file: org.apache.apex.malhar.lib.io.fs.AbstractFileInputOperatorTest.java

License: Apache License

@Test
public void testRecoveryWithUnfinishedFile() throws Exception {
    FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);
    List<String> allLines = Lists.newArrayList();
    HashSet<String> lines = Sets.newHashSet();
    for (int line = 0; line < 5; line++) {
        lines.add("f0" + "l" + line);
    }
    allLines.addAll(lines);
    File testFile = new File(testMeta.dir, "file0");
    FileUtils.write(testFile, StringUtils.join(lines, '\n'));

    LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
    oper.scanner = null;
    oper.unfinishedFiles.add(new AbstractFileInputOperator.FailedFile(testFile.getAbsolutePath(), 2));

    CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
    @SuppressWarnings({ "unchecked", "rawtypes" })
    CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
    oper.output.setSink(sink);

    oper.setDirectory(testMeta.dir);

    oper.setup(testMeta.context);
    oper.beginWindow(0);
    oper.emitTuples();
    oper.endWindow();

    oper.teardown();

    Assert.assertEquals("number tuples", 3, queryResults.collectedTuples.size());
    Assert.assertEquals("lines", allLines.subList(2, allLines.size()),
            new ArrayList<String>(queryResults.collectedTuples));
}

From source file: org.apache.apex.malhar.lib.io.fs.AbstractFileInputOperatorTest.java

License: Apache License

@Test
public void testRecoveryWithPendingFile() throws Exception {
    FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);
    List<String> allLines = Lists.newArrayList();
    HashSet<String> lines = Sets.newHashSet();
    for (int line = 0; line < 5; line++) {
        lines.add("f0" + "l" + line);
    }
    allLines.addAll(lines);
    File testFile = new File(testMeta.dir, "file0");
    FileUtils.write(testFile, StringUtils.join(lines, '\n'));

    LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
    oper.scanner = null;
    oper.pendingFiles.add(testFile.getAbsolutePath());

    CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
    @SuppressWarnings({ "unchecked", "rawtypes" })
    CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
    oper.output.setSink(sink);

    oper.setDirectory(testMeta.dir);

    oper.setup(testMeta.context);
    oper.beginWindow(0);
    oper.emitTuples();
    oper.endWindow();

    oper.teardown();

    Assert.assertEquals("number tuples", 5, queryResults.collectedTuples.size());
    Assert.assertEquals("lines", allLines, new ArrayList<String>(queryResults.collectedTuples));
}

From source file: org.apache.apex.malhar.lib.io.fs.AbstractFileInputOperatorTest.java

License: Apache License

@Test
public void testRecoveryWithCurrentFile() throws Exception {
    FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);
    List<String> allLines = Lists.newArrayList();
    HashSet<String> lines = Sets.newHashSet();
    for (int line = 0; line < 5; line++) {
        lines.add("f0" + "l" + line);
    }
    allLines.addAll(lines);
    File testFile = new File(testMeta.dir, "file0");
    FileUtils.write(testFile, StringUtils.join(lines, '\n'));

    LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
    oper.scanner = null;
    oper.currentFile = testFile.getAbsolutePath();
    oper.offset = 1;

    CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
    @SuppressWarnings({ "unchecked", "rawtypes" })
    CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
    oper.output.setSink(sink);

    oper.setDirectory(testMeta.dir);

    oper.setup(testMeta.context);
    oper.beginWindow(0);
    oper.emitTuples();
    oper.endWindow();

    oper.teardown();

    Assert.assertEquals("number tuples", 4, queryResults.collectedTuples.size());
    Assert.assertEquals("lines", allLines.subList(1, allLines.size()),
            new ArrayList<String>(queryResults.collectedTuples));
}

From source file: org.apache.apex.malhar.lib.io.fs.AbstractFileInputOperatorTest.java

License: Apache License

@Test
public void testIdempotency() throws Exception {
    FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);

    List<String> allLines = Lists.newArrayList();
    for (int file = 0; file < 2; file++) {
        List<String> lines = Lists.newArrayList();
        for (int line = 0; line < 2; line++) {
            lines.add("f" + file + "l" + line);
        }
        allLines.addAll(lines);
        FileUtils.write(new File(testMeta.dir, "file" + file), StringUtils.join(lines, '\n'));
    }

    LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
    FSWindowDataManager manager = new FSWindowDataManager();
    manager.setStatePath(testMeta.dir + "/recovery");

    oper.setWindowDataManager(manager);

    CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
    TestUtils.setSink(oper.output, queryResults);

    oper.setDirectory(testMeta.dir);
    oper.getScanner().setFilePatternRegexp(".*file[\\d]");

    oper.setup(testMeta.context);
    for (long wid = 0; wid < 3; wid++) {
        oper.beginWindow(wid);
        oper.emitTuples();
        oper.endWindow();
    }
    oper.teardown();
    List<String> beforeRecovery = Lists.newArrayList(queryResults.collectedTuples);

    queryResults.clear();

    // Idempotency part: replay the same windows and verify the same tuples are re-emitted.
    oper.setup(testMeta.context);
    for (long wid = 0; wid < 3; wid++) {
        oper.beginWindow(wid);
        oper.endWindow();
    }
    Assert.assertEquals("number tuples", 4, queryResults.collectedTuples.size());
    Assert.assertEquals("lines", beforeRecovery, queryResults.collectedTuples);
    oper.teardown();
}
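
All of the examples above use the local FileContext only to clean a test directory. As a closing illustration, here is a hedged sketch of a fuller round trip (mkdir, write, recursive delete) through the same FileContext; the path and payload are hypothetical:

import java.util.EnumSet;

import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;

public class LocalFSRoundTripSketch {
    public static void main(String[] args) throws Exception {
        FileContext fc = FileContext.getLocalFSFileContext();

        // Hypothetical scratch directory; create it (and parents) with defaults.
        Path dir = new Path("/tmp/fc-roundtrip");
        fc.mkdir(dir, FsPermission.getDirDefault(), true);

        // Write a small file through the FileContext.
        Path file = new Path(dir, "data.txt");
        try (FSDataOutputStream out = fc.create(file,
                EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE))) {
            out.writeBytes("a\nb\nc\n");
        }

        // The same recursive-delete cleanup idiom the tests use.
        fc.delete(dir, true);
    }
}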