List of usage examples for org.apache.hadoop.fs.FileContext#getLocalFSFileContext
public static FileContext getLocalFSFileContext() throws UnsupportedFileSystemException
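getLocalFSFileContext() returns a FileContext bound to the local file system, regardless of the configured default file system (which may be HDFS). In the examples below it is used almost exclusively for one idiom: recursively deleting a working directory before a test runs. A minimal self-contained sketch of the API, assuming a scratch path /tmp/fc-demo and a demo class name chosen only for illustration (neither appears in the source files below):

import java.io.File;
import java.io.IOException;
import java.util.EnumSet;

import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;

public class LocalFSFileContextDemo {
  public static void main(String[] args) throws IOException {
    // A FileContext backed by the local file system; the call throws
    // UnsupportedFileSystemException (a subclass of IOException) if no
    // local file system implementation is available.
    FileContext fc = FileContext.getLocalFSFileContext();

    // Hypothetical scratch directory, used only for this illustration.
    Path dir = new Path(new File("/tmp/fc-demo").getAbsolutePath());

    fc.mkdir(dir, FsPermission.getDirDefault(), true);
    try (FSDataOutputStream out = fc.create(new Path(dir, "file0"),
        EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE))) {
      out.writeBytes("hello\n");
    }

    // Recursive delete of the whole directory tree; this is the cleanup
    // idiom repeated throughout the test code below.
    fc.delete(dir, true);
  }
}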
From source file: com.datatorrent.lib.io.fs.AbstractFileInputOperatorTest.java
License: Open Source License

@Test
public void testRecoveryWithCurrentFile() throws Exception {
  FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);

  List<String> allLines = Lists.newArrayList();
  HashSet<String> lines = Sets.newHashSet();
  for (int line = 0; line < 5; line++) {
    lines.add("f0" + "l" + line);
  }
  allLines.addAll(lines);
  File testFile = new File(testMeta.dir, "file0");
  FileUtils.write(testFile, StringUtils.join(lines, '\n'));

  TestFileInputOperator oper = new TestFileInputOperator();
  oper.scanner = null;
  oper.currentFile = testFile.getAbsolutePath();
  oper.offset = 1;

  CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
  @SuppressWarnings({ "unchecked", "rawtypes" })
  CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
  oper.output.setSink(sink);

  oper.setDirectory(testMeta.dir);
  oper.setup(null);
  oper.beginWindow(0);
  oper.emitTuples();
  oper.endWindow();
  oper.teardown();

  Assert.assertEquals("number tuples", 4, queryResults.collectedTuples.size());
  Assert.assertEquals("lines", allLines.subList(1, allLines.size()),
      new ArrayList<String>(queryResults.collectedTuples));
}
From source file: com.datatorrent.lib.io.fs.AbstractFSDirectoryInputOperatorFailureHandlingTest.java
License: Open Source License

@Test
public void testFailureHandling() throws Exception {
  FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);
  HashSet<String> allLines = Sets.newHashSet();
  // Create 10 files with 10 records each (100 records in total).
  for (int file = 0; file < 10; file++) {
    HashSet<String> lines = Sets.newHashSet();
    for (int line = 0; line < 10; line++) {
      lines.add("f" + file + "l" + line);
    }
    allLines.addAll(lines);
    FileUtils.write(new File(testMeta.dir, "file" + file), StringUtils.join(lines, '\n'));
  }
  Thread.sleep(10);

  TestFSDirectoryInputOperator oper = new TestFSDirectoryInputOperator();
  CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
  @SuppressWarnings({ "unchecked", "rawtypes" })
  CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
  oper.output.setSink(sink);

  oper.setDirectory(testMeta.dir);
  oper.getScanner().setFilePatternRegexp(".*file[\\d]");
  oper.setup(null);
  for (long wid = 0; wid < 1000; wid++) {
    oper.beginWindow(wid);
    oper.emitTuples();
    oper.endWindow();
  }
  oper.teardown();

  Assert.assertEquals("number tuples", 100, queryResults.collectedTuples.size());
  Assert.assertEquals("lines", allLines, new HashSet<String>(queryResults.collectedTuples));
}
From source file: com.datatorrent.lib.io.fs.AbstractFSDirectoryInputOperatorTest.java
License: Open Source License

@Test
public void testSinglePartiton() throws Exception {
  FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);
  HashSet<String> allLines = Sets.newHashSet();
  for (int file = 0; file < 2; file++) {
    HashSet<String> lines = Sets.newHashSet();
    for (int line = 0; line < 2; line++) {
      lines.add("f" + file + "l" + line);
    }
    allLines.addAll(lines);
    FileUtils.write(new File(testMeta.dir, "file" + file), StringUtils.join(lines, '\n'));
  }

  TestFSDirectoryInputOperator oper = new TestFSDirectoryInputOperator();
  CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
  @SuppressWarnings({ "unchecked", "rawtypes" })
  CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
  oper.output.setSink(sink);

  oper.setDirectory(testMeta.dir);
  oper.getScanner().setFilePatternRegexp(".*file[\\d]");
  oper.setup(null);
  for (long wid = 0; wid < 3; wid++) {
    oper.beginWindow(wid);
    oper.emitTuples();
    oper.endWindow();
  }
  oper.teardown();

  Assert.assertEquals("number tuples", 4, queryResults.collectedTuples.size());
  Assert.assertEquals("lines", allLines, new HashSet<String>(queryResults.collectedTuples));
}
From source file: com.datatorrent.lib.io.fs.AbstractFSDirectoryInputOperatorTest.java
License: Open Source License

@Test
public void testPartitioning() throws Exception {
  TestFSDirectoryInputOperator oper = new TestFSDirectoryInputOperator();
  oper.getScanner().setFilePatternRegexp(".*partition([\\d]*)");
  oper.setDirectory(new File(testMeta.dir).getAbsolutePath());

  Path path = new Path(new File(testMeta.dir).getAbsolutePath());
  FileContext.getLocalFSFileContext().delete(path, true);
  for (int file = 0; file < 4; file++) {
    FileUtils.write(new File(testMeta.dir, "partition00" + file), "");
  }

  List<Partition<AbstractFSDirectoryInputOperator<String>>> partitions = Lists.newArrayList();
  partitions.add(new DefaultPartition<AbstractFSDirectoryInputOperator<String>>(oper));
  Collection<Partition<AbstractFSDirectoryInputOperator<String>>> newPartitions =
      oper.definePartitions(partitions, 1);
  Assert.assertEquals(2, newPartitions.size());
  Assert.assertEquals(2, oper.getCurrentPartitions());

  for (Partition<AbstractFSDirectoryInputOperator<String>> p : newPartitions) {
    Assert.assertNotSame(oper, p.getPartitionedInstance());
    Assert.assertNotSame(oper.getScanner(), p.getPartitionedInstance().getScanner());
    Set<String> consumed = Sets.newHashSet();
    LinkedHashSet<Path> files = p.getPartitionedInstance().getScanner()
        .scan(FileSystem.getLocal(new Configuration(false)), path, consumed);
    Assert.assertEquals("partition " + files, 2, files.size());
  }
}
From source file: com.datatorrent.lib.io.fs.AbstractFSDirectoryInputOperatorTest.java
License: Open Source License

/**
 * Test dynamic partitioning.
 * - Create 4 files with 3 records each.
 * - Create a single partition and read all records, populating the operator's list of processed files.
 * - Split it into two operators.
 * - Try to emit records again; no record should be emitted, since all files are already processed.
 * - Create another 4 files with 3 records each.
 * - Try to emit records again; expected total of records emitted is 4 * 3 = 12.
 */
@Test
public void testPartitioningStateTransfer() throws Exception {
  TestFSDirectoryInputOperator oper = new TestFSDirectoryInputOperator();
  oper.getScanner().setFilePatternRegexp(".*partition([\\d]*)");
  oper.setDirectory(new File(testMeta.dir).getAbsolutePath());
  oper.setScanIntervalMillis(0);

  TestFSDirectoryInputOperator initialState = new Kryo().copy(oper);

  // Create 4 files with 3 records each.
  Path path = new Path(new File(testMeta.dir).getAbsolutePath());
  FileContext.getLocalFSFileContext().delete(path, true);
  int file = 0;
  for (file = 0; file < 4; file++) {
    FileUtils.write(new File(testMeta.dir, "partition00" + file), "a\nb\nc\n");
  }

  CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
  @SuppressWarnings({ "unchecked", "rawtypes" })
  CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
  oper.output.setSink(sink);

  int wid = 0;
  // Read all records to populate the operator's processedList.
  oper.setup(null);
  for (int i = 0; i < 10; i++) {
    oper.beginWindow(wid);
    oper.emitTuples();
    oper.endWindow();
    wid++;
  }
  Assert.assertEquals("All tuples read ", 12, sink.collectedTuples.size());

  Assert.assertEquals(1, initialState.getCurrentPartitions());
  initialState.setPartitionCount(2);
  StatsListener.Response rsp = initialState.processStats(null);
  Assert.assertEquals(true, rsp.repartitionRequired);

  // Create partitions of the operator.
  List<Partition<AbstractFSDirectoryInputOperator<String>>> partitions = Lists.newArrayList();
  partitions.add(new DefaultPartition<AbstractFSDirectoryInputOperator<String>>(oper));
  // Incremental capacity is controlled by the partitionCount property.
  Collection<Partition<AbstractFSDirectoryInputOperator<String>>> newPartitions =
      initialState.definePartitions(partitions, 0);
  Assert.assertEquals(2, newPartitions.size());
  Assert.assertEquals(1, initialState.getCurrentPartitions());

  Map<Integer, Partition<AbstractFSDirectoryInputOperator<String>>> m = Maps.newHashMap();
  for (Partition<AbstractFSDirectoryInputOperator<String>> p : newPartitions) {
    m.put(m.size(), p);
  }
  initialState.partitioned(m);
  Assert.assertEquals(2, initialState.getCurrentPartitions());

  // Collect all operators in a list.
  List<AbstractFSDirectoryInputOperator<String>> opers = Lists.newArrayList();
  for (Partition<AbstractFSDirectoryInputOperator<String>> p : newPartitions) {
    TestFSDirectoryInputOperator oi = (TestFSDirectoryInputOperator) p.getPartitionedInstance();
    oi.setup(null);
    oi.output.setSink(sink);
    opers.add(oi);
  }

  sink.clear();
  for (int i = 0; i < 10; i++) {
    for (AbstractFSDirectoryInputOperator<String> o : opers) {
      o.beginWindow(wid);
      o.emitTuples();
      o.endWindow();
    }
    wid++;
  }
  // No record should be read: all files were processed before repartitioning.
  Assert.assertEquals("No new tuples read ", 0, sink.collectedTuples.size());

  // Add four new files with 3 records each.
  for (; file < 8; file++) {
    FileUtils.write(new File(testMeta.dir, "partition00" + file), "a\nb\nc\n");
  }
  for (int i = 0; i < 10; i++) {
    for (AbstractFSDirectoryInputOperator<String> o : opers) {
      o.beginWindow(wid);
      o.emitTuples();
      o.endWindow();
    }
    wid++;
  }
  // If each file is processed exactly once, 12 records should be emitted.
  Assert.assertEquals("All tuples read ", 12, sink.collectedTuples.size());
}
From source file: com.datatorrent.lib.io.fs.AbstractFSDirectoryInputOperatorTest.java
License: Open Source License

/**
 * Test dynamic partitioning.
 * - Create 4 files with 3 records each.
 * - Create a single partition and read some records, populating the operator's pending files.
 * - Split it into two operators.
 * - Try to emit the remaining records.
 */
@Test
public void testPartitioningStateTransferInterrupted() throws Exception {
  TestFSDirectoryInputOperator oper = new TestFSDirectoryInputOperator();
  oper.getScanner().setFilePatternRegexp(".*partition([\\d]*)");
  oper.setDirectory(new File(testMeta.dir).getAbsolutePath());
  oper.setScanIntervalMillis(0);
  oper.setEmitBatchSize(2);

  TestFSDirectoryInputOperator initialState = new Kryo().copy(oper);

  // Create 4 files with 3 records each.
  Path path = new Path(new File(testMeta.dir).getAbsolutePath());
  FileContext.getLocalFSFileContext().delete(path, true);
  int file = 0;
  for (file = 0; file < 4; file++) {
    FileUtils.write(new File(testMeta.dir, "partition00" + file), "a\nb\nc\n");
  }

  CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
  @SuppressWarnings({ "unchecked", "rawtypes" })
  CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
  oper.output.setSink(sink);

  int wid = 0;
  // Read some records.
  oper.setup(null);
  for (int i = 0; i < 5; i++) {
    oper.beginWindow(wid);
    oper.emitTuples();
    oper.endWindow();
    wid++;
  }
  Assert.assertEquals("Partial tuples read ", 6, sink.collectedTuples.size());

  Assert.assertEquals(1, initialState.getCurrentPartitions());
  initialState.setPartitionCount(2);
  StatsListener.Response rsp = initialState.processStats(null);
  Assert.assertEquals(true, rsp.repartitionRequired);

  // Create partitions of the operator.
  List<Partition<AbstractFSDirectoryInputOperator<String>>> partitions = Lists.newArrayList();
  partitions.add(new DefaultPartition<AbstractFSDirectoryInputOperator<String>>(oper));
  // Incremental capacity is controlled by the partitionCount property.
  Collection<Partition<AbstractFSDirectoryInputOperator<String>>> newPartitions =
      initialState.definePartitions(partitions, 0);
  Assert.assertEquals(2, newPartitions.size());
  Assert.assertEquals(1, initialState.getCurrentPartitions());

  Map<Integer, Partition<AbstractFSDirectoryInputOperator<String>>> m = Maps.newHashMap();
  for (Partition<AbstractFSDirectoryInputOperator<String>> p : newPartitions) {
    m.put(m.size(), p);
  }
  initialState.partitioned(m);
  Assert.assertEquals(2, initialState.getCurrentPartitions());

  // Collect all operators in a list.
  List<AbstractFSDirectoryInputOperator<String>> opers = Lists.newArrayList();
  for (Partition<AbstractFSDirectoryInputOperator<String>> p : newPartitions) {
    TestFSDirectoryInputOperator oi = (TestFSDirectoryInputOperator) p.getPartitionedInstance();
    oi.setup(null);
    oi.output.setSink(sink);
    opers.add(oi);
  }

  sink.clear();
  for (int i = 0; i < 10; i++) {
    for (AbstractFSDirectoryInputOperator<String> o : opers) {
      o.beginWindow(wid);
      o.emitTuples();
      o.endWindow();
    }
    wid++;
  }
  Assert.assertEquals("Remaining tuples read ", 6, sink.collectedTuples.size());
}
From source file: com.datatorrent.lib.io.fs.AbstractFSDirectoryInputOperatorTest.java
License: Open Source License

/**
 * Test dynamic partitioning that interrupts an ongoing read.
 * - Create 4 files with 3 records each.
 * - Create a single partition and read some records, populating the operator's pending files.
 * - Split it into two operators.
 * - Try to emit the remaining records.
 */
@Test
public void testPartitioningStateTransferFailure() throws Exception {
  TestFSDirectoryInputOperator oper = new TestFSDirectoryInputOperator();
  oper.getScanner().setFilePatternRegexp(".*partition([\\d]*)");
  oper.setDirectory(new File(testMeta.dir).getAbsolutePath());
  oper.setScanIntervalMillis(0);
  oper.setEmitBatchSize(2);

  TestFSDirectoryInputOperator initialState = new Kryo().copy(oper);

  // Create 4 files with 3 records each.
  Path path = new Path(new File(testMeta.dir).getAbsolutePath());
  FileContext.getLocalFSFileContext().delete(path, true);
  int file = 0;
  for (file = 0; file < 4; file++) {
    FileUtils.write(new File(testMeta.dir, "partition00" + file), "a\nb\nc\n");
  }

  CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
  @SuppressWarnings({ "unchecked", "rawtypes" })
  CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
  oper.output.setSink(sink);

  int wid = 0;
  // Read some records.
  oper.setup(null);
  for (int i = 0; i < 5; i++) {
    oper.beginWindow(wid);
    oper.emitTuples();
    oper.endWindow();
    wid++;
  }
  Assert.assertEquals("Partial tuples read ", 6, sink.collectedTuples.size());

  Assert.assertEquals(1, initialState.getCurrentPartitions());
  initialState.setPartitionCount(2);
  StatsListener.Response rsp = initialState.processStats(null);
  Assert.assertEquals(true, rsp.repartitionRequired);

  // Create partitions of the operator.
  List<Partition<AbstractFSDirectoryInputOperator<String>>> partitions = Lists.newArrayList();
  partitions.add(new DefaultPartition<AbstractFSDirectoryInputOperator<String>>(oper));
  // Incremental capacity is controlled by the partitionCount property.
  Collection<Partition<AbstractFSDirectoryInputOperator<String>>> newPartitions =
      initialState.definePartitions(partitions, 0);
  Assert.assertEquals(2, newPartitions.size());
  Assert.assertEquals(1, initialState.getCurrentPartitions());

  Map<Integer, Partition<AbstractFSDirectoryInputOperator<String>>> m = Maps.newHashMap();
  for (Partition<AbstractFSDirectoryInputOperator<String>> p : newPartitions) {
    m.put(m.size(), p);
  }
  initialState.partitioned(m);
  Assert.assertEquals(2, initialState.getCurrentPartitions());

  // Collect all operators in a list.
  List<AbstractFSDirectoryInputOperator<String>> opers = Lists.newArrayList();
  for (Partition<AbstractFSDirectoryInputOperator<String>> p : newPartitions) {
    TestFSDirectoryInputOperator oi = (TestFSDirectoryInputOperator) p.getPartitionedInstance();
    oi.setup(null);
    oi.output.setSink(sink);
    opers.add(oi);
  }

  sink.clear();
  for (int i = 0; i < 10; i++) {
    for (AbstractFSDirectoryInputOperator<String> o : opers) {
      o.beginWindow(wid);
      o.emitTuples();
      o.endWindow();
    }
    wid++;
  }
  // The remaining records should be read exactly once.
  Assert.assertEquals("Remaining tuples read ", 6, sink.collectedTuples.size());
}
From source file: com.datatorrent.lib.io.fs.FileSplitterInputTest.java
License: Apache License

static Set<String> createData(String dataDirectory) throws IOException {
  Set<String> filePaths = Sets.newHashSet();
  FileContext.getLocalFSFileContext().delete(new Path(new File(dataDirectory).getAbsolutePath()), true);
  HashSet<String> allLines = Sets.newHashSet();
  for (int file = 0; file < 12; file++) {
    HashSet<String> lines = Sets.newHashSet();
    for (int line = 0; line < 2; line++) {
      lines.add("f" + file + "l" + line);
    }
    allLines.addAll(lines);
    File created = new File(dataDirectory, "file" + file + ".txt");
    filePaths.add(created.getAbsolutePath());
    FileUtils.write(created, StringUtils.join(lines, '\n'));
  }
  return filePaths;
}
From source file: com.datatorrent.stram.CheckpointTest.java
License: Apache License

/**
 * @throws IOException
 */
@Before
public void setupEachTest() throws IOException {
  try {
    FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);
  } catch (Exception e) {
    throw new RuntimeException("could not cleanup test dir", e);
  }
  //StramChild.eventloop.start();
}
From source file: com.datatorrent.stram.StramLocalCluster.java
License: Apache License

public StramLocalCluster(LogicalPlan dag) throws IOException, ClassNotFoundException {
  dag.validate();
  // Ensure the plan can be serialized.
  cloneLogicalPlan(dag);

  // Convert to a URI so we always write to the local file system,
  // even when the environment has a default HDFS location.
  String pathUri = CLUSTER_WORK_DIR.toURI().toString();
  try {
    FileContext.getLocalFSFileContext().delete(new Path(pathUri), true);
  } catch (IllegalArgumentException e) {
    throw e;
  } catch (IOException e) {
    throw new RuntimeException("could not cleanup test dir", e);
  }

  dag.getAttributes().put(LogicalPlan.APPLICATION_ID, "app_local_" + System.currentTimeMillis());
  if (dag.getAttributes().get(LogicalPlan.APPLICATION_PATH) == null) {
    dag.getAttributes().put(LogicalPlan.APPLICATION_PATH, pathUri);
  }
  if (dag.getAttributes().get(OperatorContext.STORAGE_AGENT) == null) {
    dag.setAttribute(OperatorContext.STORAGE_AGENT,
        new FSStorageAgent(new Path(pathUri, LogicalPlan.SUBDIR_CHECKPOINTS).toString(), null));
  }

  this.dnmgr = new StreamingContainerManager(dag);
  this.umbilical = new UmbilicalProtocolLocalImpl();

  if (!perContainerBufferServer) {
    StreamingContainer.eventloop.start();
    bufferServer = new Server(0, 1024 * 1024, 8);
    bufferServer.setSpoolStorage(new DiskStorage());
    SocketAddress bindAddr = bufferServer.run(StreamingContainer.eventloop);
    this.bufferServerAddress = ((InetSocketAddress) bindAddr);
    LOG.info("Buffer server started: {}", bufferServerAddress);
  }
}