List of usage examples for org.apache.hadoop.fs FileContext getLocalFSFileContext
public static FileContext getLocalFSFileContext() throws UnsupportedFileSystemException
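All of the sourced examples below share one pattern: obtain a FileContext bound to the local file system, then recursively delete a scratch directory. Here is a minimal, self-contained sketch of that pattern, assuming only hadoop-common on the classpath; the class name and the example-test-dir path are illustrative and do not come from any of the sources below.

    import java.io.File;

    import org.apache.hadoop.fs.FileContext;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.permission.FsPermission;

    public class LocalFileContextSketch {
        public static void main(String[] args) throws Exception {
            // Bind a FileContext to the local file system; this throws
            // UnsupportedFileSystemException if the local FS cannot be loaded.
            FileContext fc = FileContext.getLocalFSFileContext();

            // "example-test-dir" under java.io.tmpdir is a placeholder path.
            Path dir = new Path(
                new File(System.getProperty("java.io.tmpdir"), "example-test-dir").getAbsolutePath());

            // Create the directory (and parents), then delete it recursively;
            // the boolean on delete() enables recursion. This is the cleanup
            // pattern used throughout the examples below.
            fc.mkdir(dir, FsPermission.getDirDefault(), true);
            fc.delete(dir, true);
        }
    }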
From source file:org.apache.apex.malhar.lib.io.fs.AbstractFileInputOperatorTest.java
License:Apache License
@Test
public void testIdempotencyWithMultipleEmitTuples() throws Exception {
    FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);
    List<String> allLines = Lists.newArrayList();
    for (int file = 0; file < 2; file++) {
        List<String> lines = Lists.newArrayList();
        for (int line = 0; line < 2; line++) {
            lines.add("f" + file + "l" + line);
        }
        allLines.addAll(lines);
        FileUtils.write(new File(testMeta.dir, "file" + file), StringUtils.join(lines, '\n'));
    }

    LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
    FSWindowDataManager manager = new FSWindowDataManager();
    manager.setStatePath(testMeta.dir + "/recovery");
    oper.setWindowDataManager(manager);

    CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
    TestUtils.setSink(oper.output, queryResults);

    oper.setDirectory(testMeta.dir);
    oper.getScanner().setFilePatternRegexp(".*file[\\d]");

    oper.setup(testMeta.context);
    oper.beginWindow(0);
    for (int i = 0; i < 3; i++) {
        oper.emitTuples();
    }
    oper.endWindow();
    oper.teardown();
    List<String> beforeRecovery = Lists.newArrayList(queryResults.collectedTuples);
    queryResults.clear();

    // idempotency part
    oper.setup(testMeta.context);
    oper.beginWindow(0);
    oper.endWindow();
    Assert.assertEquals("number tuples", 4, queryResults.collectedTuples.size());
    Assert.assertEquals("lines", beforeRecovery, queryResults.collectedTuples);
    oper.teardown();
}
From source file:org.apache.apex.malhar.lib.io.fs.AbstractFileInputOperatorTest.java
License:Apache License
@Test
public void testIdempotencyWhenFileContinued() throws Exception {
    FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);
    List<String> lines = Lists.newArrayList();
    for (int line = 0; line < 10; line++) {
        lines.add("l" + line);
    }
    FileUtils.write(new File(testMeta.dir, "file0"), StringUtils.join(lines, '\n'));

    LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
    FSWindowDataManager manager = new FSWindowDataManager();
    manager.setStatePath(testMeta.dir + "/recovery");
    oper.setEmitBatchSize(5);
    oper.setWindowDataManager(manager);

    CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
    @SuppressWarnings({ "unchecked", "rawtypes" })
    CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
    oper.output.setSink(sink);

    oper.setDirectory(testMeta.dir);
    oper.getScanner().setFilePatternRegexp(".*file[\\d]");

    oper.setup(testMeta.context);
    int offset = 0;
    for (long wid = 0; wid < 3; wid++) {
        oper.beginWindow(wid);
        oper.emitTuples();
        oper.endWindow();
        if (wid > 0) {
            Assert.assertEquals("number tuples", 5, queryResults.collectedTuples.size());
            Assert.assertEquals("lines", lines.subList(offset, offset + 5), queryResults.collectedTuples);
            offset += 5;
        }
        sink.clear();
    }
    oper.teardown();
    sink.clear();

    // idempotency part
    offset = 0;
    oper.setup(testMeta.context);
    for (long wid = 0; wid < 3; wid++) {
        oper.beginWindow(wid);
        oper.endWindow();
        if (wid > 0) {
            Assert.assertEquals("number tuples", 5, queryResults.collectedTuples.size());
            Assert.assertEquals("lines", lines.subList(offset, offset + 5), queryResults.collectedTuples);
            offset += 5;
        }
        sink.clear();
    }
    oper.teardown();
}
From source file:org.apache.apex.malhar.lib.io.fs.AbstractFileInputOperatorTest.java
License:Apache License
@Test
public void testStateWithIdempotency() throws Exception {
    FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);
    HashSet<String> allLines = Sets.newHashSet();
    for (int file = 0; file < 3; file++) {
        HashSet<String> lines = Sets.newHashSet();
        for (int line = 0; line < 2; line++) {
            lines.add("f" + file + "l" + line);
        }
        allLines.addAll(lines);
        FileUtils.write(new File(testMeta.dir, "file" + file), StringUtils.join(lines, '\n'));
    }

    LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
    FSWindowDataManager manager = new FSWindowDataManager();
    manager.setStatePath(testMeta.dir + "/recovery");
    oper.setWindowDataManager(manager);

    CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
    @SuppressWarnings({ "unchecked", "rawtypes" })
    CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
    oper.output.setSink(sink);

    oper.setDirectory(testMeta.dir);
    oper.getScanner().setFilePatternRegexp(".*file[\\d]");

    oper.setup(testMeta.context);
    for (long wid = 0; wid < 4; wid++) {
        oper.beginWindow(wid);
        oper.emitTuples();
        oper.endWindow();
    }
    oper.teardown();
    sink.clear();

    // idempotency part
    oper.pendingFiles.add(new File(testMeta.dir, "file0").getAbsolutePath());
    oper.failedFiles.add(
        new AbstractFileInputOperator.FailedFile(new File(testMeta.dir, "file1").getAbsolutePath(), 0));
    oper.unfinishedFiles.add(
        new AbstractFileInputOperator.FailedFile(new File(testMeta.dir, "file2").getAbsolutePath(), 0));

    oper.setup(testMeta.context);
    for (long wid = 0; wid < 4; wid++) {
        oper.beginWindow(wid);
        oper.endWindow();
    }
    Assert.assertTrue("pending state", !oper.pendingFiles.contains("file0"));
    for (AbstractFileInputOperator.FailedFile failedFile : oper.failedFiles) {
        Assert.assertTrue("failed state", !failedFile.path.equals("file1"));
    }
    for (AbstractFileInputOperator.FailedFile unfinishedFile : oper.unfinishedFiles) {
        Assert.assertTrue("unfinished state", !unfinishedFile.path.equals("file2"));
    }
    oper.teardown();
}
From source file:org.apache.apex.malhar.lib.io.fs.AbstractFileInputOperatorTest.java
License:Apache License
public static <S extends AbstractFileInputOperator, T> void testIdempotencyWithCheckPoint(S oper,
        CollectorTestSink<T> queryResults, IdempotencyTestDriver<S> driver) throws Exception {
    FileContext.getLocalFSFileContext().delete(new Path(new File(driver.getDirectory()).getAbsolutePath()),
        true);

    int file = 0;
    driver.writeFile(5, "file" + file);
    file = 1;
    driver.writeFile(6, "file" + file);
    // empty file
    file = 2;
    driver.writeFile(0, "file" + file);

    FSWindowDataManager manager = new FSWindowDataManager();
    manager.setStatePath(driver.getDirectory() + "/recovery");
    oper.setWindowDataManager(manager);

    oper.setDirectory(driver.getDirectory());
    oper.getScanner().setFilePatternRegexp(".*file[\\d]");
    oper.setup(driver.getContext());
    oper.setEmitBatchSize(3);

    // sort the pendingFiles and ensure the ordering of the files scanned
    DirectoryScannerNew newScanner = new DirectoryScannerNew();
    oper.setScanner(newScanner);

    // scan directory
    oper.beginWindow(0);
    oper.emitTuples();
    oper.endWindow();

    // emit f0l0, f0l1, f0l2
    oper.beginWindow(1);
    oper.emitTuples();
    oper.endWindow();

    // checkpoint the operator
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    S checkPointOper = checkpoint(oper, bos);

    // start saving output
    driver.setSink(oper, queryResults);

    // emit f0l3, f0l4, and closeFile(f0) in the same window
    oper.beginWindow(2);
    oper.emitTuples();
    oper.endWindow();
    List<T> beforeRecovery2 = Lists.newArrayList(queryResults.collectedTuples);

    // emit f1l0, f1l1, f1l2
    oper.beginWindow(3);
    oper.emitTuples();
    oper.endWindow();
    List<T> beforeRecovery3 = Lists.newArrayList(queryResults.collectedTuples);

    // emit f1l3, f1l4, f1l5
    oper.beginWindow(4);
    oper.emitTuples();
    oper.endWindow();
    List<T> beforeRecovery4 = Lists.newArrayList(queryResults.collectedTuples);

    // closeFile(f1) in a new window
    oper.beginWindow(5);
    oper.emitTuples();
    oper.endWindow();
    List<T> beforeRecovery5 = Lists.newArrayList(queryResults.collectedTuples);

    // empty file ops, closeFile(f2) in emitTuples() only
    oper.beginWindow(6);
    oper.emitTuples();
    oper.endWindow();
    List<T> beforeRecovery6 = Lists.newArrayList(queryResults.collectedTuples);

    oper.teardown();
    queryResults.clear();

    // idempotency part
    oper = restoreCheckPoint(checkPointOper, bos);
    driver.getContext().getAttributes().put(Context.OperatorContext.ACTIVATION_WINDOW_ID, 1L);
    oper.setup(driver.getContext());
    driver.setSink(oper, queryResults);

    long startwid = driver.getContext().getAttributes().get(Context.OperatorContext.ACTIVATION_WINDOW_ID) + 1;

    oper.beginWindow(startwid);
    Assert.assertTrue(oper.currentFile == null);
    oper.emitTuples();
    oper.endWindow();
    Assert.assertEquals("lines", beforeRecovery2, queryResults.collectedTuples);

    oper.beginWindow(++startwid);
    oper.emitTuples();
    oper.endWindow();
    Assert.assertEquals("lines", beforeRecovery3, queryResults.collectedTuples);

    oper.beginWindow(++startwid);
    oper.emitTuples();
    oper.endWindow();
    Assert.assertEquals("lines", beforeRecovery4, queryResults.collectedTuples);

    oper.beginWindow(++startwid);
    Assert.assertTrue(oper.currentFile == null);
    oper.emitTuples();
    oper.endWindow();
    Assert.assertEquals("lines", beforeRecovery5, queryResults.collectedTuples);

    oper.beginWindow(++startwid);
    Assert.assertTrue(oper.currentFile == null);
    oper.emitTuples();
    oper.endWindow();
    Assert.assertEquals("lines", beforeRecovery6, queryResults.collectedTuples);
    Assert.assertEquals("number tuples", 8, queryResults.collectedTuples.size());

    oper.teardown();
}
From source file:org.apache.apex.malhar.lib.io.fs.AbstractFileInputOperatorTest.java
License:Apache License
@Test
public void testWindowDataManagerPartitioning() throws Exception {
    LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
    oper.getScanner().setFilePatternRegexp(".*partition([\\d]*)");
    oper.setDirectory(new File(testMeta.dir).getAbsolutePath());
    oper.setWindowDataManager(new FSWindowDataManager());
    oper.operatorId = 7;

    Path path = new Path(new File(testMeta.dir).getAbsolutePath());
    FileContext.getLocalFSFileContext().delete(path, true);
    for (int file = 0; file < 4; file++) {
        FileUtils.write(new File(testMeta.dir, "partition00" + file), "");
    }

    List<Partition<AbstractFileInputOperator<String>>> partitions = Lists.newArrayList();
    partitions.add(new DefaultPartition<AbstractFileInputOperator<String>>(oper));

    Collection<Partition<AbstractFileInputOperator<String>>> newPartitions = oper.definePartitions(partitions,
        new PartitioningContextImpl(null, 2));
    Assert.assertEquals(2, newPartitions.size());
    Assert.assertEquals(1, oper.getCurrentPartitions());

    List<FSWindowDataManager> storageManagers = Lists.newLinkedList();
    for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
        storageManagers.add((FSWindowDataManager) p.getPartitionedInstance().getWindowDataManager());
    }
    Assert.assertEquals("count of storage managers", 2, storageManagers.size());

    int countOfDeleteManagers = 0;
    FSWindowDataManager deleteManager = null;
    for (FSWindowDataManager storageManager : storageManagers) {
        if (storageManager.getDeletedOperators() != null) {
            countOfDeleteManagers++;
            deleteManager = storageManager;
        }
    }

    Assert.assertEquals("count of delete managers", 1, countOfDeleteManagers);
    Assert.assertNotNull("deleted operators manager", deleteManager);
    Assert.assertEquals("deleted operators", Sets.newHashSet(7), deleteManager.getDeletedOperators());
}
From source file:org.apache.apex.malhar.lib.io.fs.AbstractFileInputOperatorTest.java
License:Apache License
/**
 * Partition the operator into 2.
 * Create ten files with the index of the file at the start, i.e. 1_file, 2_file, etc.
 * The scanner returns this index from the getPartition method.
 * Each partition should read 5 files, as file indexes run from 0 to 9 (including 0 and 9).
 * @throws Exception
 */
@Test
public void testWithCustomScanner() throws Exception {
    LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
    oper.setScanner(new MyScanner());
    oper.getScanner().setFilePatternRegexp(".*partition_([\\d]*)");
    oper.setDirectory(new File(testMeta.dir).getAbsolutePath());

    Random rand = new Random();
    Path path = new Path(new File(testMeta.dir).getAbsolutePath());
    FileContext.getLocalFSFileContext().delete(path, true);
    for (int file = 0; file < 10; file++) {
        FileUtils.write(new File(testMeta.dir, file + "_partition_00" + rand.nextInt(100)), "");
    }

    List<Partition<AbstractFileInputOperator<String>>> partitions = Lists.newArrayList();
    partitions.add(new DefaultPartition<AbstractFileInputOperator<String>>(oper));

    Collection<Partition<AbstractFileInputOperator<String>>> newPartitions = oper.definePartitions(partitions,
        new PartitioningContextImpl(null, 2));
    Assert.assertEquals(2, newPartitions.size());
    Assert.assertEquals(1, oper.getCurrentPartitions()); // partitioned() wasn't called

    for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
        Assert.assertNotSame(oper, p.getPartitionedInstance());
        Assert.assertNotSame(oper.getScanner(), p.getPartitionedInstance().getScanner());
        Set<String> consumed = Sets.newHashSet();
        LinkedHashSet<Path> files = p.getPartitionedInstance().getScanner()
            .scan(FileSystem.getLocal(new Configuration(false)), path, consumed);
        Assert.assertEquals("partition " + files, 6, files.size());
    }
}
From source file:org.apache.apex.malhar.lib.io.fs.FileSplitterInputTest.java
License:Apache License
static Set<String> createData(String dataDirectory) throws IOException {
    Set<String> filePaths = Sets.newHashSet();
    FileContext.getLocalFSFileContext().delete(new Path(new File(dataDirectory).getAbsolutePath()), true);
    HashSet<String> allLines = Sets.newHashSet();
    for (int file = 0; file < 12; file++) {
        HashSet<String> lines = Sets.newHashSet();
        for (int line = 0; line < 2; line++) {
            // pad the file number with a 0 so every file has 6 blocks.
            lines.add("f" + String.format("%02d", file) + "l" + line);
        }
        allLines.addAll(lines);
        File created = new File(dataDirectory, "file" + file + ".txt");
        filePaths.add(created.getAbsolutePath());
        FileUtils.write(created, StringUtils.join(lines, '\n'));
    }
    return filePaths;
}
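A hypothetical call site for this helper, e.g. from a test's setup method; the target/data directory is illustrative and not taken from the source:

    // Create the 12 input files and remember their absolute paths.
    Set<String> filePaths = createData(new File("target", "data").getAbsolutePath());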
From source file:org.apache.metron.integration.components.YarnComponent.java
License:Apache License
@Override
public void start() throws UnableToStartException {
    conf = new YarnConfiguration();
    conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 128);
    conf.set("yarn.log.dir", "target");
    conf.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, true);
    conf.set(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class.getName());
    conf.setBoolean(YarnConfiguration.NODE_LABELS_ENABLED, true);

    try {
        yarnCluster = new MiniYARNCluster(testName, 1, NUM_NMS, 1, 1, true);
        yarnCluster.init(conf);
        yarnCluster.start();
        waitForNMsToRegister();

        URL url = Thread.currentThread().getContextClassLoader().getResource("yarn-site.xml");
        if (url == null) {
            throw new RuntimeException("Could not find 'yarn-site.xml' dummy file in classpath");
        }

        Configuration yarnClusterConfig = yarnCluster.getConfig();
        yarnClusterConfig.set("yarn.application.classpath", new File(url.getPath()).getParent());

        // write the document to a buffer (not directly to the file, as that
        // can cause the file being written to get read, which will then fail)
        ByteArrayOutputStream bytesOut = new ByteArrayOutputStream();
        yarnClusterConfig.writeXml(bytesOut);
        bytesOut.close();

        // write the bytes to the file in the classpath
        OutputStream os = new FileOutputStream(new File(url.getPath()));
        os.write(bytesOut.toByteArray());
        os.close();

        FileContext fsContext = FileContext.getLocalFSFileContext();
        fsContext.delete(new Path(conf.get("yarn.timeline-service.leveldb-timeline-store.path")), true);
        try {
            Thread.sleep(2000);
        } catch (InterruptedException e) {
            // ignore; continue startup
        }
    } catch (Exception e) {
        throw new UnableToStartException("Exception setting up yarn cluster", e);
    }
}
From source file:org.apache.metron.integration.components.YarnComponent.java
License:Apache License
@Override
public void stop() {
    if (yarnCluster != null) {
        try {
            yarnCluster.stop();
        } finally {
            yarnCluster = null;
        }
    }
    try {
        FileContext fsContext = FileContext.getLocalFSFileContext();
        fsContext.delete(new Path(conf.get("yarn.timeline-service.leveldb-timeline-store.path")), true);
    } catch (Exception e) {
        // best-effort cleanup; failures are ignored on shutdown
    }
}
From source file:org.apache.solr.hadoop.hack.MiniYARNCluster.java
License:Apache License
/**
 * @param testName name of the test
 * @param noOfNodeManagers the number of node managers in the cluster
 * @param numLocalDirs the number of nm-local-dirs per nodemanager
 * @param numLogDirs the number of nm-log-dirs per nodemanager
 */
public MiniYARNCluster(String testName, int noOfNodeManagers, int numLocalDirs, int numLogDirs,
        File testWorkDir) {
    super(testName.replace("$", ""));
    this.numLocalDirs = numLocalDirs;
    this.numLogDirs = numLogDirs;
    String testSubDir = testName.replace("$", "");
    File targetWorkDir = new File(testWorkDir, testSubDir);
    try {
        FileContext.getLocalFSFileContext().delete(new Path(targetWorkDir.getAbsolutePath()), true);
    } catch (Exception e) {
        LOG.warn("COULD NOT CLEANUP", e);
        throw new YarnRuntimeException("could not cleanup test dir: " + e, e);
    }

    if (Shell.WINDOWS) {
        // The test working directory can exceed the maximum path length supported
        // by some Windows APIs and cmd.exe (260 characters). To work around this,
        // create a symlink in temporary storage with a much shorter path,
        // targeting the full path to the test working directory. Then, use the
        // symlink as the test working directory.
        String targetPath = targetWorkDir.getAbsolutePath();
        File link = new File(System.getProperty("java.io.tmpdir"),
            String.valueOf(System.currentTimeMillis()));
        String linkPath = link.getAbsolutePath();

        try {
            FileContext.getLocalFSFileContext().delete(new Path(linkPath), true);
        } catch (IOException e) {
            throw new YarnRuntimeException("could not cleanup symlink: " + linkPath, e);
        }

        // Guarantee target exists before creating symlink.
        targetWorkDir.mkdirs();

        ShellCommandExecutor shexec = new ShellCommandExecutor(Shell.getSymlinkCommand(targetPath, linkPath));
        try {
            shexec.execute();
        } catch (IOException e) {
            throw new YarnRuntimeException(
                String.format(Locale.ENGLISH, "failed to create symlink from %s to %s, shell output: %s",
                    linkPath, targetPath, shexec.getOutput()),
                e);
        }

        this.testWorkDir = link;
    } else {
        this.testWorkDir = targetWorkDir;
    }

    resourceManagerWrapper = new ResourceManagerWrapper();
    addService(resourceManagerWrapper);
    nodeManagers = new CustomNodeManager[noOfNodeManagers];
    for (int index = 0; index < noOfNodeManagers; index++) {
        addService(new NodeManagerWrapper(index));
        nodeManagers[index] = new CustomNodeManager();
    }
}