List of usage examples for org.apache.hadoop.fs.FileSystem.getLocal
public static LocalFileSystem getLocal(Configuration conf) throws IOException
From source file:com.splunk.shuttl.testutil.FileSystemUtils.java
License:Apache License
public static FileSystem getLocalFileSystem() { Configuration configuration = new Configuration(); try {// w ww .j a v a 2 s. c o m return FileSystem.getLocal(configuration); } catch (IOException e) { throw new RuntimeException(e); } }
From source file:com.splunk.shuttl.testutil.TUtilsFileSystem.java
License:Apache License
/** * Creates a local filesystem failing the test if it can't. *//*w ww . ja v a 2 s.co m*/ public static FileSystem getLocalFileSystem() { Configuration configuration = new Configuration(); try { return FileSystem.getLocal(configuration); } catch (IOException e) { TUtilsTestNG.failForException("Couldn't create a local filesystem", e); return null; // Will not be executed. } }
From source file:com.talis.hadoop.rdf.ZipUtils.java
License:Apache License
/** * Write a file to a zip output stream, removing leading path name components * from the actual file name when creating the zip file entry. * //from w w w .jav a 2 s . c o m * The entry placed in the zip file is <code>baseName</code>/ * <code>relativePath</code>, where <code>relativePath</code> is constructed * by removing a leading <code>root</code> from the path for * <code>itemToZip</code>. * * If <code>itemToZip</code> is an empty directory, it is ignored. If * <code>itemToZip</code> is a directory, the contents of the directory are * added recursively. * * @param zos The zip output stream * @param baseName The base name to use for the file name entry in the zip * file * @param root The path to remove from <code>itemToZip</code> to make a * relative path name * @param itemToZip The path to the file to be added to the zip file * @return the number of entries added * @throws IOException */ static public int zipDirectory(final Configuration conf, final ZipOutputStream zos, final String baseName, final String root, final Path itemToZip) throws IOException { LOG.info("zipDirectory: {} {} {}", new Object[] { baseName, root, itemToZip }); LocalFileSystem localFs = FileSystem.getLocal(conf); int count = 0; final FileStatus itemStatus = localFs.getFileStatus(itemToZip); if (itemStatus.isDir()) { final FileStatus[] statai = localFs.listStatus(itemToZip); // Add a directory entry to the zip file final String zipDirName = relativePathForZipEntry(itemToZip.toUri().getPath(), baseName, root); final ZipEntry dirZipEntry = new ZipEntry(zipDirName + Path.SEPARATOR_CHAR); LOG.info(String.format("Adding directory %s to zip", zipDirName)); zos.putNextEntry(dirZipEntry); zos.closeEntry(); count++; if (statai == null || statai.length == 0) { LOG.info(String.format("Skipping empty directory %s", itemToZip)); return count; } for (FileStatus status : statai) { count += zipDirectory(conf, zos, baseName, root, status.getPath()); } LOG.info(String.format("Wrote %d entries for directory 
%s", count, itemToZip)); return count; } final String inZipPath = relativePathForZipEntry(itemToZip.toUri().getPath(), baseName, root); if (inZipPath.length() == 0) { LOG.warn(String.format("Skipping empty zip file path for %s (%s %s)", itemToZip, root, baseName)); return 0; } // Take empty files in case the place holder is needed FSDataInputStream in = null; try { in = localFs.open(itemToZip); final ZipEntry ze = new ZipEntry(inZipPath); ze.setTime(itemStatus.getModificationTime()); // Comments confuse looking at the zip file // ze.setComment(itemToZip.toString()); zos.putNextEntry(ze); IOUtils.copyBytes(in, zos, conf, false); zos.closeEntry(); LOG.info(String.format("Wrote %d entries for file %s", count, itemToZip)); return 1; } finally { in.close(); } }
From source file:com.tdunning.plume.local.lazy.MapRedBypassTest.java
License:Apache License
@Test public void test() throws Exception { String outputPath = "/tmp/output-plume-bypasstest"; String inputPath = "/tmp/input-wordcount.txt"; // Prepare input for test FileSystem system = FileSystem.getLocal(new Configuration()); system.copyFromLocalFile(new Path(Resources.getResource("simple-text.txt").getPath()), new Path(inputPath)); // Prepare output for test system.delete(new Path(outputPath), true); // Prepare workflow MapRedBypassWorkflow workFlow = new MapRedBypassWorkflow(); // Execute it MapRedExecutor executor = new MapRedExecutor(); executor.execute(workFlow, outputPath); List<String> str = Files.readLines(new File(outputPath + "/1_1/1-r-00000"), Charsets.UTF_8); Map<String, String> m = Maps.newHashMap(); for (String line : str) { m.put(line.split("\t")[0], line.split("\t")[1]); // not super-optimal, but less code }/* www .j a va 2s . com*/ assertEquals(m.get("To test text processing with some simple-blah"), "To test text processing with some simple-bloh"); assertEquals(m.get("some simple text-blah"), "some simple text-bloh"); assertEquals(m.get("is is-blah"), "is is-bloh"); str = Files.readLines(new File(outputPath + "/1_2/2-r-00000"), Charsets.UTF_8); m = Maps.newHashMap(); for (String line : str) { m.put(line.split("\t")[0], line.split("\t")[1]); // not super-optimal, but less code } assertEquals(m.get("To test text processing with some simple"), "foo"); assertEquals(m.get("some simple text"), "foo"); assertEquals(m.get("is is"), "foo"); }
From source file:com.tdunning.plume.local.lazy.MapRedExecutor.java
License:Apache License
/** * This method can be called to execute a {@link PlumeWorkflow} by using Hadoop Map-Reduce implementation. * It will build the execution tree, optimize it and convert each MSCR step into a MapRed job. * It will launch MSCR jobs in parallel when it is allowable to do so by using a ThreadPool. If one MSCR fails, * all the work flow is canceled. Because it stores the result in a temporary folder, it will only flush the final * result to the API parameter if the work flow has been executed successfully. * /*from ww w. j a v a2s .c o m*/ * @param workFlow The {@link PlumeWorkflow} to execute * @param outputTo Output folder where the result of the work flow will be stored if executed successfully * * @throws IOException If the work flow had to be canceled * @throws InterruptedException */ public void execute(PlumeWorkflow workFlow, String outputTo) throws IOException, InterruptedException { Optimizer optimizer = new Optimizer(); ExecutionStep step = optimizer.optimize(workFlow); int nStep = 0; final String workFlowId = workFlow.getClass().getName() + "-" + System.currentTimeMillis(); do { nStep++; log.info("Begin execution step " + nStep + " for workflow " + workFlow.getClass().getName()); // Create a latch to mark the end of a concurrent step where all MSCRs can be executed in parallel final CountDownLatch latch = new CountDownLatch(step.mscrSteps.size()); // Create a signal that can be flagged if one of the MSCRs fail to abort all the workFlow // - I have chosen an AtomicBoolean in case this flag can be re-set to false under some circumstance - final AtomicBoolean abort = new AtomicBoolean(false); // For each MSCR that can be executed concurrently... for (final MSCR mscr : step.mscrSteps) { final String workFlowOutputPath = tmpOutputFolder + "/" + workFlowId; final String jobId = workFlowId + "/" + mscr.getId(); final String jobOutputPath = tmpOutputFolder + "/" + jobId; log.info("Triggering execution of jobId " + jobId + ". 
Its output will be saved to " + jobOutputPath); // ... Get its MapRed Job final Job job = getMapRed(mscr, workFlow, workFlowOutputPath, jobOutputPath); final FileSystem fS = FileSystem.getLocal(job.getConfiguration()); // ... Submit it to the ThreadPool executor.submit(new Runnable() { @Override public void run() { try { job.waitForCompletion(true); // job completed successfully - materialize outputs log.info("jobId " + jobId + " completed successfully, now materializing outputs."); for (Map.Entry<PCollection<?>, Integer> entry : mscr.getNumberedChannels().entrySet()) { LazyCollection<?> oCol = (LazyCollection<?>) mscr.getOutputChannels() .get(entry.getKey()).output; // Move this output to somewhere recognizable - this executor's tmp folder + this PCollection's Plume Id // This way, mappers that read unmaterialized collections will know where to find intermediate states. FileStatus[] files = fS.listStatus(new Path(jobOutputPath)); Path materializedPath = new Path(workFlowOutputPath + "/" + oCol.getPlumeId()); fS.mkdirs(materializedPath); for (FileStatus file : files) { if (file.getPath().getName().startsWith(entry.getValue() + "-r-")) { FileUtil.copy(fS, file.getPath(), fS, materializedPath, false, job.getConfiguration()); oCol.setFile(materializedPath.toString()); } } log.info( "Materialized plume output " + oCol.getPlumeId() + " to " + oCol.getFile()); } } catch (IOException e) { log.warn("One Job failed: " + jobId + ", current Workflow will be aborted ", e); abort.set(true); // Flag the premature end of this workflow } catch (InterruptedException e) { log.warn("One Job failed: " + jobId + ", current Workflow will be aborted ", e); abort.set(true); // Flag the premature end of this workflow } catch (ClassNotFoundException e) { log.warn("One Job failed: " + jobId + ", current Workflow will be aborted ", e); abort.set(true); // Flag the premature end of this workflow } finally { latch.countDown(); // Count down under any circumstance } } }); } latch.await(); // 
wait until all MSCRs from this step are completed if (abort.get()) { throw new IOException("Current Workflow was aborted"); } step = step.nextStep; } while (step != null); log.info("Workflow ended correctly."); // Move temporary result to where API user wants to: WARN: Local-specific implementation Files.move(new File(tmpOutputFolder + "/" + workFlowId), new File(outputTo)); }
From source file:com.tdunning.plume.local.lazy.MapRedFlattenTest.java
License:Apache License
@Test public void test() throws Exception { String outputPath = "/tmp/output-plume-flattentest"; // Prepare input for test FileSystem system = FileSystem.getLocal(new Configuration()); system.copyFromLocalFile(new Path(Resources.getResource("event2users.txt").getPath()), new Path(inputPathEvent2)); system.copyFromLocalFile(new Path(Resources.getResource("eventslog.txt").getPath()), new Path(inputPathLogFile)); // Prepare output for test system.delete(new Path(outputPath), true); // Prepare workflow MapRedFlattenTestWorkflow workFlow = new MapRedFlattenTestWorkflow(); // Execute it MapRedExecutor executor = new MapRedExecutor(); executor.execute(workFlow, outputPath); }
From source file:com.tdunning.plume.local.lazy.MapRedMultipleGroupsTest.java
License:Apache License
@Test public void test() throws IOException, InterruptedException, ClassNotFoundException { String inputPath = "/tmp/input-wordcount.txt"; String outputPath = "/tmp/output-plume-complex"; // Prepare input for test FileSystem system = FileSystem.getLocal(new Configuration()); system.copyFromLocalFile(new Path(Resources.getResource("simple-text.txt").getPath()), new Path(inputPath)); // Prepare output for test system.delete(new Path(outputPath), true); // Prepare workflow MultipleGroupsWorkflow workFlow = new MultipleGroupsWorkflow(); // Execute it MapRedExecutor executor = new MapRedExecutor(); executor.execute(workFlow, outputPath); // Just assert that 3 output files were written and have content /**//www . j a va 2s .c o m * TODO This test has to check the actual results of the 3 outputs */ for (int i = 1; i <= 3; i++) { File f = new File(outputPath + "/1_" + i + "/" + i + "-r-00000"); assertTrue(f.exists()); assertTrue(f.length() > 64); } }
From source file:com.tdunning.plume.local.lazy.MapRedOnlyFlattensTest.java
License:Apache License
@Test public void test() throws Exception { String outputPath = "/tmp/output-plume-onlyflattentest"; // Prepare input for test FileSystem system = FileSystem.getLocal(new Configuration()); system.copyFromLocalFile(new Path(Resources.getResource("event2users.txt").getPath()), new Path(inputPathEvent2)); system.copyFromLocalFile(new Path(Resources.getResource("eventslog.txt").getPath()), new Path(inputPathLogFile)); system.copyFromLocalFile(new Path(Resources.getResource("eventslog.txt").getPath()), new Path(inputPathLogFile2)); // Prepare output for test system.delete(new Path(outputPath), true); // Prepare workflow MapRedOnlyFlattensTestWorkflow workFlow = new MapRedOnlyFlattensTestWorkflow(); // Execute it MapRedExecutor executor = new MapRedExecutor(); executor.execute(workFlow, outputPath); }
From source file:com.tdunning.plume.local.lazy.MapRedSequenceFileTest.java
License:Apache License
@Test public void test() throws Exception { /*/*from w w w. j a v a2 s. c om*/ * Create input which is SequenceFile<int,int> with data 1,2\n3,4 */ Configuration conf = new Configuration(); Path p = new Path(inputPath); FileSystem localFS = FileSystem.getLocal(conf); if (localFS.exists(p)) { localFS.delete(p, true); // wipe it if needed } SequenceFile.Writer writer = SequenceFile.createWriter(localFS, conf, p, IntWritable.class, IntWritable.class); writer.append(new IntWritable(1), new IntWritable(2)); writer.append(new IntWritable(3), new IntWritable(4)); writer.close(); String outputPath = "/tmp/output-plume-simpletest"; // Prepare input for test FileSystem system = FileSystem.getLocal(new Configuration()); // Prepare output for test system.delete(new Path(outputPath), true); // Prepare workflow OtherWorkflow workFlow = new OtherWorkflow(); // Execute it MapRedExecutor executor = new MapRedExecutor(); executor.execute(workFlow, outputPath); /* * Read output which is SequenceFile<int,int> and assert that it has data 2,3\n4,5 */ p = new Path(outputPath + "/1_1/1-r-00000"); SequenceFile.Reader reader = new SequenceFile.Reader(localFS, p, conf); IntWritable key = new IntWritable(1); IntWritable value = new IntWritable(1); reader.next(key, value); assertEquals(key.get(), 2); assertEquals(value.get(), 3); reader.next(key, value); assertEquals(key.get(), 4); assertEquals(value.get(), 5); reader.close(); }
From source file:com.tdunning.plume.local.lazy.MapRedSingleFlattenChannelTest.java
License:Apache License
@Test public void test() throws Exception { String outputPath = "/tmp/output-plume-singleflattenchanneltest"; String inputPath = "/tmp/input-wordcount.txt"; String inputPath2 = "/tmp/input-moretext.txt"; // Prepare input for test FileSystem system = FileSystem.getLocal(new Configuration()); system.copyFromLocalFile(new Path(Resources.getResource("simple-text.txt").getPath()), new Path(inputPath)); system.copyFromLocalFile(new Path(Resources.getResource("simple-text.txt").getPath()), new Path(inputPath2)); // Prepare output for test system.delete(new Path(outputPath), true); // Prepare workflow MapRedSingleFlattenChannelTestWorkflow workFlow = new MapRedSingleFlattenChannelTestWorkflow(); // Execute it MapRedExecutor executor = new MapRedExecutor(); executor.execute(workFlow, outputPath); /**//from w w w. ja v a 2s . c o m * TODO add test validation */ }