Example usage for org.apache.hadoop.fs FileSystem getLocal

Introduction

On this page you can find example usages of org.apache.hadoop.fs FileSystem getLocal.

Prototype

public static LocalFileSystem getLocal(Configuration conf) throws IOException

Source Link

Document

Get the local FileSystem.

Usage

From source file:com.splunk.shuttl.testutil.FileSystemUtils.java

License:Apache License

/**
 * Returns the local Hadoop file system obtained from a freshly constructed
 * {@link Configuration}.
 *
 * @return the local file system
 * @throws RuntimeException wrapping the {@link IOException} raised while
 *         obtaining the local file system
 */
public static FileSystem getLocalFileSystem() {
    final Configuration conf = new Configuration();
    final FileSystem localFs;
    try {
        localFs = FileSystem.getLocal(conf);
    } catch (IOException ioe) {
        // Callers are not expected to handle the checked exception.
        throw new RuntimeException(ioe);
    }
    return localFs;
}

From source file:com.splunk.shuttl.testutil.TUtilsFileSystem.java

License:Apache License

/**
 * Creates a local filesystem, failing the test if it can't.
 *
 * @return the local file system, or <code>null</code> if the failure helper
 *         returns normally after an {@link IOException}
 */
public static FileSystem getLocalFileSystem() {
    final Configuration conf = new Configuration();
    FileSystem localFs = null;
    try {
        localFs = FileSystem.getLocal(conf);
    } catch (IOException ioe) {
        // Presumably fails the running test by throwing; the null result is
        // only a fallback for the compiler.
        TUtilsTestNG.failForException("Couldn't create a local filesystem", ioe);
    }
    return localFs;
}

From source file:com.talis.hadoop.rdf.ZipUtils.java

License:Apache License

/**
 * Write a file or directory tree from the local file system to a zip output
 * stream, removing leading path name components from the actual file name
 * when creating the zip file entry.
 *
 * The entry name is produced by <code>relativePathForZipEntry</code> from the
 * path of <code>itemToZip</code>, <code>baseName</code> and <code>root</code>
 * (presumably <code>baseName</code>/<code>relativePath</code>, where
 * <code>relativePath</code> is the path with a leading <code>root</code>
 * removed - confirm against that helper).
 *
 * If <code>itemToZip</code> is a directory, a directory entry is written for
 * it and its contents are added recursively. An empty directory therefore
 * contributes exactly one entry (the directory entry itself). A plain file
 * whose computed entry name is empty is skipped.
 *
 * @param conf The configuration used to obtain the local file system and
 *        passed to the stream copy
 * @param zos The zip output stream; it is left open by this method
 * @param baseName The base name to use for the file name entry in the zip
 *        file
 * @param root The path to remove from <code>itemToZip</code> to make a
 *        relative path name
 * @param itemToZip The path to the file or directory to be added to the zip
 *        file
 * @return the number of entries added
 * @throws IOException if the item cannot be read or the zip stream cannot be
 *         written
 */
static public int zipDirectory(final Configuration conf, final ZipOutputStream zos, final String baseName,
        final String root, final Path itemToZip) throws IOException {
    LOG.info("zipDirectory: {} {} {}", new Object[] { baseName, root, itemToZip });
    LocalFileSystem localFs = FileSystem.getLocal(conf);

    final FileStatus itemStatus = localFs.getFileStatus(itemToZip);
    if (itemStatus.isDir()) {
        int count = 0;
        final FileStatus[] statai = localFs.listStatus(itemToZip);

        // Add a directory entry to the zip file
        final String zipDirName = relativePathForZipEntry(itemToZip.toUri().getPath(), baseName, root);
        final ZipEntry dirZipEntry = new ZipEntry(zipDirName + Path.SEPARATOR_CHAR);
        LOG.info(String.format("Adding directory %s to zip", zipDirName));
        zos.putNextEntry(dirZipEntry);
        zos.closeEntry();
        count++;

        // Nothing to recurse into; the directory entry above is still counted.
        if (statai == null || statai.length == 0) {
            LOG.info(String.format("Skipping empty directory %s", itemToZip));
            return count;
        }
        for (FileStatus status : statai) {
            count += zipDirectory(conf, zos, baseName, root, status.getPath());
        }
        LOG.info(String.format("Wrote %d entries for directory %s", count, itemToZip));
        return count;
    }

    final String inZipPath = relativePathForZipEntry(itemToZip.toUri().getPath(), baseName, root);

    if (inZipPath.length() == 0) {
        LOG.warn(String.format("Skipping empty zip file path for %s (%s %s)", itemToZip, root, baseName));
        return 0;
    }

    // Take empty files in case the place holder is needed
    FSDataInputStream in = null;
    try {
        in = localFs.open(itemToZip);
        final ZipEntry ze = new ZipEntry(inZipPath);
        ze.setTime(itemStatus.getModificationTime());
        // Comments confuse looking at the zip file, so none is set on the entry.
        zos.putNextEntry(ze);

        // 'false' keeps both streams open: zos is shared across entries and
        // 'in' is closed in the finally block below.
        IOUtils.copyBytes(in, zos, conf, false);
        zos.closeEntry();
        // A plain file always contributes exactly one entry.
        LOG.info(String.format("Wrote %d entries for file %s", 1, itemToZip));
        return 1;
    } finally {
        // 'in' is still null when open() itself failed; closing it blindly
        // would replace the real IOException with a NullPointerException.
        if (in != null) {
            in.close();
        }
    }

}

From source file:com.tdunning.plume.local.lazy.MapRedBypassTest.java

License:Apache License

/**
 * Runs {@link MapRedBypassWorkflow} through {@link MapRedExecutor} on the
 * bundled <code>simple-text.txt</code> fixture and checks selected key/value
 * pairs of the two tab-separated output files.
 */
@Test
public void test() throws Exception {
    final String resultDir = "/tmp/output-plume-bypasstest";
    final String stagedInput = "/tmp/input-wordcount.txt";

    // Stage the fixture and clear any output left by a previous run.
    final FileSystem localFs = FileSystem.getLocal(new Configuration());
    final Path fixture = new Path(Resources.getResource("simple-text.txt").getPath());
    localFs.copyFromLocalFile(fixture, new Path(stagedInput));
    localFs.delete(new Path(resultDir), true);

    // Build and run the workflow.
    final MapRedBypassWorkflow workflow = new MapRedBypassWorkflow();
    final MapRedExecutor runner = new MapRedExecutor();
    runner.execute(workflow, resultDir);

    // First output: each key maps to its "-bloh" counterpart.
    final List<String> firstLines = Files.readLines(new File(resultDir + "/1_1/1-r-00000"), Charsets.UTF_8);
    final Map<String, String> firstOutput = Maps.newHashMap();
    for (String row : firstLines) {
        final String[] columns = row.split("\t");
        firstOutput.put(columns[0], columns[1]);
    }
    assertEquals(firstOutput.get("To test text processing with some simple-blah"),
            "To test text processing with some simple-bloh");
    assertEquals(firstOutput.get("some simple text-blah"), "some simple text-bloh");
    assertEquals(firstOutput.get("is is-blah"), "is is-bloh");

    // Second output: every checked key maps to "foo".
    final List<String> secondLines = Files.readLines(new File(resultDir + "/1_2/2-r-00000"), Charsets.UTF_8);
    final Map<String, String> secondOutput = Maps.newHashMap();
    for (String row : secondLines) {
        final String[] columns = row.split("\t");
        secondOutput.put(columns[0], columns[1]);
    }
    assertEquals(secondOutput.get("To test text processing with some simple"), "foo");
    assertEquals(secondOutput.get("some simple text"), "foo");
    assertEquals(secondOutput.get("is is"), "foo");
}

From source file:com.tdunning.plume.local.lazy.MapRedExecutor.java

License:Apache License

/**
 * This method can be called to execute a {@link PlumeWorkflow} by using Hadoop Map-Reduce implementation.
 * It will build the execution tree, optimize it and convert each MSCR step into a MapRed job. 
 * It will launch MSCR jobs in parallel when it is allowable to do so by using a ThreadPool. If one MSCR fails,
 * all the work flow is canceled. Because it stores the result in a temporary folder, it will only flush the final
 * result to the API parameter if the work flow has been executed successfully.
 * /*from ww  w. j  a v  a2s .c o  m*/
 * @param workFlow The {@link PlumeWorkflow} to execute 
 * @param outputTo Output folder where the result of the work flow will be stored if executed successfully
 * 
 * @throws IOException If the work flow had to be canceled
 * @throws InterruptedException 
 */
public void execute(PlumeWorkflow workFlow, String outputTo) throws IOException, InterruptedException {
    Optimizer optimizer = new Optimizer();
    ExecutionStep step = optimizer.optimize(workFlow);
    int nStep = 0;
    final String workFlowId = workFlow.getClass().getName() + "-" + System.currentTimeMillis();
    do {
        nStep++;
        log.info("Begin execution step " + nStep + " for workflow " + workFlow.getClass().getName());
        // Create a latch to mark the end of a concurrent step where all MSCRs can be executed in parallel
        final CountDownLatch latch = new CountDownLatch(step.mscrSteps.size());
        // Create a signal that can be flagged if one of the MSCRs fail to abort all the workFlow
        // - I have chosen an AtomicBoolean in case this flag can be re-set to false under some circumstance -
        final AtomicBoolean abort = new AtomicBoolean(false);
        // For each MSCR that can be executed concurrently...
        for (final MSCR mscr : step.mscrSteps) {
            final String workFlowOutputPath = tmpOutputFolder + "/" + workFlowId;
            final String jobId = workFlowId + "/" + mscr.getId();
            final String jobOutputPath = tmpOutputFolder + "/" + jobId;
            log.info("Triggering execution of jobId " + jobId + ". Its output will be saved to "
                    + jobOutputPath);
            // ... Get its MapRed Job
            final Job job = getMapRed(mscr, workFlow, workFlowOutputPath, jobOutputPath);
            final FileSystem fS = FileSystem.getLocal(job.getConfiguration());
            // ... Submit it to the ThreadPool
            executor.submit(new Runnable() {
                @Override
                public void run() {
                    try {
                        job.waitForCompletion(true);
                        // job completed successfully - materialize outputs
                        log.info("jobId " + jobId + " completed successfully, now materializing outputs.");
                        for (Map.Entry<PCollection<?>, Integer> entry : mscr.getNumberedChannels().entrySet()) {
                            LazyCollection<?> oCol = (LazyCollection<?>) mscr.getOutputChannels()
                                    .get(entry.getKey()).output;
                            // Move this output to somewhere recognizable - this executor's tmp folder + this PCollection's Plume Id
                            // This way, mappers that read unmaterialized collections will know where to find intermediate states.
                            FileStatus[] files = fS.listStatus(new Path(jobOutputPath));
                            Path materializedPath = new Path(workFlowOutputPath + "/" + oCol.getPlumeId());
                            fS.mkdirs(materializedPath);
                            for (FileStatus file : files) {
                                if (file.getPath().getName().startsWith(entry.getValue() + "-r-")) {
                                    FileUtil.copy(fS, file.getPath(), fS, materializedPath, false,
                                            job.getConfiguration());
                                    oCol.setFile(materializedPath.toString());
                                }
                            }
                            log.info(
                                    "Materialized plume output " + oCol.getPlumeId() + " to " + oCol.getFile());
                        }
                    } catch (IOException e) {
                        log.warn("One Job failed: " + jobId + ", current Workflow will be aborted ", e);
                        abort.set(true); // Flag the premature end of this workflow
                    } catch (InterruptedException e) {
                        log.warn("One Job failed: " + jobId + ", current Workflow will be aborted ", e);
                        abort.set(true); // Flag the premature end of this workflow
                    } catch (ClassNotFoundException e) {
                        log.warn("One Job failed: " + jobId + ", current Workflow will be aborted ", e);
                        abort.set(true); // Flag the premature end of this workflow
                    } finally {
                        latch.countDown(); // Count down under any circumstance
                    }
                }
            });
        }
        latch.await(); // wait until all MSCRs from this step are completed
        if (abort.get()) {
            throw new IOException("Current Workflow was aborted");
        }
        step = step.nextStep;
    } while (step != null);
    log.info("Workflow ended correctly.");
    // Move temporary result to where API user wants to: WARN: Local-specific implementation
    Files.move(new File(tmpOutputFolder + "/" + workFlowId), new File(outputTo));
}

From source file:com.tdunning.plume.local.lazy.MapRedFlattenTest.java

License:Apache License

/**
 * Stages the two bundled event fixtures on the local file system and runs
 * {@link MapRedFlattenTestWorkflow} through {@link MapRedExecutor}. The test
 * passes as long as execution does not throw.
 */
@Test
public void test() throws Exception {
    final String resultDir = "/tmp/output-plume-flattentest";
    final FileSystem localFs = FileSystem.getLocal(new Configuration());

    // Copy each fixture to the input location the workflow reads from.
    final Path usersFixture = new Path(Resources.getResource("event2users.txt").getPath());
    localFs.copyFromLocalFile(usersFixture, new Path(inputPathEvent2));
    final Path logFixture = new Path(Resources.getResource("eventslog.txt").getPath());
    localFs.copyFromLocalFile(logFixture, new Path(inputPathLogFile));

    // Clear any output left by a previous run.
    localFs.delete(new Path(resultDir), true);

    // Build and run the workflow.
    final MapRedFlattenTestWorkflow workflow = new MapRedFlattenTestWorkflow();
    final MapRedExecutor runner = new MapRedExecutor();
    runner.execute(workflow, resultDir);
}

From source file:com.tdunning.plume.local.lazy.MapRedMultipleGroupsTest.java

License:Apache License

@Test
public void test() throws IOException, InterruptedException, ClassNotFoundException {
    String inputPath = "/tmp/input-wordcount.txt";
    String outputPath = "/tmp/output-plume-complex";
    // Prepare input for test
    FileSystem system = FileSystem.getLocal(new Configuration());
    system.copyFromLocalFile(new Path(Resources.getResource("simple-text.txt").getPath()), new Path(inputPath));
    // Prepare output for test
    system.delete(new Path(outputPath), true);
    // Prepare workflow
    MultipleGroupsWorkflow workFlow = new MultipleGroupsWorkflow();
    // Execute it
    MapRedExecutor executor = new MapRedExecutor();
    executor.execute(workFlow, outputPath);

    // Just assert that 3 output files were written and have content
    /**//www .  j  a  va 2s .c o m
     * TODO This test has to check the actual results of the 3 outputs
     */
    for (int i = 1; i <= 3; i++) {
        File f = new File(outputPath + "/1_" + i + "/" + i + "-r-00000");
        assertTrue(f.exists());
        assertTrue(f.length() > 64);
    }
}

From source file:com.tdunning.plume.local.lazy.MapRedOnlyFlattensTest.java

License:Apache License

/**
 * Stages the bundled event fixtures (the events log is copied to two input
 * locations) and runs {@link MapRedOnlyFlattensTestWorkflow} through
 * {@link MapRedExecutor}. The test passes as long as execution does not throw.
 */
@Test
public void test() throws Exception {
    final String resultDir = "/tmp/output-plume-onlyflattentest";
    final FileSystem localFs = FileSystem.getLocal(new Configuration());

    // Copy each fixture to the input location the workflow reads from.
    final Path usersFixture = new Path(Resources.getResource("event2users.txt").getPath());
    localFs.copyFromLocalFile(usersFixture, new Path(inputPathEvent2));
    final Path firstLogFixture = new Path(Resources.getResource("eventslog.txt").getPath());
    localFs.copyFromLocalFile(firstLogFixture, new Path(inputPathLogFile));
    final Path secondLogFixture = new Path(Resources.getResource("eventslog.txt").getPath());
    localFs.copyFromLocalFile(secondLogFixture, new Path(inputPathLogFile2));

    // Clear any output left by a previous run.
    localFs.delete(new Path(resultDir), true);

    // Build and run the workflow.
    final MapRedOnlyFlattensTestWorkflow workflow = new MapRedOnlyFlattensTestWorkflow();
    final MapRedExecutor runner = new MapRedExecutor();
    runner.execute(workflow, resultDir);
}

From source file:com.tdunning.plume.local.lazy.MapRedSequenceFileTest.java

License:Apache License

@Test
public void test() throws Exception {
    /*/*from   w  w w.  j a  v  a2 s. c  om*/
     * Create input which is SequenceFile<int,int> with data 1,2\n3,4
     */
    Configuration conf = new Configuration();
    Path p = new Path(inputPath);
    FileSystem localFS = FileSystem.getLocal(conf);
    if (localFS.exists(p)) {
        localFS.delete(p, true); // wipe it if needed
    }
    SequenceFile.Writer writer = SequenceFile.createWriter(localFS, conf, p, IntWritable.class,
            IntWritable.class);
    writer.append(new IntWritable(1), new IntWritable(2));
    writer.append(new IntWritable(3), new IntWritable(4));
    writer.close();
    String outputPath = "/tmp/output-plume-simpletest";
    // Prepare input for test
    FileSystem system = FileSystem.getLocal(new Configuration());
    // Prepare output for test
    system.delete(new Path(outputPath), true);
    // Prepare workflow
    OtherWorkflow workFlow = new OtherWorkflow();
    // Execute it
    MapRedExecutor executor = new MapRedExecutor();
    executor.execute(workFlow, outputPath);
    /*
     * Read output which is SequenceFile<int,int> and assert that it has data 2,3\n4,5
     */
    p = new Path(outputPath + "/1_1/1-r-00000");
    SequenceFile.Reader reader = new SequenceFile.Reader(localFS, p, conf);
    IntWritable key = new IntWritable(1);
    IntWritable value = new IntWritable(1);
    reader.next(key, value);
    assertEquals(key.get(), 2);
    assertEquals(value.get(), 3);
    reader.next(key, value);
    assertEquals(key.get(), 4);
    assertEquals(value.get(), 5);
    reader.close();
}

From source file:com.tdunning.plume.local.lazy.MapRedSingleFlattenChannelTest.java

License:Apache License

@Test
public void test() throws Exception {
    String outputPath = "/tmp/output-plume-singleflattenchanneltest";
    String inputPath = "/tmp/input-wordcount.txt";
    String inputPath2 = "/tmp/input-moretext.txt";
    // Prepare input for test
    FileSystem system = FileSystem.getLocal(new Configuration());
    system.copyFromLocalFile(new Path(Resources.getResource("simple-text.txt").getPath()), new Path(inputPath));
    system.copyFromLocalFile(new Path(Resources.getResource("simple-text.txt").getPath()),
            new Path(inputPath2));
    // Prepare output for test
    system.delete(new Path(outputPath), true);
    // Prepare workflow
    MapRedSingleFlattenChannelTestWorkflow workFlow = new MapRedSingleFlattenChannelTestWorkflow();
    // Execute it
    MapRedExecutor executor = new MapRedExecutor();
    executor.execute(workFlow, outputPath);
    /**//from w w  w.  ja  v a 2s . c  o m
     * TODO add test validation
     */
}