Example usage for org.apache.hadoop.fs FileContext getLocalFSFileContext

List of usage examples for org.apache.hadoop.fs FileContext getLocalFSFileContext

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileContext getLocalFSFileContext.

Prototype

public static FileContext getLocalFSFileContext() throws UnsupportedFileSystemException 
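
Before the individual examples, the following is a minimal, self-contained sketch of the pattern they all share: obtain the local-filesystem FileContext and recursively delete a scratch directory before writing test data. The class name and the "target/scratch" path are illustrative placeholders, not taken from the examples below.

import java.io.File;

import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Path;

public class LocalFSFileContextSketch {
    public static void main(String[] args) throws Exception {
        // Bind a FileContext to the local file system; this throws
        // UnsupportedFileSystemException if the local FS cannot be loaded.
        FileContext localFs = FileContext.getLocalFSFileContext();

        // Recursively delete a scratch directory, the setup step every example
        // below performs. On the local FS, delete returns false (rather than
        // throwing) when the path does not exist.
        Path scratchDir = new Path(new File("target/scratch").getAbsolutePath());
        localFs.delete(scratchDir, true);
    }
}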

Usage

From source file: org.apache.apex.malhar.lib.io.fs.AbstractFileInputOperatorTest.java

License: Apache License

@Test
public void testIdempotencyWithMultipleEmitTuples() throws Exception {
    FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);

    List<String> allLines = Lists.newArrayList();
    for (int file = 0; file < 2; file++) {
        List<String> lines = Lists.newArrayList();
        for (int line = 0; line < 2; line++) {
            lines.add("f" + file + "l" + line);
        }
        allLines.addAll(lines);
        FileUtils.write(new File(testMeta.dir, "file" + file), StringUtils.join(lines, '\n'));
    }

    LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
    FSWindowDataManager manager = new FSWindowDataManager();
    manager.setStatePath(testMeta.dir + "/recovery");

    oper.setWindowDataManager(manager);

    CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
    TestUtils.setSink(oper.output, queryResults);

    oper.setDirectory(testMeta.dir);
    oper.getScanner().setFilePatternRegexp(".*file[\\d]");

    oper.setup(testMeta.context);
    oper.beginWindow(0);
    for (int i = 0; i < 3; i++) {
        oper.emitTuples();
    }
    oper.endWindow();
    oper.teardown();
    List<String> beforeRecovery = Lists.newArrayList(queryResults.collectedTuples);

    queryResults.clear();

    // idempotency part
    oper.setup(testMeta.context);
    oper.beginWindow(0);
    oper.endWindow();
    Assert.assertEquals("number tuples", 4, queryResults.collectedTuples.size());
    Assert.assertEquals("lines", beforeRecovery, queryResults.collectedTuples);
    oper.teardown();
}

From source file: org.apache.apex.malhar.lib.io.fs.AbstractFileInputOperatorTest.java

License: Apache License

@Test
public void testIdempotencyWhenFileContinued() throws Exception {
    FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);

    List<String> lines = Lists.newArrayList();
    for (int line = 0; line < 10; line++) {
        lines.add("l" + line);
    }
    FileUtils.write(new File(testMeta.dir, "file0"), StringUtils.join(lines, '\n'));

    LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
    FSWindowDataManager manager = new FSWindowDataManager();
    manager.setStatePath(testMeta.dir + "/recovery");
    oper.setEmitBatchSize(5);

    oper.setWindowDataManager(manager);

    CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
    @SuppressWarnings({ "unchecked", "rawtypes" })
    CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
    oper.output.setSink(sink);

    oper.setDirectory(testMeta.dir);
    oper.getScanner().setFilePatternRegexp(".*file[\\d]");

    oper.setup(testMeta.context);
    int offset = 0;
    for (long wid = 0; wid < 3; wid++) {
        oper.beginWindow(wid);
        oper.emitTuples();
        oper.endWindow();
        if (wid > 0) {
            Assert.assertEquals("number tuples", 5, queryResults.collectedTuples.size());
            Assert.assertEquals("lines", lines.subList(offset, offset + 5), queryResults.collectedTuples);
            offset += 5;
        }
        sink.clear();
    }
    oper.teardown();
    sink.clear();

    // idempotency part
    offset = 0;
    oper.setup(testMeta.context);
    for (long wid = 0; wid < 3; wid++) {
        oper.beginWindow(wid);
        oper.endWindow();
        if (wid > 0) {
            Assert.assertEquals("number tuples", 5, queryResults.collectedTuples.size());
            Assert.assertEquals("lines", lines.subList(offset, offset + 5), queryResults.collectedTuples);
            offset += 5;
        }
        sink.clear();
    }
    oper.teardown();
}

From source file: org.apache.apex.malhar.lib.io.fs.AbstractFileInputOperatorTest.java

License: Apache License

@Test
public void testStateWithIdempotency() throws Exception {
    FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);

    HashSet<String> allLines = Sets.newHashSet();
    for (int file = 0; file < 3; file++) {
        HashSet<String> lines = Sets.newHashSet();
        for (int line = 0; line < 2; line++) {
            lines.add("f" + file + "l" + line);
        }
        allLines.addAll(lines);
        FileUtils.write(new File(testMeta.dir, "file" + file), StringUtils.join(lines, '\n'));
    }

    LineByLineFileInputOperator oper = new LineByLineFileInputOperator();

    FSWindowDataManager manager = new FSWindowDataManager();
    manager.setStatePath(testMeta.dir + "/recovery");

    oper.setWindowDataManager(manager);

    CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
    @SuppressWarnings({ "unchecked", "rawtypes" })
    CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
    oper.output.setSink(sink);

    oper.setDirectory(testMeta.dir);
    oper.getScanner().setFilePatternRegexp(".*file[\\d]");

    oper.setup(testMeta.context);
    for (long wid = 0; wid < 4; wid++) {
        oper.beginWindow(wid);
        oper.emitTuples();
        oper.endWindow();
    }
    oper.teardown();

    sink.clear();

    // idempotency part
    oper.pendingFiles.add(new File(testMeta.dir, "file0").getAbsolutePath());
    oper.failedFiles.add(
            new AbstractFileInputOperator.FailedFile(new File(testMeta.dir, "file1").getAbsolutePath(), 0));
    oper.unfinishedFiles.add(
            new AbstractFileInputOperator.FailedFile(new File(testMeta.dir, "file2").getAbsolutePath(), 0));

    oper.setup(testMeta.context);
    for (long wid = 0; wid < 4; wid++) {
        oper.beginWindow(wid);
        oper.endWindow();
    }
    Assert.assertTrue("pending state", !oper.pendingFiles.contains("file0"));

    for (AbstractFileInputOperator.FailedFile failedFile : oper.failedFiles) {
        Assert.assertTrue("failed state", !failedFile.path.equals("file1"));
    }

    for (AbstractFileInputOperator.FailedFile unfinishedFile : oper.unfinishedFiles) {
        Assert.assertTrue("unfinished state", !unfinishedFile.path.equals("file2"));
    }
    oper.teardown();
}

From source file: org.apache.apex.malhar.lib.io.fs.AbstractFileInputOperatorTest.java

License: Apache License

public static <S extends AbstractFileInputOperator, T> void testIdempotencyWithCheckPoint(S oper,
        CollectorTestSink<T> queryResults, IdempotencyTestDriver<S> driver) throws Exception {
    FileContext.getLocalFSFileContext().delete(new Path(new File(driver.getDirectory()).getAbsolutePath()),
            true);

    int file = 0;
    driver.writeFile(5, "file" + file);

    file = 1;
    driver.writeFile(6, "file" + file);

    // empty file
    file = 2;
    driver.writeFile(0, "file" + file);

    FSWindowDataManager manager = new FSWindowDataManager();
    manager.setStatePath(driver.getDirectory() + "/recovery");

    oper.setWindowDataManager(manager);

    oper.setDirectory(driver.getDirectory());
    oper.getScanner().setFilePatternRegexp(".*file[\\d]");

    oper.setup(driver.getContext());

    oper.setEmitBatchSize(3);

    // sort the pendingFiles and ensure the ordering of the files scanned
    DirectoryScannerNew newScanner = new DirectoryScannerNew();
    oper.setScanner(newScanner);

    // scan directory
    oper.beginWindow(0);
    oper.emitTuples();
    oper.endWindow();

    // emit f0l0, f0l1, f0l2
    oper.beginWindow(1);
    oper.emitTuples();
    oper.endWindow();

    //checkpoint the operator
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    S checkPointOper = checkpoint(oper, bos);

    // start saving output
    driver.setSink(oper, queryResults);

    // emit f0l3, f0l4, and closeFile(f0) in the same window
    oper.beginWindow(2);
    oper.emitTuples();
    oper.endWindow();
    List<T> beforeRecovery2 = Lists.newArrayList(queryResults.collectedTuples);

    // emit f1l0, f1l1, f1l2
    oper.beginWindow(3);
    oper.emitTuples();
    oper.endWindow();
    List<T> beforeRecovery3 = Lists.newArrayList(queryResults.collectedTuples);

    // emit f1l3, f1l4, f1l5
    oper.beginWindow(4);
    oper.emitTuples();
    oper.endWindow();
    List<T> beforeRecovery4 = Lists.newArrayList(queryResults.collectedTuples);

    // closeFile(f1) in a new window
    oper.beginWindow(5);
    oper.emitTuples();
    oper.endWindow();
    List<T> beforeRecovery5 = Lists.newArrayList(queryResults.collectedTuples);

    // empty file ops, closeFile(f2) in emitTuples() only
    oper.beginWindow(6);
    oper.emitTuples();
    oper.endWindow();
    List<T> beforeRecovery6 = Lists.newArrayList(queryResults.collectedTuples);

    oper.teardown();

    queryResults.clear();

    // idempotency part

    oper = restoreCheckPoint(checkPointOper, bos);
    driver.getContext().getAttributes().put(Context.OperatorContext.ACTIVATION_WINDOW_ID, 1L);
    oper.setup(driver.getContext());
    driver.setSink(oper, queryResults);

    long startwid = driver.getContext().getAttributes().get(Context.OperatorContext.ACTIVATION_WINDOW_ID) + 1;

    oper.beginWindow(startwid);
    Assert.assertTrue(oper.currentFile == null);
    oper.emitTuples();
    oper.endWindow();
    Assert.assertEquals("lines", beforeRecovery2, queryResults.collectedTuples);

    oper.beginWindow(++startwid);
    oper.emitTuples();
    oper.endWindow();
    Assert.assertEquals("lines", beforeRecovery3, queryResults.collectedTuples);

    oper.beginWindow(++startwid);
    oper.emitTuples();
    oper.endWindow();
    Assert.assertEquals("lines", beforeRecovery4, queryResults.collectedTuples);

    oper.beginWindow(++startwid);
    Assert.assertTrue(oper.currentFile == null);
    oper.emitTuples();
    oper.endWindow();
    Assert.assertEquals("lines", beforeRecovery5, queryResults.collectedTuples);

    oper.beginWindow(++startwid);
    Assert.assertTrue(oper.currentFile == null);
    oper.emitTuples();
    oper.endWindow();
    Assert.assertEquals("lines", beforeRecovery6, queryResults.collectedTuples);

    Assert.assertEquals("number tuples", 8, queryResults.collectedTuples.size());

    oper.teardown();
}

From source file: org.apache.apex.malhar.lib.io.fs.AbstractFileInputOperatorTest.java

License: Apache License

@Test
public void testWindowDataManagerPartitioning() throws Exception {
    LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
    oper.getScanner().setFilePatternRegexp(".*partition([\\d]*)");
    oper.setDirectory(new File(testMeta.dir).getAbsolutePath());
    oper.setWindowDataManager(new FSWindowDataManager());
    oper.operatorId = 7;

    Path path = new Path(new File(testMeta.dir).getAbsolutePath());
    FileContext.getLocalFSFileContext().delete(path, true);
    for (int file = 0; file < 4; file++) {
        FileUtils.write(new File(testMeta.dir, "partition00" + file), "");
    }

    List<Partition<AbstractFileInputOperator<String>>> partitions = Lists.newArrayList();
    partitions.add(new DefaultPartition<AbstractFileInputOperator<String>>(oper));

    Collection<Partition<AbstractFileInputOperator<String>>> newPartitions = oper.definePartitions(partitions,
            new PartitioningContextImpl(null, 2));
    Assert.assertEquals(2, newPartitions.size());
    Assert.assertEquals(1, oper.getCurrentPartitions());

    List<FSWindowDataManager> storageManagers = Lists.newLinkedList();
    for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
        storageManagers.add((FSWindowDataManager) p.getPartitionedInstance().getWindowDataManager());
    }
    Assert.assertEquals("count of storage managers", 2, storageManagers.size());

    int countOfDeleteManagers = 0;
    FSWindowDataManager deleteManager = null;
    for (FSWindowDataManager storageManager : storageManagers) {
        if (storageManager.getDeletedOperators() != null) {
            countOfDeleteManagers++;
            deleteManager = storageManager;
        }
    }

    Assert.assertEquals("count of delete managers", 1, countOfDeleteManagers);
    Assert.assertNotNull("deleted operators manager", deleteManager);
    Assert.assertEquals("deleted operators", Sets.newHashSet(7), deleteManager.getDeletedOperators());
}

From source file: org.apache.apex.malhar.lib.io.fs.AbstractFileInputOperatorTest.java

License: Apache License

/**
 * Partition the operator in 2.
 * Create ten files with the index of the file at the start, i.e. 1_file, 2_file, etc.
 * The scanner returns this index from the getPartition method.
 * Each partition should read 5 files, as the file indices range from 0 to 9 (inclusive).
 * @throws Exception
 */
@Test
public void testWithCustomScanner() throws Exception {
    LineByLineFileInputOperator oper = new LineByLineFileInputOperator();
    oper.setScanner(new MyScanner());
    oper.getScanner().setFilePatternRegexp(".*partition_([\\d]*)");
    oper.setDirectory(new File(testMeta.dir).getAbsolutePath());

    Random rand = new Random();
    Path path = new Path(new File(testMeta.dir).getAbsolutePath());
    FileContext.getLocalFSFileContext().delete(path, true);
    for (int file = 0; file < 10; file++) {
        FileUtils.write(new File(testMeta.dir, file + "_partition_00" + rand.nextInt(100)), "");
    }

    List<Partition<AbstractFileInputOperator<String>>> partitions = Lists.newArrayList();
    partitions.add(new DefaultPartition<AbstractFileInputOperator<String>>(oper));
    Collection<Partition<AbstractFileInputOperator<String>>> newPartitions = oper.definePartitions(partitions,
            new PartitioningContextImpl(null, 2));
    Assert.assertEquals(2, newPartitions.size());
    Assert.assertEquals(1, oper.getCurrentPartitions()); // partitioned() wasn't called

    for (Partition<AbstractFileInputOperator<String>> p : newPartitions) {
        Assert.assertNotSame(oper, p.getPartitionedInstance());
        Assert.assertNotSame(oper.getScanner(), p.getPartitionedInstance().getScanner());
        Set<String> consumed = Sets.newHashSet();
        LinkedHashSet<Path> files = p.getPartitionedInstance().getScanner()
                .scan(FileSystem.getLocal(new Configuration(false)), path, consumed);
        Assert.assertEquals("partition " + files, 6, files.size());
    }
}

From source file: org.apache.apex.malhar.lib.io.fs.FileSplitterInputTest.java

License: Apache License

static Set<String> createData(String dataDirectory) throws IOException {
    Set<String> filePaths = Sets.newHashSet();
    FileContext.getLocalFSFileContext().delete(new Path(new File(dataDirectory).getAbsolutePath()), true);
    HashSet<String> allLines = Sets.newHashSet();
    for (int file = 0; file < 12; file++) {
        HashSet<String> lines = Sets.newHashSet();
        for (int line = 0; line < 2; line++) {
            // zero-pad the file number so every file has 6 blocks.
            lines.add("f" + String.format("%02d", file) + "l" + line);
        }
        allLines.addAll(lines);
        File created = new File(dataDirectory, "file" + file + ".txt");
        filePaths.add(created.getAbsolutePath());
        FileUtils.write(created, StringUtils.join(lines, '\n'));
    }
    return filePaths;
}

From source file: org.apache.metron.integration.components.YarnComponent.java

License: Apache License

@Override
public void start() throws UnableToStartException {
    conf = new YarnConfiguration();
    conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 128);
    conf.set("yarn.log.dir", "target");
    conf.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, true);
    conf.set(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class.getName());
    conf.setBoolean(YarnConfiguration.NODE_LABELS_ENABLED, true);

    try {
        yarnCluster = new MiniYARNCluster(testName, 1, NUM_NMS, 1, 1, true);
        yarnCluster.init(conf);

        yarnCluster.start();

        waitForNMsToRegister();

        URL url = Thread.currentThread().getContextClassLoader().getResource("yarn-site.xml");
        if (url == null) {
            throw new RuntimeException("Could not find 'yarn-site.xml' dummy file in classpath");
        }
        Configuration yarnClusterConfig = yarnCluster.getConfig();
        yarnClusterConfig.set("yarn.application.classpath", new File(url.getPath()).getParent());
        // Write the document to a buffer rather than directly to the file, as a
        // direct write can cause the partially written file to be read, which
        // will then fail.
        ByteArrayOutputStream bytesOut = new ByteArrayOutputStream();
        yarnClusterConfig.writeXml(bytesOut);
        bytesOut.close();
        //write the bytes to the file in the classpath
        OutputStream os = new FileOutputStream(new File(url.getPath()));
        os.write(bytesOut.toByteArray());
        os.close();
        FileContext fsContext = FileContext.getLocalFSFileContext();
        fsContext.delete(new Path(conf.get("yarn.timeline-service.leveldb-timeline-store.path")), true);
        try {
            Thread.sleep(2000);
        } catch (InterruptedException e) {
        }
    } catch (Exception e) {
        throw new UnableToStartException("Exception setting up yarn cluster", e);
    }
}

From source file: org.apache.metron.integration.components.YarnComponent.java

License: Apache License

@Override
public void stop() {
    if (yarnCluster != null) {
        try {
            yarnCluster.stop();
        } finally {
            yarnCluster = null;
        }
    }
    try {
        FileContext fsContext = FileContext.getLocalFSFileContext();
        fsContext.delete(new Path(conf.get("yarn.timeline-service.leveldb-timeline-store.path")), true);
    } catch (Exception e) {
    }
}

From source file: org.apache.solr.hadoop.hack.MiniYARNCluster.java

License: Apache License

/**
 * @param testName name of the test
 * @param noOfNodeManagers the number of node managers in the cluster
 * @param numLocalDirs the number of nm-local-dirs per nodemanager
 * @param numLogDirs the number of nm-log-dirs per nodemanager
 * @param testWorkDir the working directory for the test
 */
public MiniYARNCluster(String testName, int noOfNodeManagers, int numLocalDirs, int numLogDirs,
        File testWorkDir) {
    super(testName.replace("$", ""));
    this.numLocalDirs = numLocalDirs;
    this.numLogDirs = numLogDirs;
    String testSubDir = testName.replace("$", "");
    File targetWorkDir = new File(testWorkDir, testSubDir);
    try {
        FileContext.getLocalFSFileContext().delete(new Path(targetWorkDir.getAbsolutePath()), true);
    } catch (Exception e) {
        LOG.warn("COULD NOT CLEANUP", e);
        throw new YarnRuntimeException("could not cleanup test dir: " + e, e);
    }

    if (Shell.WINDOWS) {
        // The test working directory can exceed the maximum path length supported
        // by some Windows APIs and cmd.exe (260 characters).  To work around this,
        // create a symlink in temporary storage with a much shorter path,
        // targeting the full path to the test working directory.  Then, use the
        // symlink as the test working directory.
        String targetPath = targetWorkDir.getAbsolutePath();
        File link = new File(System.getProperty("java.io.tmpdir"), String.valueOf(System.currentTimeMillis()));
        String linkPath = link.getAbsolutePath();

        try {
            FileContext.getLocalFSFileContext().delete(new Path(linkPath), true);
        } catch (IOException e) {
            throw new YarnRuntimeException("could not cleanup symlink: " + linkPath, e);
        }

        // Guarantee target exists before creating symlink.
        targetWorkDir.mkdirs();

        ShellCommandExecutor shexec = new ShellCommandExecutor(Shell.getSymlinkCommand(targetPath, linkPath));
        try {
            shexec.execute();
        } catch (IOException e) {
            throw new YarnRuntimeException(
                    String.format(Locale.ENGLISH, "failed to create symlink from %s to %s, shell output: %s",
                            linkPath, targetPath, shexec.getOutput()),
                    e);
        }

        this.testWorkDir = link;
    } else {
        this.testWorkDir = targetWorkDir;
    }

    resourceManagerWrapper = new ResourceManagerWrapper();
    addService(resourceManagerWrapper);
    nodeManagers = new CustomNodeManager[noOfNodeManagers];
    for (int index = 0; index < noOfNodeManagers; index++) {
        addService(new NodeManagerWrapper(index));
        nodeManagers[index] = new CustomNodeManager();
    }
}