Example usage for org.apache.hadoop.hdfs DistributedFileSystem open

List of usage examples for org.apache.hadoop.hdfs DistributedFileSystem open

Introduction

In this page you can find the example usage for org.apache.hadoop.hdfs DistributedFileSystem open.

Prototype

public FSDataInputStream open(Path f) throws IOException 

Source Link

Document

Opens an FSDataInputStream at the indicated Path.

Usage

From source file:backup.integration.MiniClusterTestBase.java

License:Apache License

@Test
public void testIntegrationBasic() throws Exception {
    File hdfsDir = setupHdfsLocalDir();
    Configuration conf = setupConfig(hdfsDir);

    MiniDFSCluster hdfsCluster = new MiniDFSCluster.Builder(conf).build();
    Thread readerThread = null;
    try {
        DistributedFileSystem fileSystem = hdfsCluster.getFileSystem();
        Path path = new Path("/testing.txt");
        writeFile(fileSystem, path);
        Thread.sleep(TimeUnit.SECONDS.toMillis(5));
        AtomicBoolean restored = new AtomicBoolean(false);
        // Background reader: the first successful read swaps the datanode that
        // holds the data out of the cluster; a later successful read proves the
        // missing block was restored from backup.
        readerThread = new Thread(new Runnable() {
            @Override
            public void run() {
                boolean firstPass = true;
                while (true) {
                    try {
                        try (ByteArrayOutputStream sink = new ByteArrayOutputStream()) {
                            // Read the whole file; fails with IOException while the block is missing.
                            try (FSDataInputStream input = fileSystem.open(path)) {
                                IOUtils.copy(input, sink);
                            }
                            if (!firstPass) {
                                LOG.info("Missing block restored.");
                                restored.set(true);
                                return;
                            }
                            // Bring up a fresh datanode, then stop the one holding the block.
                            hdfsCluster.startDataNodes(conf, 1, true, null, null);
                            hdfsCluster.stopDataNode(0);
                            firstPass = false;
                        }
                    } catch (IOException e) {
                        LOG.error(e.getMessage());
                    }
                    try {
                        Thread.sleep(1000);
                    } catch (InterruptedException e) {
                        return;
                    }
                }
            }
        });
        readerThread.start();
        // Give the restore up to two minutes before declaring failure.
        readerThread.join(TimeUnit.MINUTES.toMillis(2));
        if (!restored.get()) {
            fail();
        }
    } finally {
        if (readerThread != null) {
            readerThread.interrupt();
        }
        hdfsCluster.shutdown();
        destroyBackupStore(conf);
    }
}

From source file:com.streamsets.datacollector.hdfs.cluster.KafkaToHDFSIT.java

License:Apache License

@Test(timeout = 120000)
public void testKafkaToHDFSOnCluster() throws Exception {
    List<URI> list = miniSDC.getListOfSlaveSDCURI();
    Assert.assertTrue(list != null && !list.isEmpty());

    Map<String, Map<String, Object>> countersMap = VerifyUtils.getCounters(list, "cluster_kafka_hdfs", "0");
    Assert.assertNotNull(countersMap);
    // Poll until the source reports all produced records.
    while (VerifyUtils.getSourceOutputRecords(countersMap) != RECORDS_PRODUCED) {
        LOG.debug("Source output records are not equal to " + RECORDS_PRODUCED + " retrying again");
        Thread.sleep(500);
        countersMap = VerifyUtils.getCounters(list, "cluster_kafka_hdfs", "0");
        Assert.assertNotNull(countersMap);
    }
    // Poll until the target reports the expected record count.
    while (VerifyUtils.getTargetInputRecords(countersMap) != RECORDS_REACHING_TARGET) {
        LOG.debug("Target Input records are not equal to " + RECORDS_REACHING_TARGET + " retrying again");
        Thread.sleep(500);
        countersMap = VerifyUtils.getCounters(list, "cluster_kafka_hdfs", "0");
        Assert.assertNotNull(countersMap);
    }
    //HDFS configuration is set to roll file after 15 records.
    int recordsRead = 0;
    DistributedFileSystem fileSystem = miniDFS.getFileSystem();
    FileStatus[] fileStatuses = fileSystem.listStatus(new Path("/tmp/out/" + TestUtil.getCurrentYear()));
    for (FileStatus f : fileStatuses) {
        // try-with-resources: the original leaked the reader (and the underlying HDFS
        // stream) for every rolled file; charset pinned to UTF-8 instead of platform default.
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(fileSystem.open(f.getPath()), "UTF-8"))) {
            String line;
            while ((line = br.readLine()) != null) {
                Assert.assertTrue(line.contains("Hello Kafka"));
                // Digit at offset 11 — presumably the record number; only even records
                // are expected to reach HDFS (TODO confirm against the producer).
                int j = Integer.parseInt(line.substring(11, 12));
                Assert.assertTrue(j % 2 == 0);
                recordsRead++;
            }
        }
    }

    Assert.assertEquals(RECORDS_REACHING_TARGET, recordsRead);
}

From source file:com.streamsets.datacollector.hdfs.cluster.TestKafkaToHDFS.java

License:Apache License

@Test(timeout = 120000)
public void testKafkaToHDFSOnCluster() throws Exception {
    List<URI> list = miniSDC.getListOfSlaveSDCURI();
    Assert.assertTrue(list != null && !list.isEmpty());

    Map<String, Map<String, Object>> countersMap = VerifyUtils.getCounters(list, "cluster_kafka_hdfs", "0");
    Assert.assertNotNull(countersMap);
    // Poll until the source reports all produced records.
    while (VerifyUtils.getSourceOutputRecords(countersMap) != RECORDS_PRODUCED) {
        LOG.debug("Source output records are not equal to " + RECORDS_PRODUCED + " retrying again");
        Thread.sleep(500);
        countersMap = VerifyUtils.getCounters(list, "cluster_kafka_hdfs", "0");
        Assert.assertNotNull(countersMap);
    }
    // Poll until the target reports the expected record count.
    while (VerifyUtils.getTargetInputRecords(countersMap) != RECORDS_REACHING_TARGET) {
        LOG.debug("Target Input records are not equal to " + RECORDS_REACHING_TARGET + " retrying again");
        Thread.sleep(500);
        countersMap = VerifyUtils.getCounters(list, "cluster_kafka_hdfs", "0");
        Assert.assertNotNull(countersMap);
    }
    //HDFS configuration is set to roll file after 15 records.
    int recordsRead = 0;
    DistributedFileSystem fileSystem = miniDFS.getFileSystem();
    //resolve ${YYYY()} instead of hardcoding 2015
    FileStatus[] fileStatuses = fileSystem.listStatus(new Path("/tmp/out/2015"));
    for (FileStatus f : fileStatuses) {
        // try-with-resources: the original leaked the reader (and the underlying HDFS
        // stream) for every rolled file; charset pinned to UTF-8 instead of platform default.
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(fileSystem.open(f.getPath()), "UTF-8"))) {
            String line;
            while ((line = br.readLine()) != null) {
                Assert.assertTrue(line.contains("Hello Kafka"));
                // Digit at offset 11 — presumably the record number; only even records
                // are expected to reach HDFS (TODO confirm against the producer).
                int j = Integer.parseInt(line.substring(11, 12));
                Assert.assertTrue(j % 2 == 0);
                recordsRead++;
            }
        }
    }

    Assert.assertEquals(RECORDS_REACHING_TARGET, recordsRead);
}

From source file:com.streamsets.datacollector.hdfs.standalone.HdfsDestinationPipelineRunIT.java

License:Apache License

@Override
protected int getRecordsInTarget() throws IOException {
    // Counts the total number of lines across all rolled output files
    // under /tmp/out/<current year>.
    int recordsRead = 0;
    DistributedFileSystem fileSystem = miniDFS.getFileSystem();
    FileStatus[] fileStatuses = fileSystem.listStatus(new Path("/tmp/out/" + TestUtil.getCurrentYear()));
    for (FileStatus f : fileStatuses) {
        // try-with-resources: the original leaked the reader and the HDFS stream.
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(fileSystem.open(f.getPath()), "UTF-8"))) {
            while (br.readLine() != null) {
                recordsRead++;
            }
        }
    }
    return recordsRead;
}

From source file:com.streamsets.datacollector.hdfs.standalone.TestHdfsDestinationPipelineRun.java

License:Apache License

@Override
protected int getRecordsInTarget() throws IOException {
    // Counts the total number of lines across all rolled output files.
    int recordsRead = 0;
    DistributedFileSystem fileSystem = miniDFS.getFileSystem();
    //resolve ${YYYY()} instead of hardcoding 2015
    FileStatus[] fileStatuses = fileSystem.listStatus(new Path("/tmp/out/2015"));
    for (FileStatus f : fileStatuses) {
        // try-with-resources: the original leaked the reader and the HDFS stream.
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(fileSystem.open(f.getPath()), "UTF-8"))) {
            while (br.readLine() != null) {
                recordsRead++;
            }
        }
    }
    return recordsRead;
}

From source file:io.druid.indexer.HdfsClasspathSetupTest.java

License:Apache License

@Test
public void testAddSnapshotJarToClasspath() throws IOException {
    Job job = Job.getInstance(conf, "test-job");
    DistributedFileSystem fs = miniCluster.getFileSystem();
    Path intermediatePath = new Path("/tmp/classpath");
    JobHelper.addSnapshotJarToClassPath(dummyJarFile, intermediatePath, fs, job);
    Path expectedJarPath = new Path(intermediatePath, dummyJarFile.getName());
    // check file gets uploaded to HDFS
    Assert.assertTrue(fs.exists(expectedJarPath));
    // check file gets added to the classpath
    Assert.assertEquals(expectedJarPath.toString(), job.getConfiguration().get(MRJobConfig.CLASSPATH_FILES));
    // check uploaded content matches; try-with-resources closes the HDFS
    // stream the original version leaked inside IOUtils.toByteArray(...)
    try (java.io.InputStream in = fs.open(expectedJarPath)) {
        Assert.assertEquals(dummyJarString, StringUtils.fromUtf8(IOUtils.toByteArray(in)));
    }
}

From source file:io.druid.indexer.HdfsClasspathSetupTest.java

License:Apache License

@Test
public void testAddNonSnapshotJarToClasspath() throws IOException {
    Job job = Job.getInstance(conf, "test-job");
    DistributedFileSystem fs = miniCluster.getFileSystem();
    JobHelper.addJarToClassPath(dummyJarFile, finalClasspath, intermediatePath, fs, job);
    Path expectedJarPath = new Path(finalClasspath, dummyJarFile.getName());
    // check file gets uploaded to final HDFS path
    Assert.assertTrue(fs.exists(expectedJarPath));
    // check that the intermediate file gets deleted
    Assert.assertFalse(fs.exists(new Path(intermediatePath, dummyJarFile.getName())));
    // check file gets added to the classpath
    Assert.assertEquals(expectedJarPath.toString(), job.getConfiguration().get(MRJobConfig.CLASSPATH_FILES));
    // check uploaded content matches; try-with-resources closes the HDFS
    // stream the original version leaked inside IOUtils.toByteArray(...)
    try (java.io.InputStream in = fs.open(expectedJarPath)) {
        Assert.assertEquals(dummyJarString, StringUtils.fromUtf8(IOUtils.toByteArray(in)));
    }
}

From source file:io.hops.erasure_coding.TestBlockReconstructor.java

License:Apache License

@Test
public void testSourceBlockRepair() throws IOException, InterruptedException {
    DistributedFileSystem dfs = (DistributedFileSystem) getFileSystem();
    TestDfsClient testDfsClient = new TestDfsClient(getConfig());
    testDfsClient.injectIntoDfs(dfs);
    FileStatus testFileStatus = dfs.getFileStatus(testFile);

    // Pick a (seeded, reproducible) random block of the source file and drop it.
    String path = testFileStatus.getPath().toUri().getPath();
    int blockToLose = new Random(seed)
            .nextInt((int) (testFileStatus.getLen() / testFileStatus.getBlockSize()));
    LocatedBlock lb = dfs.getClient().getLocatedBlocks(path, 0, Long.MAX_VALUE).get(blockToLose);
    DataNodeUtil.loseBlock(getCluster(), lb);
    List<LocatedBlock> lostBlocks = new ArrayList<LocatedBlock>();
    lostBlocks.add(lb);
    LocatedBlocks locatedBlocks = new LocatedBlocks(0, false, lostBlocks, null, true);
    testDfsClient.setMissingLocatedBlocks(locatedBlocks);

    // Reconstruct the lost block from the parity file.
    LocatedBlocks missingBlocks = new LocatedBlocks(testFileStatus.getLen(), false,
            new ArrayList<LocatedBlock>(), null, true);
    missingBlocks.getLocatedBlocks().add(lb);
    BlockReconstructor blockReconstructor = new BlockReconstructor(conf);
    Decoder decoder = new Decoder(conf, Util.getCodec(Util.Codecs.SRC));
    blockReconstructor.processFile(testFile, testParityFile, missingBlocks, decoder, null);

    // Block is recovered to the same data node so no need to wait for the block report.
    // try-with-resources: the original never closed the input stream.
    try (FSDataInputStream in = dfs.open(testFile)) {
        byte[] buff = new byte[TEST_BLOCK_COUNT * DFS_TEST_BLOCK_SIZE];
        in.readFully(0, buff);
    } catch (BlockMissingException e) {
        LOG.error("Reading failed", e);
        fail("Repair failed. Missing a block.");
    }
}

From source file:io.hops.erasure_coding.TestBlockReconstructor.java

License:Apache License

@Test
public void testParityBlockRepair() throws IOException, InterruptedException {
    DistributedFileSystem dfs = (DistributedFileSystem) getFileSystem();
    TestDfsClient testDfsClient = new TestDfsClient(getConfig());
    testDfsClient.injectIntoDfs(dfs);
    FileStatus parityFileStatus = dfs.getFileStatus(testParityFile);

    // Pick a (seeded, reproducible) random block of the parity file and drop it.
    String path = parityFileStatus.getPath().toUri().getPath();
    int blockToLose = new Random(seed)
            .nextInt((int) (parityFileStatus.getLen() / parityFileStatus.getBlockSize()));
    LocatedBlock lb = dfs.getClient().getLocatedBlocks(path, 0, Long.MAX_VALUE).get(blockToLose);
    DataNodeUtil.loseBlock(getCluster(), lb);
    List<LocatedBlock> lostBlocks = new ArrayList<LocatedBlock>();
    lostBlocks.add(lb);
    LocatedBlocks locatedBlocks = new LocatedBlocks(0, false, lostBlocks, null, true);
    testDfsClient.setMissingLocatedBlocks(locatedBlocks);

    // Reconstruct the lost parity block from the source file.
    LocatedBlocks missingBlocks = new LocatedBlocks(parityFileStatus.getLen(), false,
            new ArrayList<LocatedBlock>(), null, true);
    missingBlocks.getLocatedBlocks().add(lb);
    BlockReconstructor blockReconstructor = new BlockReconstructor(conf);
    // NOTE(review): SRC codec is used for a *parity* repair — mirrors
    // testSourceBlockRepair; confirm this is the intended codec.
    Decoder decoder = new Decoder(conf, Util.getCodec(Util.Codecs.SRC));
    blockReconstructor.processParityFile(testFile, testParityFile, missingBlocks, decoder, null);

    // Block is recovered to the same data node so no need to wait for the block report.
    // try-with-resources: the original never closed the input stream.
    try (FSDataInputStream in = dfs.open(testParityFile)) {
        byte[] buff = new byte[DFS_TEST_BLOCK_SIZE * codec.parityLength];
        in.readFully(0, buff);
    } catch (BlockMissingException e) {
        LOG.error("Reading failed", e);
        fail("Repair failed. Missing a block.");
    }
}

From source file:io.hops.erasure_coding.TestErasureCodingManagerEndless.java

License:Apache License

@Ignore
public void endlessSourceTest() throws IOException, InterruptedException {
    DistributedFileSystem dfs = (DistributedFileSystem) getFileSystem();

    Codec.initializeCodecs(getConfig());
    EncodingPolicy policy = new EncodingPolicy("src", (short) 1);
    Util.createRandomFile(dfs, testFile, seed, TEST_BLOCK_COUNT, DFS_TEST_BLOCK_SIZE, policy);
    FileStatus testFileStatus = dfs.getFileStatus(testFile);

    while (!dfs.getEncodingStatus(testFile.toUri().getPath()).isEncoded()) {
        try {//w  w  w. j a va 2s  .com
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            LOG.error("Wait for encoding thread was interrupted.");
        }
    }

    EncodingStatus status = dfs.getEncodingStatus(testFile.toUri().getPath());
    Path parityPath = new Path("/parity/" + status.getParityFileName());
    FileStatus parityStatus = dfs.getFileStatus(parityPath);
    assertEquals(parityStatus.getLen(), TEST_STRIPE_COUNT * TEST_PARITY_LENGTH * DFS_TEST_BLOCK_SIZE);
    try {
        FSDataInputStream in = dfs.open(parityPath);
        byte[] buff = new byte[TEST_STRIPE_COUNT * TEST_PARITY_LENGTH * DFS_TEST_BLOCK_SIZE];
        in.readFully(0, buff);
    } catch (BlockMissingException e) {
        LOG.error("Reading parity failed", e);
        fail("Parity could not be read.");
    }

    String path = testFileStatus.getPath().toUri().getPath();
    int blockToLoose = new Random(seed)
            .nextInt((int) (testFileStatus.getLen() / testFileStatus.getBlockSize()));
    LocatedBlock lb = dfs.getClient().getLocatedBlocks(path, 0, Long.MAX_VALUE).get(blockToLoose);
    DataNodeUtil.loseBlock(getCluster(), lb);
    LOG.info("Loosing block " + lb.toString());

    EncodingStatus lastStatus = null;
    while (true) {
        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            LOG.warn("Was interrupted", e);
        }
        EncodingStatus status2 = dfs.getEncodingStatus(testFile.toUri().getPath());
        if (status2.equals(lastStatus) == false) {
            LOG.info("New status is " + status2.getStatus());
            lastStatus = status2;
        }
    }
}