Example usage for org.apache.hadoop.hdfs.DistributedFileSystem#initialize

List of usage examples for org.apache.hadoop.hdfs DistributedFileSystem initialize

Introduction

On this page you can find example usage of the initialize method of org.apache.hadoop.hdfs.DistributedFileSystem.

Prototype

@Override
    public void initialize(URI uri, Configuration conf) throws IOException 

Source Link

Usage

From source file: org.commoncrawl.service.pagerank.slave.PageRankUtils.java

License: Open Source License

/**
 * Opens one output stream per node and appends each to {@code outputStreamVector}.
 *
 * <p>A copy of {@code CrawlEnvironment.getHadoopConfig()} is tuned
 * ({@code dfs.socket.timeout} = 240000, {@code io.file.buffer.size} = 4096 * 20) and used to
 * initialize a fresh {@link DistributedFileSystem} against the configuration's default URI.
 * For every index in {@code [0, nodeCount)} the file
 * {@code fileNamePrefix + "-" + NUMBER_FORMAT.format(i)} under {@code remoteOutputPath} is
 * deleted (non-recursively) and a new stream for it is added to the vector.
 *
 * <p>If an {@link IOException} occurs it is logged, every stream already present in the
 * vector is closed via {@code close(true)}, and the vector is emptied. The exception is not
 * rethrown, and the file system instance is still returned.
 *
 * <p>NOTE(review): {@code localOutputPath} and {@code myNodeIndex} are not read by this method.
 * <br>NOTE(review): deletes and streams go through {@code CrawlEnvironment.getDefaultFileSystem()},
 * not through the {@code DistributedFileSystem} created here - confirm this is intended.
 *
 * @param useSequenceFile     when true a {@code PRSequenceFileOutputStream} is created per node,
 *                            otherwise a {@code PROldValueOutputStream}
 * @param fileNamePrefix      prefix of each per-node file name
 * @param localOutputPath     unused here
 * @param remoteOutputPath    parent path under which the per-node files are created
 * @param myNodeIndex         unused here
 * @param nodeCount           number of per-node streams to create
 * @param outputStreamVector  receives the created streams; cleared if an I/O error occurs
 * @return the {@link DistributedFileSystem} instance created by this call
 */
private static FileSystem buildDistributionOutputStreamVector(boolean useSequenceFile, String fileNamePrefix,
        File localOutputPath, String remoteOutputPath, int myNodeIndex, int nodeCount,
        Vector<PRValueOutputStream> outputStreamVector) {

    Configuration conf = new Configuration(CrawlEnvironment.getHadoopConfig());

    conf.setInt("dfs.socket.timeout", 240000);
    conf.setInt("io.file.buffer.size", 4096 * 20);

    DistributedFileSystem hdfs = new DistributedFileSystem();

    try {
        hdfs.initialize(FileSystem.getDefaultUri(conf), conf);

        for (int i = 0; i < nodeCount; ++i) {
            // per-node output file name and its location under the remote output path
            String fileName = fileNamePrefix + "-" + NUMBER_FORMAT.format(i);
            Path remotePath = new Path(remoteOutputPath, fileName);

            // remove any existing file before recreating it
            CrawlEnvironment.getDefaultFileSystem().delete(remotePath, false);

            if (useSequenceFile) {
                outputStreamVector.add(new PRSequenceFileOutputStream(conf,
                        CrawlEnvironment.getDefaultFileSystem(), remotePath));
            } else {
                outputStreamVector
                        .add(new PROldValueOutputStream(CrawlEnvironment.getDefaultFileSystem(), remotePath));
            }
        }
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));

        // Close every stream first; a failure on one must not prevent closing the rest.
        for (PRValueOutputStream streamInfo : outputStreamVector) {
            try {
                if (streamInfo != null) {
                    streamInfo.close(true);
                }
            } catch (IOException e2) {
                LOG.error(CCStringUtils.stringifyException(e2));
            }
        }
        // Clear only after iteration has finished: clearing inside the loop modifies the
        // vector under its own fail-fast iterator and raises ConcurrentModificationException.
        outputStreamVector.clear();
    }

    return hdfs;
}