Usage examples for org.apache.hadoop.hdfs.DistributedFileSystem#initialize
@Override
public void initialize(URI uri, Configuration conf) throws IOException
From source file: org/commoncrawl/service/pagerank/slave/PageRankUtils.java
License:Open Source License
private static FileSystem buildDistributionOutputStreamVector(boolean useSequenceFile, String fileNamePrefix, File localOutputPath, String remoteOutputPath, int myNodeIndex, int nodeCount, Vector<PRValueOutputStream> outputStreamVector) { Configuration conf = new Configuration(CrawlEnvironment.getHadoopConfig()); conf.setInt("dfs.socket.timeout", 240000); conf.setInt("io.file.buffer.size", 4096 * 20); DistributedFileSystem hdfs = new DistributedFileSystem(); try {//from w w w . ja v a 2 s . c om hdfs.initialize(FileSystem.getDefaultUri(conf), conf); for (int i = 0; i < nodeCount; ++i) { // create output filename String fileName = fileNamePrefix + "-" + NUMBER_FORMAT.format(i); // create stream (local or remote stream, depending on i) // remote path Path remotePath = new Path(remoteOutputPath, fileName); // remove file CrawlEnvironment.getDefaultFileSystem().delete(remotePath, false); if (useSequenceFile) { // recreate it ... outputStreamVector.add(new PRSequenceFileOutputStream(conf, CrawlEnvironment.getDefaultFileSystem(), remotePath)); } else { // recreate it ... outputStreamVector .add(new PROldValueOutputStream(CrawlEnvironment.getDefaultFileSystem(), remotePath)); } } } catch (IOException e) { LOG.error(CCStringUtils.stringifyException(e)); for (PRValueOutputStream streamInfo : outputStreamVector) { try { if (streamInfo != null) { streamInfo.close(true); } } catch (IOException e2) { LOG.error(CCStringUtils.stringifyException(e2)); } outputStreamVector.clear(); } } return hdfs; }