List of usage examples for org.apache.hadoop.fs.FileSystem create
public FSDataOutputStream create(Path f, boolean overwrite) throws IOException
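For orientation, here is a minimal, self-contained sketch of the boolean-overwrite overload used by most of the examples below; the path and payload are illustrative, not taken from any of the source files. (The last example on this page uses the create(Path, Progressable) overload instead.)

import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Hypothetical path; overwrite == true replaces any existing file.
        Path path = new Path("/tmp/example.txt");
        FSDataOutputStream out = fs.create(path, true);
        try {
            out.write("hello".getBytes(StandardCharsets.UTF_8));
        } finally {
            out.close();
        }
    }
}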
From source file:com.yahoo.glimmer.indexing.CompressionCodecHelper.java
License:Open Source License
public static OutputStream openOutputStream(Configuration conf, Path path, boolean overwrite) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    OutputStream outputStream = fs.create(path, overwrite);
    return wrapStream(conf, path, outputStream);
}
From source file:com.yahoo.glimmer.indexing.preprocessor.ResourceRecordWriter.java
License:Open Source License
public ResourceRecordWriter(FileSystem fs, Path taskWorkPath, CompressionCodec codecIfAny) throws IOException {
    if (fs.exists(taskWorkPath)) {
        throw new IOException("Task work path already exists:" + taskWorkPath.toString());
    }
    fs.mkdirs(taskWorkPath);

    for (OUTPUT output : OUTPUT.values()) {
        OutputStream out;
        if (codecIfAny != null) {
            Path file = new Path(taskWorkPath, output.filename + codecIfAny.getDefaultExtension());
            out = fs.create(file, false);
            out = codecIfAny.createOutputStream(out);
        } else {
            Path file = new Path(taskWorkPath, output.filename);
            out = fs.create(file, false);
        }
        writersMap.put(output, new OutputStreamWriter(out, Charset.forName("UTF-8")));
    }

    Path file = new Path(taskWorkPath, "bySubject.bz2");
    OutputStream compressedOutputStream = fs.create(file, false);

    file = new Path(taskWorkPath, "bySubject.blockOffsets");
    bySubjectOffsetsOutputStream = fs.create(file, false);
    blockOffsetsBuilder = new BlockOffsets.Builder();

    // Create a Writer on a BZip2 compressed OutputStream with a small block
    // size (1 * 100K).
    uncompressedOutputStream = new BZip2OutputStream(compressedOutputStream, 1, new BZip2OutputStream.Callback() {
        @Override
        public void blockStart(long blockStartBitOffset) {
            if (lastBlockStartBitOffset != 0) {
                blockOffsetsBuilder.setBlockStart(lastBlockStartBitOffset, lastFirstDocId);
            }
            lastBlockStartBitOffset = blockStartBitOffset;
        }

        @Override
        public void finish(long totalBitsWritten) {
            blockOffsetsBuilder.close(totalBitsWritten);
        }
    });
    bySubjectWriter = new OutputStreamWriter(uncompressedOutputStream);
}
From source file:com.yahoo.glimmer.util.ComputeHashTool.java
License:Open Source License
public long buildHash(FileSystem fs, String srcFilename, Long numElements, boolean generateUnsigned,
        boolean generateSigned, final Charset charset, boolean writeInfoFile)
        throws IOException, ClassNotFoundException {
    final MapReducePartInputStreamEnumeration inputStreamEnumeration;
    try {
        inputStreamEnumeration = new MapReducePartInputStreamEnumeration(fs, new Path(srcFilename));
    } catch (IOException e) {
        throw new RuntimeException("Failed to open " + srcFilename, e);
    }

    LineReaderCollection inCollection = new LineReaderCollection(new LineReaderCollection.ReaderFactory() {
        @Override
        public Reader newReader() {
            inputStreamEnumeration.reset();
            return new InputStreamReader(new SequenceInputStream(inputStreamEnumeration), charset);
        }
    });

    String destFilename = inputStreamEnumeration.removeCompressionSuffixIfAny(srcFilename);

    Path unsigendPath = new Path(destFilename + DOT_UNSIGNED);
    HollowTrieMonotoneMinimalPerfectHashFunction<CharSequence> unsignedHash;
    if (generateUnsigned) {
        // if (numElements != null) {
        //     LOGGER.info("\tBuilding unsigned hash with given number of elements:" + numElements);
        // } else {
        //     LOGGER.info("\tBuilding unsigned hash. Getting number of elements from collection...");
        //     long timeToGetSize = System.currentTimeMillis();
        //     numElements = inCollection.size64();
        //     timeToGetSize = System.currentTimeMillis() - timeToGetSize;
        //     LOGGER.info("\tNumber of elements is " + numElements + " found in " + timeToGetSize / 1000 + " seconds");
        // }
        // unsignedHash = new LcpMonotoneMinimalPerfectHashFunction<CharSequence>(inCollection, numElements,
        //         TransformationStrategies.prefixFreeUtf16());
        unsignedHash = new HollowTrieMonotoneMinimalPerfectHashFunction<CharSequence>(inCollection,
                TransformationStrategies.prefixFreeUtf32());
        LOGGER.info("\tSaving unsigned hash as " + unsigendPath.toString());
        writeMapToFile(unsignedHash, fs, unsigendPath);
    } else {
        LOGGER.info("\tLoading unsigned hash from " + unsigendPath.toString());
        unsignedHash = (HollowTrieMonotoneMinimalPerfectHashFunction<CharSequence>) readMpHashFromFile(fs, unsigendPath);
    }

    if (generateSigned) {
        LOGGER.info("\tBuilding signed hash...");
        // ShiftAddXorSignedStringMap signedHash = new ShiftAddXorSignedStringMap(inCollection.iterator(),
        //         unsignedHash, signatureWidth);
        Path signedPath = new Path(destFilename + DOT_SIGNED);
        DataOutputStream signedDataOutputStream = null;
        try {
            signedDataOutputStream = new DataOutputStream(
                    new FastBufferedOutputStream(createOutputStream(fs, signedPath)));
            LongBigListSignedStringMap.sign(inCollection.iterator(), signedDataOutputStream, null);
        } finally {
            if (signedDataOutputStream != null) {
                signedDataOutputStream.close();
            }
        }
        LOGGER.info("\tSaving signed hash as " + signedPath.toString());
    }

    if (writeInfoFile) {
        Path infoPath = new Path(destFilename + DOT_MAPINFO);
        FSDataOutputStream infoStream = fs.create(infoPath, true); // overwrite
        fs.setPermission(infoPath, ALL_PERMISSIONS);
        OutputStreamWriter infoWriter = new OutputStreamWriter(infoStream);
        infoWriter.write("size\t");
        infoWriter.write(Long.toString(unsignedHash.size64()));
        infoWriter.write("\n");
        infoWriter.write("unsignedBits\t");
        infoWriter.write(Long.toString(unsignedHash.numBits()));
        infoWriter.write("\n");
        if (generateSigned) {
            infoWriter.write("signedWidth\t64\n");
        }
        infoWriter.close();
        infoStream.close();
    }

    return unsignedHash.size64();
}
From source file:com.yahoo.glimmer.util.ComputeHashTool.java
License:Open Source License
private static OutputStream createOutputStream(FileSystem fs, Path path) throws IOException {
    FSDataOutputStream outStream = fs.create(path, true); // overwrite
    fs.setPermission(path, ALL_PERMISSIONS);
    return outStream;
}
From source file:com.yahoo.semsearch.fastlinking.io.WikipediaDocnoMapping.java
License:Apache License
/**
 * Creates a mappings file from the contents of a flat text file containing docid to docno
 * mappings. This method is used by {@link WikipediaDocnoMappingBuilder} internally.
 *
 * @param inputFile flat text file containing docid to docno mappings
 * @param outputFile output mappings file
 * @throws IOException
 */
public static void writeDocnoMappingData(FileSystem fs, String inputFile, int n, String outputFile)
        throws IOException {
    LOG.info("Writing " + n + " docids to " + outputFile);
    LineReader reader = new LineReader(fs.open(new Path(inputFile)));

    int cnt = 0;
    Text line = new Text();

    FSDataOutputStream out = fs.create(new Path(outputFile), true);
    out.writeInt(n);
    for (int i = 0; i < n; i++) {
        reader.readLine(line);
        String[] arr = line.toString().split("\\t");
        out.writeInt(Integer.parseInt(arr[0]));
        cnt++;
        if (cnt % 100000 == 0) {
            LOG.info(cnt + " articles");
        }
    }
    out.close();
    reader.close();
    LOG.info("Done!");
}
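The file written above is simply an int count n followed by n 4-byte ints. A minimal sketch of a reader for that format; this helper is hypothetical and not part of WikipediaDocnoMapping:

import java.io.IOException;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical reader for the format written above: an int count n,
// followed by the n ints that writeDocnoMappingData emitted.
static int[] readDocnoMappingData(FileSystem fs, String mappingFile) throws IOException {
    FSDataInputStream in = fs.open(new Path(mappingFile));
    int n = in.readInt();
    int[] mappings = new int[n];
    for (int i = 0; i < n; i++) {
        mappings[i] = in.readInt();
    }
    in.close();
    return mappings;
}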
From source file:com.yolodata.tbana.testutils.HadoopFileTestUtils.java
License:Open Source License
public static void createFileWithContent(FileSystem fs, Path path, String content) throws IOException {
    FSDataOutputStream fso = fs.create(path, true);
    fso.writeBytes(content);
    fso.flush();
    fso.close();
}
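A possible caller of this helper in a test; the local-filesystem setup and fixture path here are assumed for illustration, not taken from the source project:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical test setup: write a small fixture file on the local filesystem.
FileSystem fs = FileSystem.getLocal(new Configuration());
Path fixture = new Path("build/test/fixture.csv");
HadoopFileTestUtils.createFileWithContent(fs, fixture, "a,b,c\n1,2,3\n");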
From source file:com.zjy.mongo.output.MongoRecordWriter.java
License:Apache License
/**
 * Create a MongoRecordWriter that targets multiple DBCollections.
 *
 * @param c a list of DBCollections
 * @param ctx the TaskAttemptContext
 */
public MongoRecordWriter(final List<DBCollection> c, final TaskAttemptContext ctx) {
    collections = new ArrayList<DBCollection>(c);
    context = ctx;
    bsonWritable = new BSONWritable();

    // Initialize output stream.
    try {
        FileSystem fs = FileSystem.get(ctx.getConfiguration());
        Path outputPath = MongoOutputCommitter.getTaskAttemptPath(ctx);
        LOG.info("Writing to temporary file: " + outputPath.toString());
        outputStream = fs.create(outputPath, true);
    } catch (IOException e) {
        LOG.error("Could not open temporary file for buffering Mongo output", e);
    }
}
From source file:com.zjy.mongo.splitter.BSONSplitter.java
License:Apache License
/**
 * Write out the splits file, if doing so has been enabled. Splits must
 * already have been calculated previously by a call to {@link
 * #readSplitsForFile readSplitsForFile} or {@link #readSplits readSplits}.
 *
 * @see com.zjy.mongo.util.MongoConfigUtil#BSON_WRITE_SPLITS
 *
 * @throws IOException when an error occurs writing the file
 */
public void writeSplits() throws IOException {
    if (getConf().getBoolean("bson.split.write_splits", true)) {
        LOG.info("Writing splits to disk.");
    } else {
        LOG.info("bson.split.write_splits is set to false - skipping writing splits to disk.");
        return;
    }

    if (splitsList == null) {
        LOG.info("No splits found, skipping write of splits file.");
        return; // nothing to write; without this the loop below would throw a NullPointerException
    }

    Path outputPath = getSplitsFilePath(inputPath, getConf());
    FileSystem pathFileSystem = outputPath.getFileSystem(getConf());
    FSDataOutputStream fsDataOut = null;
    try {
        // Fail if a splits file already exists at this path (overwrite == false).
        fsDataOut = pathFileSystem.create(outputPath, false);
        for (FileSplit inputSplit : splitsList) {
            BSONObject splitObj = BasicDBObjectBuilder.start()
                    .add("s", inputSplit.getStart())
                    .add("l", inputSplit.getLength()).get();
            byte[] encodedObj = bsonEnc.encode(splitObj);
            fsDataOut.write(encodedObj, 0, encodedObj.length);
        }
    } catch (IOException e) {
        LOG.error("Could not create splits file: " + e.getMessage());
        throw e;
    } finally {
        if (fsDataOut != null) {
            fsDataOut.close();
        }
    }
}
From source file:com.zqh.giraph.standalone.SimpleAggregatorWriter.java
License:Apache License
@SuppressWarnings("rawtypes") @Override// w ww . ja v a 2 s . c om public void initialize(Context context, long applicationAttempt) throws IOException { setFilename(applicationAttempt); Path p = new Path(FILENAME); FileSystem fs = FileSystem.get(context.getConfiguration()); output = fs.create(p, true); }
From source file:crunch.MaxTemperature.java
License:Apache License
public static void main(String[] args) throws Exception {
    String localSrc = args[0];
    String dst = args[1];

    // Buffered stream over the local source file.
    InputStream in = new BufferedInputStream(new FileInputStream(localSrc));

    Configuration conf = new Configuration();
    // Resolve the FileSystem from the destination URI and the configuration.
    FileSystem fs = FileSystem.get(URI.create(dst), conf);

    // The create(Path, Progressable) overload reports progress as data is written.
    OutputStream out = fs.create(new Path(dst), new Progressable() {
        public void progress() {
            System.out.print(".");
        }
    });

    // copyBytes closes both streams when the last argument is true.
    IOUtils.copyBytes(in, out, 4096, true);
}