Example usage for org.apache.hadoop.fs FileSystem create

Introduction

On this page you can find example usage of org.apache.hadoop.fs FileSystem create.

Prototype

public FSDataOutputStream create(Path f, short replication) throws IOException 

Document

Create an FSDataOutputStream at the indicated Path. Note that FileSystem offers several create overloads; besides the replication-factor variant shown in the prototype above, the examples below also use the overloads that take a boolean overwrite flag or a Progressable progress callback.
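
A minimal sketch of the prototype above (the path, replication factor, and payload are illustrative placeholders, not taken from the examples below):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Creates (or overwrites) the file with a replication factor of 2.
        FSDataOutputStream out = fs.create(new Path("/tmp/example.txt"), (short) 2);
        try {
            out.writeUTF("hello");
        } finally {
            out.close(); // close to flush the data to the file system
        }
    }
}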

Usage

From source file:com.yahoo.glimmer.indexing.CompressionCodecHelper.java

License:Open Source License

public static OutputStream openOutputStream(Configuration conf, Path path, boolean overwrite)
        throws IOException {
    FileSystem fs = FileSystem.get(conf);
    OutputStream outputStream = fs.create(path, overwrite);
    return wrapStream(conf, path, outputStream);
}
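
A possible call site for this helper (the output path and payload are placeholders; how wrapStream compresses the raw stream depends on code not shown in this snippet):

Configuration conf = new Configuration();
Path outPath = new Path("/data/triples.bz2");
OutputStream os = CompressionCodecHelper.openOutputStream(conf, outPath, true); // overwrite if the file exists
try {
    os.write("example record\n".getBytes(java.nio.charset.StandardCharsets.UTF_8));
} finally {
    os.close();
}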

From source file:com.yahoo.glimmer.indexing.preprocessor.ResourceRecordWriter.java

License:Open Source License

public ResourceRecordWriter(FileSystem fs, Path taskWorkPath, CompressionCodec codecIfAny) throws IOException {
    if (fs.exists(taskWorkPath)) {
        throw new IOException("Task work path already exists:" + taskWorkPath.toString());
    }
    fs.mkdirs(taskWorkPath);

    for (OUTPUT output : OUTPUT.values()) {
        OutputStream out;
        if (codecIfAny != null) {
            Path file = new Path(taskWorkPath, output.filename + codecIfAny.getDefaultExtension());
            out = fs.create(file, false);
            out = codecIfAny.createOutputStream(out);
        } else {
            Path file = new Path(taskWorkPath, output.filename);
            out = fs.create(file, false);
        }
        writersMap.put(output, new OutputStreamWriter(out, Charset.forName("UTF-8")));
    }

    Path file = new Path(taskWorkPath, "bySubject.bz2");
    OutputStream compressedOutputStream = fs.create(file, false);
    file = new Path(taskWorkPath, "bySubject.blockOffsets");
    bySubjectOffsetsOutputStream = fs.create(file, false);

    blockOffsetsBuilder = new BlockOffsets.Builder();
    // Create a Writer on a BZip2 compressed OutputStream with a small block
    // size (1 * 100k, the first constructor argument).
    uncompressedOutputStream = new BZip2OutputStream(compressedOutputStream, 1,
            new BZip2OutputStream.Callback() {
                @Override
                public void blockStart(long blockStartBitOffset) {
                    if (lastBlockStartBitOffset != 0) {
                        blockOffsetsBuilder.setBlockStart(lastBlockStartBitOffset, lastFirstDocId);
                    }
                    lastBlockStartBitOffset = blockStartBitOffset;
                }

                @Override
                public void finish(long totalBitsWritten) {
                    blockOffsetsBuilder.close(totalBitsWritten);
                }
            });
    bySubjectWriter = new OutputStreamWriter(uncompressedOutputStream);
}

From source file:com.yahoo.glimmer.util.ComputeHashTool.java

License:Open Source License

public long buildHash(FileSystem fs, String srcFilename, Long numElements, boolean generateUnsigned,
        boolean generateSigned, final Charset charset, boolean writeInfoFile)
        throws IOException, ClassNotFoundException {
    final MapReducePartInputStreamEnumeration inputStreamEnumeration;
    try {
        inputStreamEnumeration = new MapReducePartInputStreamEnumeration(fs, new Path(srcFilename));
    } catch (IOException e) {
        throw new RuntimeException("Failed to open " + srcFilename, e);
    }

    LineReaderCollection inCollection = new LineReaderCollection(new LineReaderCollection.ReaderFactory() {
        @Override
        public Reader newReader() {
            inputStreamEnumeration.reset();
            return new InputStreamReader(new SequenceInputStream(inputStreamEnumeration), charset);
        }
    });

    String destFilename = inputStreamEnumeration.removeCompressionSuffixIfAny(srcFilename);
    Path unsignedPath = new Path(destFilename + DOT_UNSIGNED);

    HollowTrieMonotoneMinimalPerfectHashFunction<CharSequence> unsignedHash;
    if (generateUnsigned) {
        //       if (numElements != null) {
        //      LOGGER.info("\tBuilding unsigned hash with given number of elements:" + numElements);
        //       } else {
        //      LOGGER.info("\tBuilding unsigned hash. Getting number of elements from collection...");
        //      long timeToGetSize = System.currentTimeMillis();
        //      numElements = inCollection.size64();
        //      timeToGetSize = System.currentTimeMillis() - timeToGetSize;
        //      LOGGER.info("\tNumber of elements is " + numElements + " found in " + timeToGetSize / 1000 + " seconds");
        //       }
        //       unsignedHash = new LcpMonotoneMinimalPerfectHashFunction<CharSequence>(inCollection, numElements, TransformationStrategies.prefixFreeUtf16());
        unsignedHash = new HollowTrieMonotoneMinimalPerfectHashFunction<CharSequence>(inCollection,
                TransformationStrategies.prefixFreeUtf32());
        LOGGER.info("\tSaving unsigned hash as " + unsigendPath.toString());
        writeMapToFile(unsignedHash, fs, unsigendPath);
    } else {
        LOGGER.info("\tLoading unsigned hash from " + unsigendPath.toString());
        unsignedHash = (HollowTrieMonotoneMinimalPerfectHashFunction<CharSequence>) readMpHashFromFile(fs,
                unsigendPath);
    }

    if (generateSigned) {
        LOGGER.info("\tBuilding signed hash...");
        //       ShiftAddXorSignedStringMap signedHash = new ShiftAddXorSignedStringMap(inCollection.iterator(), unsignedHash, signatureWidth);
        Path signedPath = new Path(destFilename + DOT_SIGNED);
        DataOutputStream signedDataOutputStream = null;
        try {
            signedDataOutputStream = new DataOutputStream(
                    new FastBufferedOutputStream(createOutputStream(fs, signedPath)));
            LongBigListSignedStringMap.sign(inCollection.iterator(), signedDataOutputStream, null);
        } finally {
            if (signedDataOutputStream != null) {
                signedDataOutputStream.close();
            }
        }

        LOGGER.info("\tSaving signed hash as " + signedPath.toString());
    }

    if (writeInfoFile) {
        Path infoPath = new Path(destFilename + DOT_MAPINFO);
        FSDataOutputStream infoStream = fs.create(infoPath, true); // overwrite
        fs.setPermission(infoPath, ALL_PERMISSIONS);
        OutputStreamWriter infoWriter = new OutputStreamWriter(infoStream);
        infoWriter.write("size\t");
        infoWriter.write(Long.toString(unsignedHash.size64()));
        infoWriter.write("\n");
        infoWriter.write("unsignedBits\t");
        infoWriter.write(Long.toString((unsignedHash).numBits()));
        infoWriter.write("\n");
        if (generateSigned) {
            infoWriter.write("signedWidth\t64\n");
        }
        infoWriter.close();
        infoStream.close();
    }

    return unsignedHash.size64();
}

From source file:com.yahoo.glimmer.util.ComputeHashTool.java

License:Open Source License

private static OutputStream createOutputStream(FileSystem fs, Path path) throws IOException {
    FSDataOutputStream outStream = fs.create(path, true); // overwrite
    fs.setPermission(path, ALL_PERMISSIONS);
    return outStream;
}
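
ALL_PERMISSIONS is a constant defined elsewhere in ComputeHashTool; a plausible stand-in (an assumption, not the project's actual definition) would be:

// Hypothetical definition of the ALL_PERMISSIONS constant referenced above.
private static final FsPermission ALL_PERMISSIONS = new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL);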

From source file:com.yahoo.semsearch.fastlinking.io.WikipediaDocnoMapping.java

License:Apache License

/**
 * Creates a mappings file from the contents of a flat text file containing docid to docno
 * mappings. This method is used by {@link WikipediaDocnoMappingBuilder} internally.
 *
 * @param fs the FileSystem holding both the input and output files
 * @param inputFile flat text file containing docid to docno mappings
 * @param n the number of docids to write
 * @param outputFile output mappings file
 * @throws IOException if the mappings cannot be read or written
 */
static public void writeDocnoMappingData(FileSystem fs, String inputFile, int n, String outputFile)
        throws IOException {
    LOG.info("Writing " + n + " docids to " + outputFile);
    LineReader reader = new LineReader(fs.open(new Path(inputFile)));

    int cnt = 0;
    Text line = new Text();

    FSDataOutputStream out = fs.create(new Path(outputFile), true);
    out.writeInt(n);
    for (int i = 0; i < n; i++) {
        reader.readLine(line);
        String[] arr = line.toString().split("\\t");
        out.writeInt(Integer.parseInt(arr[0]));
        cnt++;
        if (cnt % 100000 == 0) {
            LOG.info(cnt + " articles");
        }
    }
    out.close();
    reader.close();
    LOG.info("Done!");
}
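
The file written above is simply a count followed by n docids, so reading it back is straightforward. A sketch of such a reader (illustrative only, not part of the class as shown here):

static int[] readDocnoMappingData(FileSystem fs, String mappingFile) throws IOException {
    FSDataInputStream in = fs.open(new Path(mappingFile));
    try {
        int n = in.readInt(); // the count written first by writeDocnoMappingData
        int[] docids = new int[n];
        for (int i = 0; i < n; i++) {
            docids[i] = in.readInt(); // docids in the order they were written
        }
        return docids;
    } finally {
        in.close();
    }
}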

From source file:com.yolodata.tbana.testutils.HadoopFileTestUtils.java

License:Open Source License

public static void createFileWithContent(FileSystem fs, Path path, String content) throws IOException {
    FSDataOutputStream fso = fs.create(path, true);
    fso.writeBytes(content);
    fso.flush();
    fso.close();
}
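
A typical use of this test helper (local file system, placeholder path and content):

FileSystem fs = FileSystem.getLocal(new Configuration());
Path testFile = new Path("/tmp/test-input.csv");
HadoopFileTestUtils.createFileWithContent(fs, testFile, "a,b,c\n1,2,3\n");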

From source file:com.zjy.mongo.output.MongoRecordWriter.java

License:Apache License

/**
 * Create a MongoRecordWriter that targets multiple DBCollections.
 * @param c a list of DBCollections
 * @param ctx the TaskAttemptContext
 */
public MongoRecordWriter(final List<DBCollection> c, final TaskAttemptContext ctx) {
    collections = new ArrayList<DBCollection>(c);
    context = ctx;
    bsonWritable = new BSONWritable();

    // Initialize output stream.
    try {
        FileSystem fs = FileSystem.get(ctx.getConfiguration());
        Path outputPath = MongoOutputCommitter.getTaskAttemptPath(ctx);
        LOG.info("Writing to temporary file: " + outputPath.toString());
        outputStream = fs.create(outputPath, true);
    } catch (IOException e) {
        LOG.error("Could not open temporary file for buffering Mongo output", e);
    }
}

From source file:com.zjy.mongo.splitter.BSONSplitter.java

License:Apache License

/**
 * Write out the splits file, if doing so has been enabled. Splits must
 * already have been calculated previously by a call to {@link
 * #readSplitsForFile readSplitsForFile} or {@link #readSplits readSplits}.
 *
 * @see com.zjy.mongo.util.MongoConfigUtil#BSON_WRITE_SPLITS
 *
 * @throws IOException when an error occurs writing the file
 */
public void writeSplits() throws IOException {
    if (getConf().getBoolean("bson.split.write_splits", true)) {
        LOG.info("Writing splits to disk.");
    } else {
        LOG.info("bson.split.write_splits is set to false - skipping writing splits to disk.");
        return;
    }

    if (splitsList == null) {
        LOG.info("No splits found, skipping write of splits file.");
        return; // nothing to write
    }

    Path outputPath = getSplitsFilePath(inputPath, getConf());
    FileSystem pathFileSystem = outputPath.getFileSystem(getConf());
    FSDataOutputStream fsDataOut = null;
    try {
        fsDataOut = pathFileSystem.create(outputPath, false);
        for (FileSplit inputSplit : splitsList) {
            BSONObject splitObj = BasicDBObjectBuilder.start().add("s", inputSplit.getStart())
                    .add("l", inputSplit.getLength()).get();
            byte[] encodedObj = bsonEnc.encode(splitObj);
            fsDataOut.write(encodedObj, 0, encodedObj.length);
        }
    } catch (IOException e) {
        LOG.error("Could not create splits file: " + e.getMessage());
        throw e;
    } finally {
        if (fsDataOut != null) {
            fsDataOut.close();
        }
    }
}
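
As the first branch shows, writing the splits file is governed by the bson.split.write_splits configuration key (default true), so it can be disabled before running the job:

Configuration conf = new Configuration();
conf.setBoolean("bson.split.write_splits", false); // skip writing the splits file to disk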

From source file:com.zqh.giraph.standalone.SimpleAggregatorWriter.java

License:Apache License

@SuppressWarnings("rawtypes")
@Override
public void initialize(Context context, long applicationAttempt) throws IOException {
    setFilename(applicationAttempt);
    Path p = new Path(FILENAME);
    FileSystem fs = FileSystem.get(context.getConfiguration());
    output = fs.create(p, true);
}

From source file:crunch.MaxTemperature.java

License:Apache License

public static void main(String[] args) throws Exception {
    String localSrc = args[0];
    String dst = args[1];

    InputStream in = new BufferedInputStream(new FileInputStream(localSrc));

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(dst), conf);
    OutputStream out = fs.create(new Path(dst), new Progressable() {
        public void progress() {
            // Called periodically as data is written; prints a dot to show progress.
            System.out.print(".");
        }
    });

    IOUtils.copyBytes(in, out, 4096, true); // copies the local file to dst and closes both streams
}