List of usage examples for org.apache.hadoop.fs.FileSystem.create(Path)
public FSDataOutputStream create(Path f) throws IOException
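Before the project-specific examples below, here is a minimal, self-contained sketch of the call pattern they all share: obtain a FileSystem from a Configuration, call create(Path) to get an FSDataOutputStream, write, flush, and close. The path and payload are hypothetical placeholders, not taken from any of the source files listed below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;
import java.nio.charset.StandardCharsets;

public class FileSystemCreateSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Resolves to the default filesystem (HDFS if configured, local filesystem otherwise).
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/tmp/example.txt"); // hypothetical path
        // create(Path) overwrites any existing file and returns an FSDataOutputStream.
        FSDataOutputStream out = fs.create(path);
        try {
            out.write("hello\n".getBytes(StandardCharsets.UTF_8));
            out.flush();
        } finally {
            out.close();
        }
    }
}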
From source file:com.neusoft.hbase.test.hadoop.dataload.HFileOutputFormatBase.java
License:Apache License
public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(// cellKeyValue //static <V extends Cell> RecordWriter<ImmutableBytesWritable, V> createRecordWriter() //getRecordWriter()
        final TaskAttemptContext context) throws IOException, InterruptedException {
    // Get the path of the temporary output file
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
    final Path ignoreOutputPath = getDeleteRowKeyFile(outputPath);
    final Configuration conf = context.getConfiguration();
    final FileSystem fs = outputdir.getFileSystem(conf);

    // These configs. are from hbase-*.xml
    final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
    // Invented config. Add to hbase-*.xml if other than default compression.
    final String defaultCompression = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
    final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
            false);

    // create a map from column family to the compression algorithm
    final Map<byte[], String> compressionMap = createFamilyCompressionMap(conf);
    final Map<byte[], String> bloomTypeMap = createFamilyBloomMap(conf);
    final Map<byte[], String> blockSizeMap = createFamilyBlockSizeMap(conf);

    String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_CONF_KEY);
    final HFileDataBlockEncoder encoder;
    if (dataBlockEncodingStr == null) {
        encoder = NoOpDataBlockEncoder.INSTANCE;
    } else {
        try {
            encoder = new HFileDataBlockEncoderImpl(DataBlockEncoding.valueOf(dataBlockEncodingStr));
        } catch (IllegalArgumentException ex) {
            throw new RuntimeException("Invalid data block encoding type configured for the param "
                    + DATABLOCK_ENCODING_CONF_KEY + " : " + dataBlockEncodingStr);
        }
    }

    return new RecordWriter<ImmutableBytesWritable, KeyValue>() {// V KeyValue
        // Map of families to writers and how much has been output on the writer.
        private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                Bytes.BYTES_COMPARATOR);
        private final FSDataOutputStream dos = fs.create(ignoreOutputPath);
        private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
        private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
        private boolean rollRequested = false;

        public void write(ImmutableBytesWritable row, KeyValue kv)// V cellKeyValue kv
                throws IOException {
            // KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
            // null input == user explicitly wants to flush
            if (row == null && kv == null) {
                rollWriters();
                return;
            }

            byte[] rowKey = kv.getRow();
            long length = kv.getLength();
            byte[] family = kv.getFamily();

            if (ignore(kv)) {
                byte[] readBuf = rowKey;
                dos.write(readBuf, 0, readBuf.length);
                dos.write(Bytes.toBytes("\n"));
                return;
            }

            WriterLength wl = this.writers.get(family);

            // If this is a new column family, verify that the directory exists
            if (wl == null) {
                fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
            }

            // If any of the HFiles for the column families has reached maxsize,
            // we need to roll all the writers
            if (wl != null && wl.written + length >= maxsize) {
                this.rollRequested = true;
            }

            // This can only happen once a row is finished though
            if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                rollWriters();
            }

            // create a new HLog writer, if necessary
            if (wl == null || wl.writer == null) {
                wl = getNewWriter(family, conf);
            }

            // we now have the proper HLog writer. full steam ahead
            kv.updateLatestStamp(this.now);
            wl.writer.append(kv);
            wl.written += length;

            // Copy the row so we know when a row transition.
            this.previousRow = rowKey;
        }

        private void rollWriters() throws IOException {
            for (WriterLength wl : this.writers.values()) {
                if (wl.writer != null) {
                    LOG.info("Writer=" + wl.writer.getPath()
                            + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
                    close(wl.writer);
                }
                wl.writer = null;
                wl.written = 0;
            }
            this.rollRequested = false;
        }

        /*
         * Create a new StoreFile.Writer.
         *
         * @param family
         * @return A WriterLength, containing a new StoreFile.Writer.
         * @throws IOException
         */
        private WriterLength getNewWriter(byte[] family, Configuration conf) throws IOException {
            WriterLength wl = new WriterLength();
            Path familydir = new Path(outputdir, Bytes.toString(family));
            String compression = compressionMap.get(family);
            compression = compression == null ? defaultCompression : compression;
            String bloomTypeStr = bloomTypeMap.get(family);
            BloomType bloomType = BloomType.NONE;
            if (bloomTypeStr != null) {
                bloomType = BloomType.valueOf(bloomTypeStr);
            }
            String blockSizeString = blockSizeMap.get(family);
            int blockSize = blockSizeString == null ? HConstants.DEFAULT_BLOCKSIZE
                    : Integer.parseInt(blockSizeString);
            Configuration tempConf = new Configuration(conf);
            tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
            wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs)
                    .withOutputDir(familydir).withBloomType(bloomType)
                    .withComparator(KeyValue.COMPARATOR).build();
            this.writers.put(family, wl);
            return wl;
        }

        private void close(final StoreFile.Writer w) throws IOException {
            if (w != null) {
                w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
                w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                        Bytes.toBytes(context.getTaskAttemptID().toString()));
                w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY,
                        Bytes.toBytes(compactionExclude));
                w.appendTrackedTimestampsToMetadata();
                w.close();
            }
        }

        public void close(TaskAttemptContext c) throws IOException, InterruptedException {
            dos.flush();
            dos.close();
            for (WriterLength wl : this.writers.values()) {
                close(wl.writer);
            }
        }
    };
}
From source file:com.ostor.dedup.hadoop.DedupHadoopObject.java
License:Open Source License
public void dumpToHDFS(FileSystem fs, Path path) throws Exception { logger.debug("Dump object - " + getName() + " to path - " + path); FSDataOutputStream fdos = fs.create(path); fdos.write(dumpToString().getBytes()); fdos.flush();/* w w w . j av a2 s .c o m*/ fdos.close(); }
From source file:com.ostor.dedup.hadoop.DedupHadoopSegment.java
License:Open Source License
public void dumpToHDFS(FileSystem fs, Path path) throws Exception { logger.debug("Dump segment - " + getId() + " to path - " + path); FSDataOutputStream fdos = fs.create(path); fdos.write(dumpMetaData().getBytes()); fdos.flush();//from w ww. jav a2s . com fdos.close(); logger.debug("Dump segment data of length - " + getData().length); FSDataOutputStream fdosData = fs .create(new Path(path.toString() + DedupSegmentStor.SERIALIZED_DATA_SUFFIX)); fdosData.write(getData()); fdosData.flush(); fdosData.close(); }
From source file:com.phantom.hadoop.examples.dancing.DistributedPentomino.java
License:Apache License
/**
 * Create the input file with all of the possible combinations of the given depth.
 *
 * @param fs    the filesystem to write into
 * @param dir   the directory to write the input file into
 * @param pent  the puzzle
 * @param depth the depth to explore when generating prefixes
 */
private static long createInputDirectory(FileSystem fs, Path dir, Pentomino pent, int depth)
        throws IOException {
    fs.mkdirs(dir);
    List<int[]> splits = pent.getSplits(depth);
    Path input = new Path(dir, "part1");
    PrintWriter file = new PrintWriter(new OutputStreamWriter(
            new BufferedOutputStream(fs.create(input), 64 * 1024), Charsets.UTF_8));
    for (int[] prefix : splits) {
        for (int i = 0; i < prefix.length; ++i) {
            if (i != 0) {
                file.print(',');
            }
            file.print(prefix[i]);
        }
        file.print('\n');
    }
    file.close();
    return fs.getFileStatus(input).getLen();
}
From source file:com.phantom.hadoop.examples.pi.Util.java
License:Apache License
/** Write results */
static void writeResults(String name, List<TaskResult> results, FileSystem fs, String dir)
        throws IOException {
    final Path outfile = new Path(dir, name + ".txt");
    Util.out.println(name + "> writing results to " + outfile);
    final PrintWriter out = new PrintWriter(
            new OutputStreamWriter(fs.create(outfile), Charsets.UTF_8), true);
    try {
        for (TaskResult r : results)
            out.println(r);
    } finally {
        out.close();
    }
}
From source file:com.phantom.hadoop.examples.terasort.TeraOutputFormat.java
License:Apache License
public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext job) throws IOException {
    Path file = getDefaultWorkFile(job, "");
    FileSystem fs = file.getFileSystem(job.getConfiguration());
    FSDataOutputStream fileOut = fs.create(file);
    return new TeraRecordWriter(fileOut, job);
}
From source file:com.philiphubbard.digraph.MRBuildVerticesTest.java
License:Open Source License
private static void setupTest(Configuration conf) throws IOException {
    FileSystem fileSystem = FileSystem.get(conf);

    Path path = new Path(testInput);
    if (fileSystem.exists(path))
        fileSystem.delete(path, true);

    ArrayList<MRVertex> vertices = new ArrayList<MRVertex>();

    MRVertex v0 = new MRVertex(0, conf);
    v0.addEdgeTo(2);
    vertices.add(v0);
    MRVertex v1 = new MRVertex(1, conf);
    v1.addEdgeTo(2);
    vertices.add(v1);
    MRVertex v2 = new MRVertex(2, conf);
    v2.addEdgeTo(3);
    vertices.add(v2);
    MRVertex v3 = new MRVertex(3, conf);
    v3.addEdgeTo(4);
    vertices.add(v3);
    MRVertex v4 = new MRVertex(4, conf);
    v4.addEdgeTo(5);
    v4.addEdgeTo(6);
    vertices.add(v4);
    MRVertex v5 = new MRVertex(5, conf);
    vertices.add(v5);
    MRVertex v6 = new MRVertex(6, conf);
    v6.addEdgeTo(7);
    vertices.add(v6);
    MRVertex v7 = new MRVertex(7, conf);
    vertices.add(v7);

    FSDataOutputStream out = fileSystem.create(path);
    for (MRVertex vertex : vertices) {
        Text text = vertex.toText(MRVertex.EdgeFormat.EDGES_TO);
        byte[] bytes = text.copyBytes();
        for (byte b : bytes)
            out.write(b);
        out.write('\n');
    }
    out.close();

    fileSystem.close();
}
From source file:com.philiphubbard.sabe.MRAssembler.java
License:Open Source License
public boolean run(Path inputPath, Path outputPath)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();

    // Job.getInstance() copies the Configuration argument, so set its properties first.
    conf.setBoolean(MRVertex.CONFIG_ALLOW_EDGE_MULTIPLES, true);
    conf.setBoolean(MRVertex.CONFIG_COMPRESS_CHAIN_MULTIPLES_MUST_MATCH, false);
    conf.setInt(MRMerVertex.CONFIG_MER_LENGTH, vertexMerLength);
    conf.setBoolean(MRBuildVertices.CONFIG_PARTITION_BRANCHES_CHAINS, true);
    conf.setInt(MRBuildVertices.CONFIG_COVERAGE, coverage);
    conf.setInt(MRCompressChains.CONFIG_TERMINATION_COUNT, 1);

    Job buildJob = Job.getInstance(conf);
    buildJob.setJobName("mrassemblerbuild");

    Path buildInputPath = inputPath;
    Path buildOutputPath = new Path("sabe.MRAssemblerTmp");

    System.out.println("sabe.MRAssembler starting vertex construction");
    MRBuildMerVertices.setupJob(buildJob, buildInputPath, buildOutputPath);
    if (!buildJob.waitForCompletion(true))
        return false;

    Path compressInputPath = new Path(buildOutputPath.toString() + "/chain");
    Path compressOutputPath = new Path(buildOutputPath.toString() + "/chainCompress");

    int iter = 0;
    boolean keepGoing = true;
    MRCompressChains.beginIteration();
    while (keepGoing) {
        Job compressJob = Job.getInstance(conf);
        compressJob.setJobName("mrassemblercompress");

        System.out.println("sabe.MRAssembler starting compression iteration " + iter);
        MRCompressMerChains.setupIterationJob(compressJob, compressInputPath, compressOutputPath);
        if (!compressJob.waitForCompletion(true))
            System.exit(1);

        iter++;
        keepGoing = MRCompressChains.continueIteration(compressJob, compressInputPath, compressOutputPath);
    }

    System.out.println("sabe.MRAssembler made " + iter + " compression iterations");

    Path branchPath = new Path(buildOutputPath.toString() + "/branch");
    Path chainPath = compressOutputPath;

    FileSystem fileSystem = FileSystem.get(conf);

    Graph graph = buildCompressedGraph(conf, fileSystem, branchPath, chainPath);
    if (graph != null) {
        ArrayList<String> result = graph.assemble();
        FSDataOutputStream out = fileSystem.create(outputPath);
        for (String seq : result) {
            out.writeBytes(seq);
            out.writeBytes("\n");
        }
    }

    fileSystem.delete(buildOutputPath, true);
    fileSystem.close();

    return true;
}
From source file:com.philiphubbard.sabe.MRAssemblerTest1.java
License:Open Source License
private static void setupTest(Configuration conf) throws IOException {
    FileSystem fileSystem = FileSystem.get(conf);

    Path path = new Path(testInput);
    if (fileSystem.exists(path))
        fileSystem.delete(path, true);

    ArrayList<Text> reads = new ArrayList<Text>();

    // Goal: AATTCGGCCTTCGGCAT
    reads.add(new Text("AATTCGGC\n"));
    reads.add(new Text("CTTCGGCAT\n"));
    reads.add(new Text("AATT\n"));
    reads.add(new Text("CGGCCTTCGGCAT\n"));
    reads.add(new Text("AATTCGGCCTTCG\n"));
    reads.add(new Text("GCAT\n"));

    FSDataOutputStream out = fileSystem.create(path);
    for (Text read : reads) {
        byte[] bytes = read.copyBytes();
        for (byte b : bytes)
            out.write(b);
    }
    out.close();

    fileSystem.close();
}
From source file:com.philiphubbard.sabe.MRAssemblerTest2.java
License:Open Source License
private static void setupTest(Configuration conf) throws IOException {
    FileSystem fileSystem = FileSystem.get(conf);

    Path path = new Path(testInput);
    if (fileSystem.exists(path))
        fileSystem.delete(path, true);

    ArrayList<Text> reads = new ArrayList<Text>();

    // The expected result:
    // CCCTTTCTGTTGACCCATCATTGTTTAGTAACCCGCGGGATGCCTGGCAGACCCGCGGGACGATCTCCTCTGACCCATCATCGAAATTCC
    // Note that it has the following pattern:
    // segment 0: CCCTTTCTGT
    // segment 1, which will be repeated: TGACCCATCA
    // segment 2: TTGTTTAGTA
    // segment 3, which will be repeated: ACCCGCGGGA
    // segment 4: TGCCTGGCAG
    // segment 3, again: ACCCGCGGGA
    // segment 5: CGATCTCCTC
    // segment 1, again: TGACCCATCA
    // segment 6: TCGAAATTCC

    reads.add(new Text("CCCTTTC\n"));
    // Error: initial T omitted.
    reads.add(new Text("GTTGACCCATCATTGTTTAGTAACCCGCGGGATGCCTGGCAGACC"));
    reads.add(new Text("CGCGGGACGAT\n"));
    // Error: final C omitted.
    reads.add(new Text("CTCCTCTGACCCATCATCGAAATTC\n"));

    reads.add(new Text("CCCTTTCTGTTGACCCAT\n"));
    // Error: final C replaced with G.
    reads.add(new Text("CATTGTTTAGTAACCCGCGGGATGCCTGGCAGACG\n"));
    reads.add(new Text("CGCGGGACGATCTCCTCTGACCCATCATCGAAATTCC\n"));

    // Error: C at index 14 replaced with A.
    reads.add(new Text("CCCTTTCTGTTGACACATCATTGTTTAGTAAC"));
    reads.add(new Text("CCGCGGGATGCC\n"));
    // Error: C at index 25 omitted.
    reads.add(new Text("TGGCAGACCCGCGGGACGATCTCCTTGACCCATCATCGAAATTCC\n"));

    reads.add(new Text("CCCTTTCTGTTGACCCATCATTGTTTAGTAACCCGCGGGATGCCTG\n"));
    // Error: G at index 10 replaced with T.
    reads.add(new Text("GCAGACCCGCTGGACGA\n"));
    reads.add(new Text("TCTCCTCTGACCCATCATCGAAATTCC\n"));

    reads.add(new Text("CCCTTTCTGTTGACCCATCATTGTTTAGTAACCCGCGGGATGC"));
    // Error: final G omitted.
    reads.add(new Text("CTGGCAGACCCGC\n"));
    reads.add(new Text("GGACGATCTCCTCT\n"));
    // Error: CG at index 10 transposed to GC
    reads.add(new Text("GACCCATCATCGAAATTCC\n"));

    FSDataOutputStream out = fileSystem.create(path);
    for (Text read : reads) {
        byte[] bytes = read.copyBytes();
        for (byte b : bytes)
            out.write(b);
    }
    out.close();

    fileSystem.close();
}