Example usage for org.apache.hadoop.fs FileSystem create

Introduction

On this page you can find example usage for the create method of org.apache.hadoop.fs.FileSystem.

Prototype

public FSDataOutputStream create(Path f, short replication) throws IOException 

Document

Create an FSDataOutputStream at the indicated Path.
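
The examples below exercise several create overloads, including create(Path, Progressable) and create(Path, boolean). As a minimal, self-contained sketch of the prototype above (the cluster URI and target path are assumptions for illustration):

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateWithReplication {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:9000"), conf);
        // Hypothetical target path; create(Path, short) overwrites any
        // existing file and requests the given block replication factor.
        Path file = new Path("/user/student/replicated_file.txt");
        try (FSDataOutputStream os = fs.create(file, (short) 2)) {
            os.writeUTF("created with replication factor 2");
        }
        fs.close();
    }
}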

Usage

From source file:com.ema.hadoop.test_hdfs.TestWrite.java

public static void main(String[] args) throws IOException, URISyntaxException {

    Configuration configuration = new Configuration();
    FileSystem hdfs = FileSystem.get(new URI("hdfs://localhost:9000"), configuration);
    Path file = new Path("hdfs://localhost:9000/user/student/text_file_write.txt");
    if (hdfs.exists(file)) {
        hdfs.delete(file, true);
    }
    OutputStream os = hdfs.create(file, new Progressable() {
        @Override
        public void progress() {
            System.out.println("...bytes written");
        }
    });
    BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, "UTF-8"));
    br.write("This is just a test to check if it is possible to write a file on HDFS using the Java API");
    br.close();
    hdfs.close();

}
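
Since Progressable declares a single progress() method, the callback above can also be written as a lambda on Java 8+ (a sketch with the same behavior as the anonymous class):

OutputStream os = hdfs.create(file, () -> System.out.println("...bytes written"));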

From source file:com.ema.hadoop.wordcount.WordCount_cache.java

public static void main(String[] args) throws Exception {

    if (args.length != 2) {
        System.err.println("Usage: WordCount <input path> <output path>");
        System.exit(-1);
    }

    // First we write the stop word list
    // it could also be a file manually loaded into HDFS

    String[] stopwords = { "the", "a" };
    Configuration configuration = new Configuration();
    FileSystem hdfs = FileSystem.get(new URI("hdfs://localhost:9000"), configuration);
    Path file = new Path("hdfs://localhost:9000/user/student/stop_words.txt");
    if (hdfs.exists(file)) {
        hdfs.delete(file, true);
    }
    OutputStream os = hdfs.create(file, new Progressable() {
        @Override
        public void progress() {
            System.out.println("...bytes written");
        }
    });
    BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, "UTF-8"));
    for (String w : stopwords) {
        br.write(w + "\n");
    }

    br.close();
    hdfs.close();

    Job job = Job.getInstance();
    job.addCacheFile(new Path("hdfs://localhost:9000/user/student/stop_words.txt").toUri());

    job.setJarByClass(WordCount_cache.class);
    job.setJobName("Word count job");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(WCMapper_cache.class);
    job.setReducerClass(WCReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
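
The stop-word file registered via job.addCacheFile is available to every task through the distributed cache. A minimal sketch of how a mapper such as WCMapper_cache might read it back in setup() (the mapper source is not shown on this page, so the stopwords field and parsing details are assumptions):

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    // getCacheFiles() returns the URIs registered with job.addCacheFile()
    URI[] cacheFiles = context.getCacheFiles();
    if (cacheFiles != null && cacheFiles.length > 0) {
        FileSystem fs = FileSystem.get(context.getConfiguration());
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(fs.open(new Path(cacheFiles[0])), "UTF-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                stopwords.add(line.trim()); // e.g. a HashSet<String> field
            }
        }
    }
}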

From source file:com.github.jmabuin.blaspark.io.IO.java

License:Open Source License

public static void writeVectorToFileInHDFS(String file, DenseVector vector, Configuration conf) {

    try {
        FileSystem fs = FileSystem.get(conf);

        Path pt = new Path(file);

        //FileSystem fileSystem = FileSystem.get(context.getConfiguration());
        BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fs.create(pt, true)));

        bw.write("%%MatrixMarket matrix array real general");
        bw.newLine();
        bw.write(vector.size() + " 1");
        bw.newLine();

        for (int i = 0; i < vector.size(); i++) {
            bw.write(String.valueOf(vector.apply(i)));
            bw.newLine();
        }

        bw.close();
        //fs.close();

    } catch (IOException e) {
        LOG.error("Error in " + IO.class.getName() + ": " + e.getMessage());
        e.printStackTrace();
        System.exit(1);
    }

}
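
A hypothetical call site for this helper (the output path and vector contents are assumptions):

IO.writeVectorToFileInHDFS("hdfs://localhost:9000/user/student/vector.mtx",
        new DenseVector(new double[] { 1.0, 2.0, 3.0 }), new Configuration());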

From source file:com.github.jmabuin.blaspark.io.IO.java

License:Open Source License

public static void writeMatrixToFileInHDFS(String file, DistributedMatrix matrix, Configuration conf) {

    try {
        List<IndexedRow> localRows;
        long numRows = 0;
        long numCols = 0;

        FileSystem fs = FileSystem.get(conf);

        Path pt = new Path(file);

        //FileSystem fileSystem = FileSystem.get(context.getConfiguration());
        BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fs.create(pt, true)));

        JavaRDD<IndexedRow> rows;

        if (matrix.getClass() == IndexedRowMatrix.class) {
            rows = ((IndexedRowMatrix) matrix).rows().toJavaRDD();
        } else if (matrix.getClass() == CoordinateMatrix.class) {
            rows = ((CoordinateMatrix) matrix).toIndexedRowMatrix().rows().toJavaRDD();
        } else if (matrix.getClass() == BlockMatrix.class) {
            rows = ((BlockMatrix) matrix).toIndexedRowMatrix().rows().toJavaRDD();
        } else {
            // fail fast rather than a NullPointerException at rows.collect() below
            throw new IOException("Unsupported matrix type: " + matrix.getClass().getName());
        }

        localRows = rows.collect();

        Vector[] vectors = new Vector[localRows.size()];

        for (int i = 0; i < localRows.size(); i++) {
            vectors[(int) localRows.get(i).index()] = localRows.get(i).vector();
        }

        numRows = matrix.numRows();
        numCols = matrix.numCols();

        bw.write("%%MatrixMarket matrix array real general");
        bw.newLine();
        bw.write(numRows + " " + numCols + " " + (numRows * numCols));
        bw.newLine();

        for (int i = 0; i < vectors.length; i++) {
            bw.write(i + ":");
            for (int j = 0; j < vectors[i].size(); j++) {
                bw.write(String.valueOf(vectors[i].apply(j)) + ",");
            }

            bw.newLine();
        }

        bw.close();
        //fs.close();

    } catch (IOException e) {
        LOG.error("Error in " + IO.class.getName() + ": " + e.getMessage());
        e.printStackTrace();
        System.exit(1);
    }

}

From source file:com.hdfs.concat.crush.integration.CrushMapReduceTest.java

License:Apache License

/**
 * Copies data from the given input stream to an HDFS file at the given path. This method will close the input stream.
 */
protected final void copyStreamToHdfs(InputStream resource, String hdfsDestFileName) throws IOException {
    FileSystem fs = getFileSystem();

    FSDataOutputStream os = fs.create(new Path(hdfsDestFileName), false);

    IOUtils.copyBytes(resource, os, fs.getConf(), true);
}
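
Because IOUtils.copyBytes is called with close set to true, it closes both the input stream and the FSDataOutputStream once the copy finishes. A hypothetical call site, copying a classpath resource into HDFS (the resource and destination names are assumptions):

copyStreamToHdfs(getClass().getResourceAsStream("/fixtures/input.txt"), "/tmp/crush/input.txt");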

From source file:com.hp.hpit.cs.MyTextOutputFormat.java

License:Apache License

public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    String keyValueSeparator = conf.get("mapred.textoutputformat.separator", "\t");
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new LineRecordWriter<K, V>(fileOut, keyValueSeparator);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new LineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator);
    }
}

From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.DataPartitionerLocal.java

License:Open Source License

public void writeTextCellFileToHDFS(JobConf job, String dir, String lpdir) throws IOException {
    long key = getKeyFromFilePath(lpdir);
    FileSystem fs = FileSystem.get(job);
    Path path = new Path(dir + "/" + key);
    BufferedWriter out = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));
    try {
        //for obj reuse and preventing repeated buffer re-allocations
        StringBuilder sb = new StringBuilder();

        String[] fnameBlocks = new File(lpdir).list();
        for (String fnameBlock : fnameBlocks) {
            LinkedList<Cell> tmp = StagingFileUtils.readCellListFromLocal(lpdir + "/" + fnameBlock);
            for (Cell c : tmp) {
                sb.append(c.getRow());
                sb.append(' ');
                sb.append(c.getCol());
                sb.append(' ');
                sb.append(c.getValue());
                sb.append('\n');
                out.write(sb.toString());
                sb.setLength(0);
            }
        }
    } finally {
        if (out != null)
            out.close();
    }
}

From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.ResultMergeLocalFile.java

License:Open Source License

/**
 * Merges all text cell inputs into the output file without compare-block handling.
 * @param fnameNew
 * @param outMo
 * @param inMO
 * @throws DMLRuntimeException
 */
private void mergeTextCellWithoutComp(String fnameNew, MatrixObject outMo, ArrayList<MatrixObject> inMO)
        throws DMLRuntimeException {
    try {
        //delete target file if already exists
        MapReduceTool.deleteFileIfExistOnHDFS(fnameNew);

        if (ALLOW_COPY_CELLFILES) {
            copyAllFiles(fnameNew, inMO);
            return; //we're done
        }

        //actual merge
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        FileSystem fs = FileSystem.get(job);
        Path path = new Path(fnameNew);
        BufferedWriter out = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));

        String valueStr = null;

        try {
            for (MatrixObject in : inMO) //read/write all inputs
            {
                LOG.trace("ResultMerge (local, file): Merge input " + in.getVarName() + " (fname="
                        + in.getFileName() + ") via stream merge");

                JobConf tmpJob = new JobConf(ConfigurationManager.getCachedJobConf());
                Path tmpPath = new Path(in.getFileName());
                FileInputFormat.addInputPath(tmpJob, tmpPath);
                TextInputFormat informat = new TextInputFormat();
                informat.configure(tmpJob);
                InputSplit[] splits = informat.getSplits(tmpJob, 1);

                LongWritable key = new LongWritable();
                Text value = new Text();

                for (InputSplit split : splits) {
                    RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, tmpJob,
                            Reporter.NULL);
                    try {
                        while (reader.next(key, value)) {
                            valueStr = value.toString().trim();
                            out.write(valueStr + "\n");
                        }
                    } finally {
                        if (reader != null)
                            reader.close();
                    }
                }
            }
        } finally {
            if (out != null)
                out.close();
        }
    } catch (Exception ex) {
        throw new DMLRuntimeException("Unable to merge text cell results.", ex);
    }
}

From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.ResultMergeLocalFile.java

License:Open Source License

/**
 * Creates the final text cell result file from the staging directories, optionally merging with compare blocks.
 * @param fnameStaging
 * @param fnameStagingCompare
 * @param fnameNew
 * @param metadata
 * @param withCompare
 * @throws IOException
 * @throws DMLRuntimeException
 */
private void createTextCellResultFile(String fnameStaging, String fnameStagingCompare, String fnameNew,
        MatrixFormatMetaData metadata, boolean withCompare) throws IOException, DMLRuntimeException {
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = FileSystem.get(job);
    Path path = new Path(fnameNew);

    MatrixCharacteristics mc = metadata.getMatrixCharacteristics();
    long rlen = mc.getRows();
    long clen = mc.getCols();
    int brlen = mc.getRowsPerBlock();
    int bclen = mc.getColsPerBlock();

    BufferedWriter out = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));
    try {
        //for obj reuse and preventing repeated buffer re-allocations
        StringBuilder sb = new StringBuilder();

        boolean written = false;
        for (long brow = 1; brow <= (long) Math.ceil(rlen / (double) brlen); brow++)
            for (long bcol = 1; bcol <= (long) Math.ceil(clen / (double) bclen); bcol++) {
                File dir = new File(fnameStaging + "/" + brow + "_" + bcol);
                File dir2 = new File(fnameStagingCompare + "/" + brow + "_" + bcol);
                MatrixBlock mb = null;

                long row_offset = (brow - 1) * brlen + 1;
                long col_offset = (bcol - 1) * bclen + 1;

                if (dir.exists()) {
                    if (withCompare && dir2.exists()) //WITH COMPARE BLOCK
                    {
                        //copy only values that are different from the original
                        String[] lnames2 = dir2.list();
                        if (lnames2.length != 1) //there should be exactly 1 compare block
                            throw new DMLRuntimeException(
                                    "Unable to merge results because multiple compare blocks found.");
                        mb = StagingFileUtils.readCellList2BlockFromLocal(dir2 + "/" + lnames2[0], brlen,
                                bclen);
                        boolean appendOnly = mb.isInSparseFormat();
                        double[][] compare = DataConverter.convertToDoubleMatrix(mb);

                        String[] lnames = dir.list();
                        for (String lname : lnames) {
                            MatrixBlock tmp = StagingFileUtils.readCellList2BlockFromLocal(dir + "/" + lname,
                                    brlen, bclen);
                            mergeWithComp(mb, tmp, compare);
                        }

                        //sort sparse and examine sparsity due to append-only
                        if (appendOnly)
                            mb.sortSparseRows();

                        //change sparsity if required after 
                        mb.examSparsity();
                    } else //WITHOUT COMPARE BLOCK
                    {
                        //copy all non-zeros from all workers
                        String[] lnames = dir.list();
                        boolean appendOnly = false;
                        for (String lname : lnames) {
                            if (mb == null) {
                                mb = StagingFileUtils.readCellList2BlockFromLocal(dir + "/" + lname, brlen,
                                        bclen);
                                appendOnly = mb.isInSparseFormat();
                            } else {
                                MatrixBlock tmp = StagingFileUtils
                                        .readCellList2BlockFromLocal(dir + "/" + lname, brlen, bclen);
                                mergeWithoutComp(mb, tmp, appendOnly);
                            }
                        }

                        //sort sparse due to append-only
                        if (appendOnly)
                            mb.sortSparseRows();

                        //change sparsity if required after 
                        mb.examSparsity();
                    }
                }

                //write the block to text cell
                if (mb != null) {
                    if (mb.isInSparseFormat()) {
                        SparseRowsIterator iter = mb.getSparseRowsIterator();
                        while (iter.hasNext()) {
                            IJV lcell = iter.next();
                            sb.append(row_offset + lcell.i);
                            sb.append(' ');
                            sb.append(col_offset + lcell.j);
                            sb.append(' ');
                            sb.append(lcell.v);
                            sb.append('\n');
                            out.write(sb.toString());
                            sb.setLength(0);
                            written = true;
                        }
                    } else {
                        for (int i = 0; i < brlen; i++)
                            for (int j = 0; j < bclen; j++) {
                                double lvalue = mb.getValueDenseUnsafe(i, j);
                                if (lvalue != 0) //for nnz
                                {
                                    sb.append(row_offset + i);
                                    sb.append(' ');
                                    sb.append(col_offset + j);
                                    sb.append(' ');
                                    sb.append(lvalue);
                                    sb.append('\n');
                                    out.write(sb.toString());
                                    sb.setLength(0);
                                    written = true;
                                }
                            }
                    }
                }
            }

        if (!written)
            out.write("1 1 0\n");
    } finally {
        if (out != null)
            out.close();
    }
}

From source file:com.ibm.bi.dml.runtime.controlprogram.ParForProgramBlock.java

License:Open Source License

/**
 * Writes the given tasks to a task file on HDFS and returns the file name.
 * @param fname
 * @param tasks
 * @return
 * @throws DMLRuntimeException
 * @throws IOException
 */
private String writeTasksToFile(String fname, List<Task> tasks, int maxDigits)
        throws DMLRuntimeException, IOException {
    BufferedWriter br = null;
    try {
        Path path = new Path(fname);
        FileSystem fs = FileSystem.get(ConfigurationManager.getCachedJobConf());
        br = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));

        boolean flagFirst = true; //workaround for keeping gen order
        for (Task t : tasks) {
            br.write(createTaskFileLine(t, maxDigits, flagFirst));
            if (flagFirst)
                flagFirst = false;
        }
    } catch (Exception ex) {
        throw new DMLRuntimeException("Error writing tasks to taskfile " + fname, ex);
    } finally {
        if (br != null)
            br.close();
    }

    return fname;
}