List of usage examples for org.apache.hadoop.fs FileSystem create
public FSDataOutputStream create(Path f, short replication) throws IOException
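None of the examples below actually call this two-argument overload, so here is a minimal sketch of it first. The short argument sets the per-file replication factor, overriding the cluster's dfs.replication default. The namenode URI, path, and replication factor are illustrative, not taken from any of the source files below.

// Minimal sketch of create(Path, short): write a small file with an
// explicit replication factor of 2 (path and URI are assumptions).
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(new URI("hdfs://localhost:9000"), conf);
FSDataOutputStream out = fs.create(new Path("/user/student/replicated.txt"), (short) 2);
try {
    out.writeBytes("written with replication factor 2\n");
} finally {
    out.close();
    fs.close();
}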
From source file:com.ema.hadoop.test_hdfs.TestWrite.java
public static void main(String[] args) throws IOException, URISyntaxException {
    Configuration configuration = new Configuration();
    FileSystem hdfs = FileSystem.get(new URI("hdfs://localhost:9000"), configuration);
    Path file = new Path("hdfs://localhost:9000/user/student/text_file_write.txt");
    if (hdfs.exists(file)) {
        hdfs.delete(file, true);
    }
    OutputStream os = hdfs.create(file, new Progressable() {
        @Override
        public void progress() {
            System.out.println("...bytes written");
        }
    });
    BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, "UTF-8"));
    br.write("This is just a test to check if it is possible to write a file on HDFS using the Java API");
    br.close();
    hdfs.close();
}
From source file:com.ema.hadoop.wordcount.WordCount_cache.java
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: WordCount <input path> <output path>");
        System.exit(-1);
    }

    // First we write the stop word list;
    // it could also be a file manually loaded into HDFS
    String[] stopwords = { "the", "a" };
    Configuration configuration = new Configuration();
    FileSystem hdfs = FileSystem.get(new URI("hdfs://localhost:9000"), configuration);
    Path file = new Path("hdfs://localhost:9000/user/student/stop_words.txt");
    if (hdfs.exists(file)) {
        hdfs.delete(file, true);
    }
    OutputStream os = hdfs.create(file, new Progressable() {
        @Override
        public void progress() {
            System.out.println("...bytes written");
        }
    });
    BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, "UTF-8"));
    for (String w : stopwords) {
        br.write(w + "\n");
    }
    br.close();
    hdfs.close();

    Job job = Job.getInstance();
    job.addCacheFile(new Path("hdfs://localhost:9000/user/student/stop_words.txt").toUri());
    job.setJarByClass(WordCount_cache.class);
    job.setJobName("Word count job");
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(WCMapper_cache.class);
    job.setReducerClass(WCReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.github.jmabuin.blaspark.io.IO.java
License:Open Source License
public static void writeVectorToFileInHDFS(String file, DenseVector vector, Configuration conf) {
    try {
        FileSystem fs = FileSystem.get(conf);
        Path pt = new Path(file);
        // Overwrite the destination file if it already exists (overwrite = true)
        BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fs.create(pt, true)));

        bw.write("%%MatrixMarket matrix array real general");
        bw.newLine();
        bw.write(vector.size() + " 1");
        bw.newLine();

        for (int i = 0; i < vector.size(); i++) {
            bw.write(String.valueOf(vector.apply(i)));
            bw.newLine();
        }

        bw.close();
    } catch (IOException e) {
        LOG.error("Error in " + IO.class.getName() + ": " + e.getMessage());
        e.printStackTrace();
        System.exit(1);
    }
}
From source file:com.github.jmabuin.blaspark.io.IO.java
License:Open Source License
public static void writeMatrixToFileInHDFS(String file, DistributedMatrix matrix, Configuration conf) {
    try {
        FileSystem fs = FileSystem.get(conf);
        Path pt = new Path(file);
        // Overwrite the destination file if it already exists (overwrite = true)
        BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fs.create(pt, true)));

        // Convert any supported matrix type to an RDD of indexed rows
        JavaRDD<IndexedRow> rows;
        if (matrix.getClass() == IndexedRowMatrix.class) {
            rows = ((IndexedRowMatrix) matrix).rows().toJavaRDD();
        } else if (matrix.getClass() == CoordinateMatrix.class) {
            rows = ((CoordinateMatrix) matrix).toIndexedRowMatrix().rows().toJavaRDD();
        } else if (matrix.getClass() == BlockMatrix.class) {
            rows = ((BlockMatrix) matrix).toIndexedRowMatrix().rows().toJavaRDD();
        } else {
            // Fail fast instead of leaving rows null and hitting an NPE below
            throw new IOException("Unsupported matrix type: " + matrix.getClass().getName());
        }

        // Collect the rows to the driver and place them by row index
        List<IndexedRow> localRows = rows.collect();
        Vector[] vectors = new Vector[localRows.size()];
        for (int i = 0; i < localRows.size(); i++) {
            vectors[(int) localRows.get(i).index()] = localRows.get(i).vector();
        }

        long numRows = matrix.numRows();
        long numCols = matrix.numCols();

        bw.write("%%MatrixMarket matrix array real general");
        bw.newLine();
        bw.write(numRows + " " + numCols + " " + (numRows * numCols));
        bw.newLine();

        // Note: rows are written as "index:v0,v1,..." rather than the
        // one-value-per-line layout the MatrixMarket header announces.
        for (int i = 0; i < vectors.length; i++) {
            bw.write(i + ":");
            for (int j = 0; j < vectors[i].size(); j++) {
                bw.write(String.valueOf(vectors[i].apply(j)) + ",");
            }
            bw.newLine();
        }

        bw.close();
    } catch (IOException e) {
        LOG.error("Error in " + IO.class.getName() + ": " + e.getMessage());
        e.printStackTrace();
        System.exit(1);
    }
}
From source file:com.hdfs.concat.crush.integration.CrushMapReduceTest.java
License:Apache License
/**
 * Copies data from the given input stream to an HDFS file at the given path.
 * This method will close the input stream.
 */
protected final void copyStreamToHdfs(InputStream resource, String hdfsDestFileName) throws IOException {
    FileSystem fs = getFileSystem();
    // Fail if the destination file already exists (overwrite = false)
    FSDataOutputStream os = fs.create(new Path(hdfsDestFileName), false);
    IOUtils.copyBytes(resource, os, fs.getConf(), true);
}
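A usage sketch for this helper, with the final boolean of IOUtils.copyBytes closing both streams when the copy finishes. The resource name and destination path are hypothetical, not taken from the test class:

// Hypothetical caller: copy a classpath test fixture into HDFS.
InputStream in = getClass().getResourceAsStream("/fixtures/input.txt");
copyStreamToHdfs(in, "/tmp/crush-test/input.txt");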
From source file:com.hp.hpit.cs.MyTextOutputFormat.java
License:Apache License
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    String keyValueSeparator = conf.get("mapred.textoutputformat.separator", "\t");
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    // Fail if the work file already exists (overwrite = false)
    FSDataOutputStream fileOut = fs.create(file, false);
    if (!isCompressed) {
        return new LineRecordWriter<K, V>(fileOut, keyValueSeparator);
    } else {
        return new LineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator);
    }
}
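To use a custom output format like this one, a job would register it in place of the stock TextOutputFormat. A minimal wiring sketch; the job name, separator value, and output path are assumptions:

// Hypothetical job setup for the custom output format above.
Job job = Job.getInstance(new Configuration(), "custom text output");
job.getConfiguration().set("mapred.textoutputformat.separator", "|");
job.setOutputFormatClass(MyTextOutputFormat.class);
FileOutputFormat.setOutputPath(job, new Path("/tmp/out"));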
From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.DataPartitionerLocal.java
License:Open Source License
public void writeTextCellFileToHDFS(JobConf job, String dir, String lpdir) throws IOException {
    long key = getKeyFromFilePath(lpdir);
    FileSystem fs = FileSystem.get(job);
    Path path = new Path(dir + "/" + key);
    BufferedWriter out = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));
    try {
        // for object reuse and preventing repeated buffer re-allocations
        StringBuilder sb = new StringBuilder();

        String[] fnameBlocks = new File(lpdir).list();
        for (String fnameBlock : fnameBlocks) {
            LinkedList<Cell> tmp = StagingFileUtils.readCellListFromLocal(lpdir + "/" + fnameBlock);
            for (Cell c : tmp) {
                sb.append(c.getRow());
                sb.append(' ');
                sb.append(c.getCol());
                sb.append(' ');
                sb.append(c.getValue());
                sb.append('\n');
                out.write(sb.toString());
                sb.setLength(0);
            }
        }
    } finally {
        if (out != null)
            out.close();
    }
}
From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.ResultMergeLocalFile.java
License:Open Source License
/**
 * @param fnameNew
 * @param outMo
 * @param inMO
 * @throws DMLRuntimeException
 */
private void mergeTextCellWithoutComp(String fnameNew, MatrixObject outMo, ArrayList<MatrixObject> inMO)
        throws DMLRuntimeException {
    try {
        // delete target file if already exists
        MapReduceTool.deleteFileIfExistOnHDFS(fnameNew);

        if (ALLOW_COPY_CELLFILES) {
            copyAllFiles(fnameNew, inMO);
            return; // we're done
        }

        // actual merge
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        FileSystem fs = FileSystem.get(job);
        Path path = new Path(fnameNew);
        BufferedWriter out = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));

        String valueStr = null;

        try {
            for (MatrixObject in : inMO) // read/write all inputs
            {
                LOG.trace("ResultMerge (local, file): Merge input " + in.getVarName() + " (fname="
                        + in.getFileName() + ") via stream merge");

                JobConf tmpJob = new JobConf(ConfigurationManager.getCachedJobConf());
                Path tmpPath = new Path(in.getFileName());
                FileInputFormat.addInputPath(tmpJob, tmpPath);
                TextInputFormat informat = new TextInputFormat();
                informat.configure(tmpJob);
                InputSplit[] splits = informat.getSplits(tmpJob, 1);

                LongWritable key = new LongWritable();
                Text value = new Text();

                for (InputSplit split : splits) {
                    RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, tmpJob,
                            Reporter.NULL);
                    try {
                        while (reader.next(key, value)) {
                            valueStr = value.toString().trim();
                            out.write(valueStr + "\n");
                        }
                    } finally {
                        if (reader != null)
                            reader.close();
                    }
                }
            }
        } finally {
            if (out != null)
                out.close();
        }
    } catch (Exception ex) {
        throw new DMLRuntimeException("Unable to merge text cell results.", ex);
    }
}
From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.ResultMergeLocalFile.java
License:Open Source License
/**
 * @param fnameStaging
 * @param fnameStagingCompare
 * @param fnameNew
 * @param metadata
 * @param withCompare
 * @throws IOException
 * @throws DMLRuntimeException
 */
private void createTextCellResultFile(String fnameStaging, String fnameStagingCompare, String fnameNew,
        MatrixFormatMetaData metadata, boolean withCompare) throws IOException, DMLRuntimeException {
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = FileSystem.get(job);
    Path path = new Path(fnameNew);

    MatrixCharacteristics mc = metadata.getMatrixCharacteristics();
    long rlen = mc.getRows();
    long clen = mc.getCols();
    int brlen = mc.getRowsPerBlock();
    int bclen = mc.getColsPerBlock();

    BufferedWriter out = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));
    try {
        // for obj reuse and preventing repeated buffer re-allocations
        StringBuilder sb = new StringBuilder();

        boolean written = false;
        for (long brow = 1; brow <= (long) Math.ceil(rlen / (double) brlen); brow++)
            for (long bcol = 1; bcol <= (long) Math.ceil(clen / (double) bclen); bcol++) {
                File dir = new File(fnameStaging + "/" + brow + "_" + bcol);
                File dir2 = new File(fnameStagingCompare + "/" + brow + "_" + bcol);
                MatrixBlock mb = null;

                long row_offset = (brow - 1) * brlen + 1;
                long col_offset = (bcol - 1) * bclen + 1;

                if (dir.exists()) {
                    if (withCompare && dir2.exists()) // WITH COMPARE BLOCK
                    {
                        // copy only values that are different from the original
                        String[] lnames2 = dir2.list();
                        if (lnames2.length != 1) // there should be exactly 1 compare block
                            throw new DMLRuntimeException(
                                    "Unable to merge results because multiple compare blocks found.");
                        mb = StagingFileUtils.readCellList2BlockFromLocal(dir2 + "/" + lnames2[0], brlen, bclen);
                        boolean appendOnly = mb.isInSparseFormat();
                        double[][] compare = DataConverter.convertToDoubleMatrix(mb);

                        String[] lnames = dir.list();
                        for (String lname : lnames) {
                            MatrixBlock tmp = StagingFileUtils.readCellList2BlockFromLocal(dir + "/" + lname,
                                    brlen, bclen);
                            mergeWithComp(mb, tmp, compare);
                        }

                        // sort sparse and exam sparsity due to append-only
                        if (appendOnly)
                            mb.sortSparseRows();

                        // change sparsity if required after
                        mb.examSparsity();
                    } else // WITHOUT COMPARE BLOCK
                    {
                        // copy all non-zeros from all workers
                        String[] lnames = dir.list();
                        boolean appendOnly = false;
                        for (String lname : lnames) {
                            if (mb == null) {
                                mb = StagingFileUtils.readCellList2BlockFromLocal(dir + "/" + lname, brlen,
                                        bclen);
                                appendOnly = mb.isInSparseFormat();
                            } else {
                                MatrixBlock tmp = StagingFileUtils.readCellList2BlockFromLocal(dir + "/" + lname,
                                        brlen, bclen);
                                mergeWithoutComp(mb, tmp, appendOnly);
                            }
                        }

                        // sort sparse due to append-only
                        if (appendOnly)
                            mb.sortSparseRows();

                        // change sparsity if required after
                        mb.examSparsity();
                    }
                }

                // write the block to text cell
                if (mb != null) {
                    if (mb.isInSparseFormat()) {
                        SparseRowsIterator iter = mb.getSparseRowsIterator();
                        while (iter.hasNext()) {
                            IJV lcell = iter.next();
                            sb.append(row_offset + lcell.i);
                            sb.append(' ');
                            sb.append(col_offset + lcell.j);
                            sb.append(' ');
                            sb.append(lcell.v);
                            sb.append('\n');
                            out.write(sb.toString());
                            sb.setLength(0);
                            written = true;
                        }
                    } else {
                        for (int i = 0; i < brlen; i++)
                            for (int j = 0; j < bclen; j++) {
                                double lvalue = mb.getValueDenseUnsafe(i, j);
                                if (lvalue != 0) // for nnz
                                {
                                    sb.append(row_offset + i);
                                    sb.append(' ');
                                    sb.append(col_offset + j);
                                    sb.append(' ');
                                    sb.append(lvalue);
                                    sb.append('\n');
                                    out.write(sb.toString());
                                    sb.setLength(0);
                                    written = true;
                                }
                            }
                    }
                }
            }

        if (!written)
            out.write("1 1 0\n");
    } finally {
        if (out != null)
            out.close();
    }
}
From source file:com.ibm.bi.dml.runtime.controlprogram.ParForProgramBlock.java
License:Open Source License
/**
 * @param fname
 * @param tasks
 * @return
 * @throws DMLRuntimeException
 * @throws IOException
 */
private String writeTasksToFile(String fname, List<Task> tasks, int maxDigits)
        throws DMLRuntimeException, IOException {
    BufferedWriter br = null;
    try {
        Path path = new Path(fname);
        FileSystem fs = FileSystem.get(ConfigurationManager.getCachedJobConf());
        br = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));

        boolean flagFirst = true; // workaround for keeping gen order
        for (Task t : tasks) {
            br.write(createTaskFileLine(t, maxDigits, flagFirst));
            if (flagFirst)
                flagFirst = false;
        }
    } catch (Exception ex) {
        throw new DMLRuntimeException("Error writing tasks to taskfile " + fname, ex);
    } finally {
        if (br != null)
            br.close();
    }

    return fname;
}