List of usage examples for org.apache.hadoop.fs.FileSystem#create
public FSDataOutputStream create(Path f, boolean overwrite) throws IOException
From source file:com.marklogic.contentpump.OutputArchive.java
License:Apache License
private void newOutputStream() throws IOException { // use the constructor filename for the first zip, // then add filecount to subsequent archives, if any. int count = fileCount.getAndIncrement(); currPath = newPackagePath(basePath, count, 6); if (outputStream != null) { if (LOG.isDebugEnabled()) { LOG.debug("closing output archive: " + currPath); }/*from ww w .ja v a2 s .c o m*/ outputStream.flush(); outputStream.close(); } currentFileBytes = 0; currentEntries = 0; Path zpath = new Path(currPath); FileSystem fs = zpath.getFileSystem(conf); if (fs.exists(zpath)) { throw new IOException(zpath + " already exists."); } if (LOG.isDebugEnabled()) { LOG.debug("Creating output archive: " + zpath); LOG.debug("Default charset: " + Charset.defaultCharset()); } // if fs instanceof DistributedFileSystem, use hadoop api; otherwise, // use java api if (fs instanceof DistributedFileSystem) { FSDataOutputStream fsout = fs.create(zpath, false); outputStream = new ZipOutputStream(fsout); } else { File f = new File(zpath.toUri().getPath()); if (!f.exists()) { f.getParentFile().mkdirs(); f.createNewFile(); } FileOutputStream fos = new FileOutputStream(f, false); outputStream = new ZipOutputStream(fos); } }
From source file:com.marklogic.contentpump.SingleDocumentWriter.java
License:Apache License
@Override public void write(DocumentURI uri, MarkLogicDocument content) throws IOException, InterruptedException { OutputStream os = null;//from w w w .j av a 2 s . c o m try { String childPath = URIUtil.getPathFromURI(uri); Path path; if (childPath.charAt(0) == '/') { // concatenate outputPath with path to form the path path = new Path(dir.toString() + childPath); } else { path = new Path(dir, childPath); } FileSystem fs = path.getFileSystem(conf); if (fs instanceof DistributedFileSystem) { os = fs.create(path, false); } else { File f = new File(path.toUri().getPath()); if (!f.exists()) { f.getParentFile().mkdirs(); f.createNewFile(); } os = new FileOutputStream(f, false); } ContentType type = content.getContentType(); if (ContentType.BINARY.equals(type)) { if (content.isStreamable()) { InputStream is = null; try { is = content.getContentAsByteStream(); long size = content.getContentSize(); long bufSize = Math.min(size, 512 << 10); byte[] buf = new byte[(int) bufSize]; for (long toRead = size, read = 0; toRead > 0; toRead -= read) { read = is.read(buf, 0, (int) bufSize); if (read > 0) { os.write(buf, 0, (int) read); } else { LOG.error("Premature EOF: uri=" + uri + ",toRead=" + toRead); break; } } } finally { if (is != null) { is.close(); } } } else { os.write(content.getContentAsByteArray()); } } else if (ContentType.TEXT.equals(type) || ContentType.XML.equals(type) || ContentType.JSON.equals(type)) { if (encoding.equals("UTF-8")) { Text t = content.getContentAsText(); os.write(t.getBytes(), 0, t.getLength()); } else { String t = content.getContentAsString(); os.write(t.getBytes(encoding)); } if (LOG.isTraceEnabled()) { Text t = content.getContentAsText(); LOG.trace(t); byte[] bytes = content.getContentAsByteArray(); StringBuilder sb = new StringBuilder(); for (int i = 0; i < bytes.length; i++) { sb.append(Byte.toString(bytes[i])); sb.append(" "); } LOG.trace(sb); } } else { LOG.error("Skipping " + uri + ". 
Unsupported content type: " + type.name()); } } catch (Exception e) { LOG.error("Error saving: " + uri, e); } finally { if (os != null) { os.close(); } } }
From source file:com.marklogic.mapreduce.examples.BinaryReader.java
License:Apache License
@Override public void write(DocumentURI uri, BytesWritable content) throws IOException, InterruptedException { String pathStr = dir.getName() + uri.getUri(); Path path = new Path(pathStr); FileSystem fs = path.getFileSystem(conf); FSDataOutputStream out = fs.create(path, false); System.out.println("writing to: " + path); out.write(content.getBytes(), 0, content.getLength()); out.flush();//w w w . j ava2 s . co m out.close(); }
From source file:com.metamx.druid.indexer.Utils.java
License:Open Source License
public static OutputStream makePathAndOutputStream(JobContext job, Path outputPath, boolean deleteExisting) throws IOException { OutputStream retVal;//from ww w . j a v a 2s . c o m FileSystem fs = outputPath.getFileSystem(job.getConfiguration()); if (fs.exists(outputPath)) { if (deleteExisting) { fs.delete(outputPath, false); } else { throw new ISE("outputPath[%s] must not exist.", outputPath); } } if (!FileOutputFormat.getCompressOutput(job)) { retVal = fs.create(outputPath, false); } else { Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class); CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration()); outputPath = new Path(outputPath.toString() + codec.getDefaultExtension()); retVal = codec.createOutputStream(fs.create(outputPath, false)); } return retVal; }
From source file:com.ml.ira.algos.LogisticModelParameters.java
License:Apache License
/**
 * Saves the model to the given path, overwriting any existing file.
 *
 * <p>The current regression object is closed and the target categories are
 * refreshed from the CSV record factory before the model is written.
 *
 * @param path destination of the serialized model
 * @throws IOException if the file cannot be created or written
 */
public void saveTo(Path path) throws IOException {
    Closeables.close(lr, false);
    targetCategories = getCsvRecordFactory().getTargetCategories();
    FileSystem ofs = path.getFileSystem(new Configuration());
    FSDataOutputStream out = ofs.create(path, true);
    try {
        write(out);
        out.flush();
    } finally {
        // Close the stream, not the FileSystem: FileSystem instances are
        // normally shared through Hadoop's cache, and closing one here
        // would break every other user of the same instance.
        out.close();
    }
}
From source file:com.ML_Hadoop.K_meansClustering.K_meansClusteringReduce.java
@Override protected void cleanup(Context context) throws IOException { String uri = "/user/hduser/k_mean.txt"; Path path = new Path(uri); // Write the latest values of cetroids' of clusters in 'k_mean.txt' file try {//from ww w .j ava 2s . c o m FileSystem fs = FileSystem.get(URI.create(uri), context.getConfiguration()); if (fs.exists(path)) fs.delete(path, true); BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(path, true))); for (int i = 0; i < number_of_clusters; i++) { for (int j = 0; j < feature_size; j++) br.write(((Float) ((FloatWritable) cetroids_of_all_clusters.get(i).get()[j]).get()).toString() + ","); br.write("\n"); } br.close(); } catch (Exception e) { System.out.println("File k_mean.txt not found"); } // Write the values of cetroids' of clusters for current iteration in directory '/user/hduser/K-means/...' uri = "/user/hduser/K-means/means-" + current_iteration_num + ".txt"; path = new Path(uri); try { FileSystem fs = FileSystem.get(context.getConfiguration()); if (current_iteration_num == 0) fs.delete(new Path("/user/hduser/K-means"), true); OutputStreamWriter osw = new OutputStreamWriter(fs.create(path, true)); BufferedWriter br = new BufferedWriter(osw); for (int i = 0; i < number_of_clusters; i++) { for (int j = 0; j < feature_size; j++) br.write((Float) ((FloatWritable) cetroids_of_all_clusters.get(i).get()[j]).get() + ","); br.write("\n"); } br.close(); } catch (Exception e) { System.out.println("File not found"); } }
From source file:com.ML_Hadoop.MultipleLinearRegression.MultipleLinearRegressionReduce.java
/**
 * Persists the current regression coefficients when the reducer finishes.
 *
 * <p>The values of {@code theta} are written to
 * {@code /user/hduser/theta.txt} (replacing any previous file), and the
 * prediction error followed by {@code theta} is written to a per-iteration
 * file under {@code /user/hduser/LinearReg/}. On iteration 0 that directory
 * is deleted first. Both writes are best effort: a failure is reported on
 * standard output and does not fail the task.
 *
 * @param context reducer context supplying the Hadoop configuration
 * @throws IOException declared by the overridden method
 */
@Override
protected void cleanup(Context context) throws IOException {
    String uri = "/user/hduser/theta.txt";
    Path path = new Path(uri);
    try {
        FileSystem fs = FileSystem.get(URI.create(uri), context.getConfiguration());
        if (fs.exists(path)) {
            fs.delete(path, true);
        }
        BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));
        try {
            for (int i = 0; i < theta.length; i++) {
                br.write(theta[i].toString() + ",");
            }
            br.write("\n");
        } finally {
            // Always release the underlying stream, even on a failed write.
            br.close();
        }
    } catch (Exception e) {
        // Report the real failure instead of a generic "not found".
        System.out.println("Failed to write " + uri + ": " + e);
    }

    uri = "/user/hduser/LinearReg/theta-" + iteration + ".txt";
    path = new Path(uri);
    try {
        FileSystem fs = FileSystem.get(context.getConfiguration());
        if (iteration == 0) {
            fs.delete(new Path("/user/hduser/LinearReg"), true);
        }
        BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));
        try {
            br.write(prediction_error + ", ");
            for (int i = 0; i < theta.length; i++) {
                br.write(theta[i].toString() + ", ");
            }
            br.write("\n");
        } finally {
            br.close();
        }
    } catch (Exception e) {
        System.out.println("Failed to write " + uri + ": " + e);
    }
}
From source file:com.ML_Hadoop.NaiveBayesClassifier_Continuous_Features.NaiveBayesClassifierReduce_Continuous_Features.java
/**
 * Persists the per-class, per-feature values stored under the keys
 * {@code class_id_mu} and {@code class_id_std} when the reducer finishes.
 *
 * <p>The report is written to
 * {@code /user/hduser/naive_bayes_continuous.txt}, replacing any previous
 * file. The write is best effort: a failure is reported on standard output
 * and does not fail the task.
 *
 * @param context reducer context supplying the Hadoop configuration
 * @throws IOException declared by the overridden method
 */
@Override
protected void cleanup(Context context) throws IOException {
    String uri = "/user/hduser/naive_bayes_continuous.txt";
    Path path = new Path(uri);
    try {
        FileSystem fs = FileSystem.get(URI.create(uri), context.getConfiguration());
        if (fs.exists(path)) {
            fs.delete(path, true);
        }
        BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)));
        try {
            // NOTE(review): the header is not newline-terminated, so the
            // dashes end up on the same line; kept as-is in case the
            // format is consumed elsewhere - confirm.
            br.write("class_id, mu(mean), std");
            br.write("-------------------------------\n");
            for (int i = 0; i < number_of_classes; i++) {
                br.write("-------- Class-" + i + "-------\n");
                for (int j = 0; j < number_of_features; j++) {
                    br.write(((FloatWritable) probablity_info_output.get(i)[j].get(new Text("class_id_mu")))
                            + ", ");
                    br.write(((FloatWritable) probablity_info_output.get(i)[j].get(new Text("class_id_std")))
                            + "\n");
                }
                br.write("\n");
            }
        } finally {
            // Always release the underlying stream, even on a failed write.
            br.close();
        }
    } catch (Exception e) {
        // Report the real failure instead of a generic "cannot be found".
        System.out.println("Failed to write " + uri + ": " + e);
    }
}
From source file:com.mongodb.hadoop.splitter.BSONSplitter.java
License:Apache License
public void writeSplits() throws IOException { if (!getConf().getBoolean("bson.split.write_splits", true)) { LOG.info("bson.split.write_splits is set to false - skipping writing splits to disk."); return;//from w w w . j a v a2 s .c om } else { LOG.info("Writing splits to disk."); } if (splitsList == null) { LOG.info("No splits found, skipping write of splits file."); } Path outputPath = new Path(inputPath.getParent(), "." + inputPath.getName() + ".splits"); FileSystem pathFileSystem = outputPath.getFileSystem(getConf()); FSDataOutputStream fsDataOut = null; try { fsDataOut = pathFileSystem.create(outputPath, false); for (FileSplit inputSplit : splitsList) { BSONObject splitObj = BasicDBObjectBuilder.start().add("s", inputSplit.getStart()) .add("l", inputSplit.getLength()).get(); byte[] encodedObj = bsonEnc.encode(splitObj); fsDataOut.write(encodedObj, 0, encodedObj.length); } } catch (IOException e) { LOG.error("Could not create splits file: " + e.getMessage()); throw e; } finally { if (fsDataOut != null) { fsDataOut.close(); } } }
From source file:com.moz.fiji.mapreduce.util.SerializeLoggerAspect.java
License:Apache License
/** * Logic to serialize collected profiling content to a file on HDFS. The files are stored * in the current working directory for this context, in a folder specified by STATS_DIR. The per * task file is named by the task attempt id. * We obtain the profiling stats collected by the LogTimerAspect in FijiSchema. The format of the * file is as follows: Job Name, Job ID, Task Attempt, Function Signature, * Aggregate Time (nanoseconds), Number of Invocations, Time per call (nanoseconds)'\n' * * @param context The {@link TaskInputOutputContext} for this job. * @throws IOException If the writes to HDFS fail. *///from w ww. j ava 2s .c o m private void serializeToFile(TaskInputOutputContext context) throws IOException { Path parentPath = new Path(context.getWorkingDirectory(), STATS_DIR); FileSystem fs = parentPath.getFileSystem(context.getConfiguration()); fs.mkdirs(parentPath); Path path = new Path(parentPath, context.getTaskAttemptID().toString()); OutputStreamWriter out = new OutputStreamWriter(fs.create(path, true), "UTF-8"); try { out.write("Job Name, Job ID, Task Attempt, Function Signature, Aggregate Time (nanoseconds), " + "Number of Invocations, Time per call (nanoseconds)\n"); ConcurrentHashMap<String, LoggingInfo> signatureTimeMap = mLogTimerAspect.getSignatureTimeMap(); for (Map.Entry<String, LoggingInfo> entrySet : signatureTimeMap.entrySet()) { writeProfileInformation(out, context, entrySet.getKey(), entrySet.getValue()); } signatureTimeMap = mMRLogTimerAspect.getSignatureTimeMap(); for (Map.Entry<String, LoggingInfo> entrySet : signatureTimeMap.entrySet()) { writeProfileInformation(out, context, entrySet.getKey(), entrySet.getValue()); } } finally { out.close(); } }