List of usage examples for org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter#getWorkPath()
public Path getWorkPath() throws IOException
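getWorkPath() returns the task attempt's temporary working directory (a _temporary sub-directory under the job output path); files written there are promoted to the final output directory when the attempt commits. All of the examples below follow the same basic pattern. Here is a minimal sketch of that pattern, assuming a custom TextOutputFormat subclass; the class name and the "example" file prefix are illustrative, not taken from any of the projects below.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

// Illustrative sketch, not from any project listed below.
public class WorkPathExample extends TextOutputFormat<NullWritable, Text> {

    @Override
    public Path getDefaultWorkFile(TaskAttemptContext context, String extension) throws IOException {
        // The committer owns the per-attempt scratch directory; anything written
        // under getWorkPath() is moved into the job's final output directory
        // when the attempt commits.
        FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(context);
        return new Path(committer.getWorkPath(), getUniqueFile(context, "example", extension));
    }
}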
From source file:co.nubetech.hiho.mapreduce.lib.output.AppendSequenceFileOutputFormat.java
License:Apache License
@Override
public Path getDefaultWorkFile(TaskAttemptContext context, String extension) throws IOException {
    Path p1;
    isAppend = context.getConfiguration().get(HIHOConf.IS_APPEND, "false");
    if (isAppend.equalsIgnoreCase("false")) {
        p1 = super.getDefaultWorkFile(context, extension);
    } else {
        FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(context);
        Path p = committer.getWorkPath();
        fileCount = p.getFileSystem(context.getConfiguration()).getContentSummary(getOutputPath(context))
                .getFileCount();
        if (fileCount > 1) {
            fileCount = fileCount - 1;
        }
        p1 = new Path(committer.getWorkPath(), getUniqueFile(context, "part", extension));
    }
    return p1;
}
From source file:co.nubetech.hiho.mapreduce.lib.output.AppendTextOutputFormat.java
License:Apache License
@Override
public Path getDefaultWorkFile(TaskAttemptContext context, String extension) throws IOException {
    Path p1;
    isAppend = context.getConfiguration().get(HIHOConf.IS_APPEND);
    if (isAppend.equalsIgnoreCase("false")) {
        p1 = super.getDefaultWorkFile(context, extension);
    } else {
        FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(context);
        Path p = committer.getWorkPath();
        fileCount = p.getFileSystem(context.getConfiguration()).getContentSummary(getOutputPath(context))
                .getFileCount();
        if (fileCount > 1) {
            fileCount = fileCount - 1;
        }
        p1 = new Path(committer.getWorkPath(), getUniqueFile(context, "part", extension));
    }
    return p1;
}
From source file:com.asakusafw.runtime.stage.output.TemporaryOutputFormat.java
License:Apache License
/**
 * Creates a new {@link RecordWriter} to output temporary data.
 * @param <V> value type
 * @param context current context
 * @param name output name
 * @param dataType value type
 * @return the created writer
 * @throws IOException if failed to create a new {@link RecordWriter}
 * @throws InterruptedException if interrupted
 * @throws IllegalArgumentException if some parameters were {@code null}
 */
public <V> RecordWriter<NullWritable, V> createRecordWriter(TaskAttemptContext context, String name,
        Class<V> dataType) throws IOException, InterruptedException {
    if (context == null) {
        throw new IllegalArgumentException("context must not be null"); //$NON-NLS-1$
    }
    if (name == null) {
        throw new IllegalArgumentException("name must not be null"); //$NON-NLS-1$
    }
    if (dataType == null) {
        throw new IllegalArgumentException("dataType must not be null"); //$NON-NLS-1$
    }
    CompressionCodec codec = null;
    Configuration conf = context.getConfiguration();
    if (FileOutputFormat.getCompressOutput(context)) {
        Class<?> codecClass = FileOutputFormat.getOutputCompressorClass(context, DefaultCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
    }
    FileOutputCommitter committer = getOutputCommitter(context);
    final Path file = new Path(committer.getWorkPath(), FileOutputFormat.getUniqueFile(context, name, "")); //$NON-NLS-1$
    final ModelOutput<V> out = TemporaryStorage.openOutput(conf, dataType, file, codec);
    return new RecordWriter<NullWritable, V>() {
        @Override
        public void write(NullWritable key, V value) throws IOException {
            out.write(value);
        }

        @Override
        public void close(TaskAttemptContext ignored) throws IOException {
            out.close();
        }

        @Override
        public String toString() {
            return String.format("TemporaryOutput(%s)", file); //$NON-NLS-1$
        }
    };
}
From source file:com.bonc.mr_roamRecognition_hjpt.comm.NewFileOutputFormat.java
License:Apache License
/**
 * Get the {@link Path} to the task's temporary output directory for the
 * map-reduce job.
 *
 * <h4 id="SideEffectFiles">Tasks' Side-Effect Files</h4>
 *
 * <p>
 * Some applications need to create/write-to side-files, which differ from
 * the actual job-outputs.
 * </p>
 *
 * <p>
 * In such cases there could be issues with 2 instances of the same TIP
 * (running simultaneously e.g. speculative tasks) trying to open/write-to
 * the same file (path) on HDFS. Hence the application-writer will have to
 * pick unique names per task-attempt (e.g. using the attemptid, say
 * <tt>attempt_200709221812_0001_m_000000_0</tt>), not just per TIP.
 * </p>
 *
 * <p>
 * To get around this the Map-Reduce framework helps the application-writer
 * out by maintaining a special
 * <tt>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</tt>
 * sub-directory for each task-attempt on HDFS where the output of the
 * task-attempt goes. On successful completion of the task-attempt the files
 * in the
 * <tt>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</tt>
 * (only) are <i>promoted</i> to
 * <tt>${mapreduce.output.fileoutputformat.outputdir}</tt>. Of course, the
 * framework discards the sub-directory of unsuccessful task-attempts. This
 * is completely transparent to the application.
 * </p>
 *
 * <p>
 * The application-writer can take advantage of this by creating any
 * side-files required in a work directory during execution of the task, i.e.
 * via {@link #getWorkOutputPath(TaskInputOutputContext)}, and the framework
 * will move them out similarly - thus the writer doesn't have to pick unique
 * paths per task-attempt.
 * </p>
 *
 * <p>
 * The entire discussion holds true for maps of jobs with reducer=NONE (i.e.
 * 0 reduces) since output of the map, in that case, goes directly to HDFS.
 * </p>
 *
 * @return the {@link Path} to the task's temporary output directory for the
 *         map-reduce job.
 */
public static Path getWorkOutputPath(TaskInputOutputContext<?, ?, ?, ?> context)
        throws IOException, InterruptedException {
    FileOutputCommitter committer = (FileOutputCommitter) context.getOutputCommitter();
    return committer.getWorkPath();
}
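The side-effect-file contract described in the javadoc above can be exercised directly from a task. Below is a minimal sketch, assuming a mapper that opens an auxiliary file in the attempt's work directory during setup; the mapper class and file name are illustrative assumptions, not from the source project.

import java.io.IOException;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;

// Illustrative sketch of a side-effect file, per the javadoc above.
public class SideEffectMapper extends Mapper<LongWritable, Text, Text, Text> {
    private FSDataOutputStream sideFile;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // getWorkPath() resolves to this attempt's _temporary sub-directory, so
        // two speculative attempts never collide; on success the committer
        // promotes the file to the job output directory.
        FileOutputCommitter committer = (FileOutputCommitter) context.getOutputCommitter();
        Path side = new Path(committer.getWorkPath(), "side-effect.txt");
        sideFile = side.getFileSystem(context.getConfiguration()).create(side);
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        sideFile.close();
    }
}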
From source file:com.bonc.mr_roamRecognition_hjpt.comm.NewFileOutputFormat.java
License:Apache License
/**
 * Get the default path and filename for the output format.
 *
 * @param context
 *          the task context
 * @param extension
 *          an extension to add to the filename
 * @return a full path $output/_temporary/$taskid/part-[mr]-$id
 * @throws IOException
 */
public Path getDefaultWorkFile(TaskAttemptContext context, String extension) throws IOException {
    FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(context);
    return new Path(committer.getWorkPath(), getUniqueFile(context, getOutputName(context), extension));
}
From source file:com.cloudera.dataflow.spark.ShardNameTemplateHelper.java
License:Open Source License
public static <K, V> Path getDefaultWorkFile(FileOutputFormat<K, V> format, TaskAttemptContext context)
        throws IOException {
    FileOutputCommitter committer = (FileOutputCommitter) format.getOutputCommitter(context);
    return new Path(committer.getWorkPath(), getOutputFile(context));
}
From source file:com.datasalt.pangool.tuplemr.mapred.lib.output.ProxyOutputFormat.java
License:Apache License
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException {
    createOutputFormatIfNeeded(context);

    String outDir = context.getConfiguration().get("mapred.output.dir");
    originalDir = outDir;
    FileOutputCommitter committer = (FileOutputCommitter) super.getOutputCommitter(context);
    baseDir = committer.getWorkPath() + "";

    Configuration conf = new Configuration(context.getConfiguration());
    TaskAttemptContext reContext;
    try {
        reContext = TaskAttemptContextFactory.get(conf, context.getTaskAttemptID());
    } catch (Exception e) {
        throw new IOException(e);
    }

    reContext.getConfiguration().set("mapred.output.dir", baseDir);
    // This is for Hadoop 2.0:
    reContext.getConfiguration().set("mapreduce.output.fileoutputformat.outputdir", baseDir);

    try {
        return new ProxyOutputCommitter(new Path(originalDir), context,
                outputFormat.getOutputCommitter(reContext));
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    }
}
From source file:com.facebook.hiveio.common.HadoopUtils.java
License:Apache License
/**
 * Set worker output directory.
 * @param context Task context
 * @throws IOException I/O errors
 */
public static void setWorkOutputDir(TaskAttemptContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    String outputPath = getOutputDir(conf);
    // we need to do this to get the task path and set it for mapred
    // implementation since it can't be done automatically because of
    // mapreduce->mapred abstraction
    if (outputPath != null) {
        FileOutputCommitter foc = new FileOutputCommitter(getOutputPath(conf), context);
        Path path = foc.getWorkPath();
        FileSystem fs = path.getFileSystem(conf);
        fs.mkdirs(path);
        conf.set("mapred.work.output.dir", path.toString());
        LOG.info("Setting mapred.work.output.dir to {}", path.toString());
    }
}
From source file:com.linkedin.camus.etl.kafka.common.StringKafkaRecordWriterProvider.java
@Override
public RecordWriter<IEtlKey, CamusWrapper> getDataRecordWriter(final TaskAttemptContext context,
        final String fileName, CamusWrapper data, FileOutputCommitter committer)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();

    Path file = committer.getWorkPath();
    file = new Path(file, EtlMultiOutputFormat.getUniqueFile(context, fileName, getFilenameExtension()));

    CompressionCodec codec = null;
    SequenceFile.CompressionType compressionType = SequenceFile.CompressionType.NONE;

    final SequenceFile.Writer out = SequenceFile.createWriter(conf, SequenceFile.Writer.file(file),
            SequenceFile.Writer.keyClass(Text.class), SequenceFile.Writer.valueClass(Text.class),
            SequenceFile.Writer.compression(compressionType, codec),
            SequenceFile.Writer.progressable(context));

    return new RecordWriter<IEtlKey, CamusWrapper>() {
        @Override
        public void write(IEtlKey iEtlKey, CamusWrapper camusWrapper) throws IOException {
            String record = (String) camusWrapper.getRecord() + recordDelimiter;
            out.append(new Text(String.valueOf(iEtlKey.getOffset())), new Text(record.getBytes()));
        }

        @Override
        public void close(TaskAttemptContext taskAttemptContext) throws IOException {
            out.close();
        }
    };
}
From source file:com.splout.db.hadoop.engine.SploutSQLProxyOutputFormat.java
License:Apache License
@Override
public RecordWriter<ITuple, NullWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {

    long waitTimeHeartBeater = context.getConfiguration().getLong(HeartBeater.WAIT_TIME_CONF, 5000);
    heartBeater = new HeartBeater(context, waitTimeHeartBeater);
    heartBeater.needHeartBeat();
    conf = context.getConfiguration();
    this.context = context;

    outputFormat.setConf(context.getConfiguration());

    return new RecordWriter<ITuple, NullWritable>() {

        // Temporary and permanent Paths for properly writing Hadoop output files
        private Map<Integer, Path> permPool = new HashMap<Integer, Path>();
        private Map<Integer, Path> tempPool = new HashMap<Integer, Path>();

        private void initSql(int partition) throws IOException, InterruptedException {
            // HDFS final location of the generated partition file. It will be
            // loaded to the temporary folder in the HDFS, then finally will be
            // committed by the OutputCommitter to the proper location.
            FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(
                    SploutSQLProxyOutputFormat.this.context);
            Path perm = new Path(committer.getWorkPath(), partition + ".db");
            FileSystem fs = perm.getFileSystem(conf);

            // Make a task unique name that contains the actual index output name to
            // make debugging simpler
            // Note: if using JVM reuse, the sequence number will not be reset for a
            // new task using the jvm
            Path temp = conf.getLocalPath("mapred.local.dir",
                    "splout_task_" + SploutSQLProxyOutputFormat.this.context.getTaskAttemptID() + '.'
                            + FILE_SEQUENCE.incrementAndGet());
            FileSystem localFileSystem = FileSystem.getLocal(conf);
            if (localFileSystem.exists(temp)) {
                localFileSystem.delete(temp, true);
            }
            localFileSystem.mkdirs(temp);
            Path local = fs.startLocalOutput(perm, new Path(temp, partition + ".db"));

            permPool.put(partition, perm);
            tempPool.put(partition, new Path(temp, partition + ".db"));

            outputFormat.initPartition(partition, local);
        }

        @Override
        public void close(TaskAttemptContext ctx) throws IOException, InterruptedException {
            FileSystem fs = FileSystem.get(ctx.getConfiguration());
            try {
                if (ctx != null) {
                    heartBeater.setProgress(ctx);
                }
                outputFormat.close();
                for (Map.Entry<Integer, Path> entry : permPool.entrySet()) {
                    // Hadoop - completeLocalOutput()
                    fs.completeLocalOutput(entry.getValue(), tempPool.get(entry.getKey()));
                }
            } finally {
                // in any case, destroy the HeartBeater
                heartBeater.cancelHeartBeat();
            }
        }

        @Override
        public void write(ITuple tuple, NullWritable ignore) throws IOException, InterruptedException {
            int partition = (Integer) tuple.get(SploutSQLOutputFormat.PARTITION_TUPLE_FIELD);
            if (tempPool.get(partition) == null) {
                initSql(partition);
            }
            outputFormat.write(tuple);
        }
    };
}