List of usage examples for org.apache.hadoop.mapreduce.lib.output FileOutputCommitter FileOutputCommitter
@Private public FileOutputCommitter(Path outputPath, JobContext context) throws IOException
From source file:com.netflix.bdp.s3.S3MultipartOutputCommitter.java
License:Apache License
public S3MultipartOutputCommitter(Path outputPath, JobContext context) throws IOException { super(outputPath, (TaskAttemptContext) context); this.constructorOutputPath = outputPath; Configuration conf = context.getConfiguration(); this.uploadPartSize = conf.getLong(S3Committer.UPLOAD_SIZE, S3Committer.DEFAULT_UPLOAD_SIZE); // Spark will use a fake app id based on the current minute and job id 0. // To avoid collisions, use the YARN application ID for Spark. this.uuid = conf.get(S3Committer.UPLOAD_UUID, conf.get(S3Committer.SPARK_WRITE_UUID, conf.get(S3Committer.SPARK_APP_ID, context.getJobID().toString()))); if (context instanceof TaskAttemptContext) { this.workPath = taskAttemptPath((TaskAttemptContext) context, uuid); } else {// w w w . ja v a 2 s . c o m this.workPath = null; } this.wrappedCommitter = new FileOutputCommitter(Paths.getMultipartUploadCommitsDirectory(conf, uuid), context); }
From source file:com.neusoft.hbase.test.hadoop.dataload.HFileOutputFormat2.java
License:Apache License
static <V extends Cell> RecordWriter<ImmutableBytesWritable, V> createRecordWriter( final TaskAttemptContext context) throws IOException, InterruptedException { // Get the path of the temporary output file final Path outputPath = FileOutputFormat.getOutputPath(context); final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath(); final Configuration conf = context.getConfiguration(); final FileSystem fs = outputdir.getFileSystem(conf); // These configs. are from hbase-*.xml final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE); // Invented config. Add to hbase-*.xml if other than default compression. final String defaultCompression = conf.get("hfile.compression", Compression.Algorithm.NONE.getName()); final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", false);/*from w w w. j ava 2 s. c om*/ // create a map from column family to the compression algorithm final Map<byte[], String> compressionMap = createFamilyCompressionMap(conf); final Map<byte[], String> bloomTypeMap = createFamilyBloomMap(conf); final Map<byte[], String> blockSizeMap = createFamilyBlockSizeMap(conf); String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_CONF_KEY); final HFileDataBlockEncoder encoder; if (dataBlockEncodingStr == null) { encoder = NoOpDataBlockEncoder.INSTANCE; } else { try { encoder = new HFileDataBlockEncoderImpl(DataBlockEncoding.valueOf(dataBlockEncodingStr)); } catch (IllegalArgumentException ex) { throw new RuntimeException("Invalid data block encoding type configured for the param " + DATABLOCK_ENCODING_CONF_KEY + " : " + dataBlockEncodingStr); } } return new RecordWriter<ImmutableBytesWritable, V>() { // Map of families to writers and how much has been output on the writer. private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>( Bytes.BYTES_COMPARATOR); private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY; private final byte[] now = Bytes.toBytes(System.currentTimeMillis()); private boolean rollRequested = false; public void write(ImmutableBytesWritable row, V cell) throws IOException { KeyValue kv = KeyValueUtil.ensureKeyValue(cell); // null input == user explicitly wants to flush if (row == null && kv == null) { rollWriters(); return; } byte[] rowKey = kv.getRow(); long length = kv.getLength(); byte[] family = kv.getFamily(); WriterLength wl = this.writers.get(family); // If this is a new column family, verify that the directory exists if (wl == null) { fs.mkdirs(new Path(outputdir, Bytes.toString(family))); } // If any of the HFiles for the column families has reached // maxsize, we need to roll all the writers if (wl != null && wl.written + length >= maxsize) { this.rollRequested = true; } // This can only happen once a row is finished though if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) { rollWriters(); } // create a new HLog writer, if necessary if (wl == null || wl.writer == null) { wl = getNewWriter(family, conf); } // we now have the proper HLog writer. full steam ahead kv.updateLatestStamp(this.now); wl.writer.append(kv); wl.written += length; // Copy the row so we know when a row transition. this.previousRow = rowKey; } private void rollWriters() throws IOException { for (WriterLength wl : this.writers.values()) { if (wl.writer != null) { LOG.info("Writer=" + wl.writer.getPath() + ((wl.written == 0) ? "" : ", wrote=" + wl.written)); close(wl.writer); } wl.writer = null; wl.written = 0; } this.rollRequested = false; } /* Create a new StoreFile.Writer. * @param family * @return A WriterLength, containing a new StoreFile.Writer. * @throws IOException */ private WriterLength getNewWriter(byte[] family, Configuration conf) throws IOException { WriterLength wl = new WriterLength(); Path familydir = new Path(outputdir, Bytes.toString(family)); String compression = compressionMap.get(family); compression = compression == null ? defaultCompression : compression; String bloomTypeStr = bloomTypeMap.get(family); BloomType bloomType = BloomType.NONE; if (bloomTypeStr != null) { bloomType = BloomType.valueOf(bloomTypeStr); } String blockSizeString = blockSizeMap.get(family); int blockSize = blockSizeString == null ? HConstants.DEFAULT_BLOCKSIZE : Integer.parseInt(blockSizeString); Configuration tempConf = new Configuration(conf); tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f); wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs) .withOutputDir(familydir).withBloomType(bloomType).withComparator(KeyValue.COMPARATOR) .build(); this.writers.put(family, wl); return wl; } private void close(final StoreFile.Writer w) throws IOException { if (w != null) { w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis())); w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY, Bytes.toBytes(context.getTaskAttemptID().toString())); w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true)); w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude)); w.appendTrackedTimestampsToMetadata(); w.close(); } } public void close(TaskAttemptContext c) throws IOException, InterruptedException { for (WriterLength wl : this.writers.values()) { close(wl.writer); } } }; }
From source file:com.neusoft.hbase.test.hadoop.dataload.HFileOutputFormatBase.java
License:Apache License
public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(// cellKeyValue //static <V extends Cell> RecordWriter<ImmutableBytesWritable, V> createRecordWriter() //getRecordWriter() final TaskAttemptContext context) throws IOException, InterruptedException { // Get the path of the temporary output file final Path outputPath = FileOutputFormat.getOutputPath(context); final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath(); final Path ignoreOutputPath = getDeleteRowKeyFile(outputPath);// final Configuration conf = context.getConfiguration(); final FileSystem fs = outputdir.getFileSystem(conf); // These configs. are from hbase-*.xml final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE); // Invented config. Add to hbase-*.xml if other than default // compression. final String defaultCompression = conf.get("hfile.compression", Compression.Algorithm.NONE.getName()); final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", false);/*from ww w. j a v a 2 s.com*/ // create a map from column family to the compression algorithm final Map<byte[], String> compressionMap = createFamilyCompressionMap(conf); final Map<byte[], String> bloomTypeMap = createFamilyBloomMap(conf); final Map<byte[], String> blockSizeMap = createFamilyBlockSizeMap(conf); String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_CONF_KEY); final HFileDataBlockEncoder encoder; if (dataBlockEncodingStr == null) { encoder = NoOpDataBlockEncoder.INSTANCE; } else { try { encoder = new HFileDataBlockEncoderImpl(DataBlockEncoding.valueOf(dataBlockEncodingStr)); } catch (IllegalArgumentException ex) { throw new RuntimeException("Invalid data block encoding type configured for the param " + DATABLOCK_ENCODING_CONF_KEY + " : " + dataBlockEncodingStr); } } return new RecordWriter<ImmutableBytesWritable, KeyValue>() {// VKeyValue // Map of families to writers and how much has been output on the // writer. private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>( Bytes.BYTES_COMPARATOR); private final FSDataOutputStream dos = fs.create(ignoreOutputPath); private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY; private final byte[] now = Bytes.toBytes(System.currentTimeMillis()); private boolean rollRequested = false; public void write(ImmutableBytesWritable row, KeyValue kv)// V cellKeyValue kv throws IOException { // KeyValue kv = KeyValueUtil.ensureKeyValue(cell);// // null input == user explicitly wants to flush if (row == null && kv == null) { rollWriters(); return; } byte[] rowKey = kv.getRow(); long length = kv.getLength(); byte[] family = kv.getFamily(); if (ignore(kv)) {// if byte[] readBuf = rowKey; dos.write(readBuf, 0, readBuf.length); dos.write(Bytes.toBytes("\n")); return; } WriterLength wl = this.writers.get(family); // If this is a new column family, verify that the directory // exists if (wl == null) { fs.mkdirs(new Path(outputdir, Bytes.toString(family))); } // If any of the HFiles for the column families has reached // maxsize, we need to roll all the writers if (wl != null && wl.written + length >= maxsize) { this.rollRequested = true; } // This can only happen once a row is finished though if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) { rollWriters(); } // create a new HLog writer, if necessary if (wl == null || wl.writer == null) { wl = getNewWriter(family, conf); } // we now have the proper HLog writer. full steam ahead kv.updateLatestStamp(this.now); wl.writer.append(kv); wl.written += length; // Copy the row so we know when a row transition. this.previousRow = rowKey; } private void rollWriters() throws IOException { for (WriterLength wl : this.writers.values()) { if (wl.writer != null) { LOG.info("Writer=" + wl.writer.getPath() + ((wl.written == 0) ? "" : ", wrote=" + wl.written)); close(wl.writer); } wl.writer = null; wl.written = 0; } this.rollRequested = false; } /* * Create a new StoreFile.Writer. * * @param family * * @return A WriterLength, containing a new StoreFile.Writer. * * @throws IOException */ private WriterLength getNewWriter(byte[] family, Configuration conf) throws IOException { WriterLength wl = new WriterLength(); Path familydir = new Path(outputdir, Bytes.toString(family)); String compression = compressionMap.get(family); compression = compression == null ? defaultCompression : compression; String bloomTypeStr = bloomTypeMap.get(family); BloomType bloomType = BloomType.NONE; if (bloomTypeStr != null) { bloomType = BloomType.valueOf(bloomTypeStr); } String blockSizeString = blockSizeMap.get(family); int blockSize = blockSizeString == null ? HConstants.DEFAULT_BLOCKSIZE : Integer.parseInt(blockSizeString); Configuration tempConf = new Configuration(conf); tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f); wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs) .withOutputDir(familydir).withBloomType(bloomType).withComparator(KeyValue.COMPARATOR) .build(); this.writers.put(family, wl); return wl; } private void close(final StoreFile.Writer w) throws IOException { if (w != null) { w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis())); w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY, Bytes.toBytes(context.getTaskAttemptID().toString())); w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true)); w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude)); w.appendTrackedTimestampsToMetadata(); w.close(); } } public void close(TaskAttemptContext c) throws IOException, InterruptedException { dos.flush();// dos.close();// for (WriterLength wl : this.writers.values()) { close(wl.writer); } } }; }
From source file:com.phantom.hadoop.examples.terasort.TeraOutputFormat.java
License:Apache License
public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException { if (committer == null) { Path output = getOutputPath(context); committer = new FileOutputCommitter(output, context); }/*from www. j av a2 s . c om*/ return committer; }
From source file:com.pinterest.terrapin.hadoop.HFileOutputFormat.java
License:Apache License
public RecordWriter<BytesWritable, BytesWritable> getRecordWriter(TaskAttemptContext context) throws IOException { // Get the path of the temporary output file final Path outputPath = FileOutputFormat.getOutputPath(context); final Path outputDir = new FileOutputCommitter(outputPath, context).getWorkPath(); final Configuration conf = context.getConfiguration(); final FileSystem fs = outputDir.getFileSystem(conf); int blockSize = conf.getInt(Constants.HFILE_BLOCKSIZE, 16384); // Default to snappy. Compression.Algorithm compressionAlgorithm = getAlgorithm(conf.get(Constants.HFILE_COMPRESSION)); final StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, new CacheConfig(conf), fs, blockSize) .withFilePath(hfilePath(outputPath, context.getTaskAttemptID().getTaskID().getId())) .withCompression(compressionAlgorithm).build(); return new HFileRecordWriter(writer); }
From source file:eu.stratosphere.hadoopcompatibility.mapreduce.HadoopOutputFormat.java
License:Apache License
/** * create the temporary output file for hadoop RecordWriter. * @param taskNumber The number of the parallel instance. * @param numTasks The number of parallel tasks. * @throws IOException//from w ww.j a v a2 s . c o m */ @Override public void open(int taskNumber, int numTasks) throws IOException { if (Integer.toString(taskNumber + 1).length() > 6) { throw new IOException("Task id too large."); } // for hadoop 2.2 this.configuration.set("mapreduce.output.basename", "tmp"); TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_" + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0") + Integer.toString(taskNumber + 1) + "_0"); try { this.context = HadoopUtils.instantiateTaskAttemptContext(this.configuration, taskAttemptID); } catch (Exception e) { throw new RuntimeException(e); } this.configuration.set("mapred.task.id", taskAttemptID.toString()); // for hadoop 2.2 this.configuration.set("mapreduce.task.attempt.id", taskAttemptID.toString()); this.fileOutputCommitter = new FileOutputCommitter(new Path(this.configuration.get("mapred.output.dir")), context); try { this.fileOutputCommitter.setupJob(HadoopUtils.instantiateJobContext(this.configuration, new JobID())); } catch (Exception e) { throw new RuntimeException(e); } // compatible for hadoop 2.2.0, the temporary output directory is different from hadoop 1.2.1 this.configuration.set("mapreduce.task.output.dir", this.fileOutputCommitter.getWorkPath().toString()); try { this.recordWriter = this.mapreduceOutputFormat.getRecordWriter(this.context); } catch (InterruptedException e) { throw new IOException("Could not create RecordWriter.", e); } }
From source file:gr.ntua.h2rdf.inputFormat.MultiHFileOutputFormat.java
License:Open Source License
public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(final TaskAttemptContext context) throws IOException, InterruptedException { return new RecordWriter<ImmutableBytesWritable, KeyValue>() { @Override/*from w ww.j a va 2 s . co m*/ public void close(TaskAttemptContext context) throws IOException, InterruptedException { for (RecordWriter<ImmutableBytesWritable, KeyValue> writer : writers.values()) { writer.close(context); } } @Override public void write(ImmutableBytesWritable key, KeyValue value) throws IOException, InterruptedException { RecordWriter<ImmutableBytesWritable, KeyValue> writer = writers.get(key); if (writer == null) { final Path outputPath = new Path( FileOutputFormat.getOutputPath(context).toString() + "/" + Bytes.toString(key.get())); writer = new RecordWriter<ImmutableBytesWritable, KeyValue>() { final FileOutputCommitter committer = new FileOutputCommitter(outputPath, context); final Path outputdir = committer.getWorkPath(); final Configuration conf = context.getConfiguration(); final FileSystem fs = outputdir.getFileSystem(conf); final long maxsize = conf.getLong("hbase.hregion.max.filesize", HConstants.DEFAULT_MAX_FILE_SIZE); final int blocksize = conf.getInt("hfile.min.blocksize.size", HFile.DEFAULT_BLOCKSIZE); // Invented config. Add to hbase-*.xml if other than default compression. final String compression = conf.get("hfile.compression", Compression.Algorithm.NONE.getName()); // Map of families to writers and how much has been output on the writer. final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>( Bytes.BYTES_COMPARATOR); byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY; final byte[] now = Bytes.toBytes(System.currentTimeMillis()); boolean rollRequested = false; public void write(ImmutableBytesWritable row, KeyValue kv) throws IOException { // null input == user explicitly wants to flush if (row == null && kv == null) { rollWriters(); return; } byte[] rowKey = kv.getRow(); long length = kv.getLength(); byte[] family = kv.getFamily(); WriterLength wl = this.writers.get(family); // If this is a new column family, verify that the directory exists if (wl == null) { fs.mkdirs(new Path(outputdir, Bytes.toString(family))); } // If any of the HFiles for the column families has reached // maxsize, we need to roll all the writers if (wl != null && wl.written + length >= maxsize) { this.rollRequested = true; } // This can only happen once a row is finished though if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) { rollWriters(); } // create a new HLog writer, if necessary if (wl == null || wl.writer == null) { wl = getNewWriter(family); } // we now have the proper HLog writer. full steam ahead kv.updateLatestStamp(this.now); wl.writer.append(kv); wl.written += length; // Copy the row so we know when a row transition. this.previousRow = rowKey; } private void rollWriters() throws IOException { for (WriterLength wl : this.writers.values()) { if (wl.writer != null) { close(wl.writer); } wl.writer = null; wl.written = 0; } this.rollRequested = false; } private HFile.Writer getNewWriter(final HFile.Writer writer, final Path familydir, Configuration conf) throws IOException { if (writer != null) { close(writer); } return HFile.getWriterFactoryNoCache(conf).create(); //return HFile.getWriterFactory(conf).createWriter(fs, StoreFile.getUniqueFile(fs, familydir), // blocksize, compression, KeyValue.KEY_COMPARATOR); // return new HFile.Writer(fs, StoreFile.getUniqueFile(fs, familydir), // blocksize, compression, KeyValue.KEY_COMPARATOR); } private WriterLength getNewWriter(byte[] family) throws IOException { WriterLength wl = new WriterLength(); Path familydir = new Path(outputdir, Bytes.toString(family)); wl.writer = getNewWriter(wl.writer, familydir, conf); this.writers.put(family, wl); return wl; } private void close(final HFile.Writer w) throws IOException { if (w != null) { w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis())); w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY, Bytes.toBytes(context.getTaskAttemptID().toString())); w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true)); w.close(); } } public void close(TaskAttemptContext c) throws IOException, InterruptedException { for (WriterLength wl : this.writers.values()) { close(wl.writer); } committer.commitTask(c); } }; writers.put(key, writer); } writer.write(new ImmutableBytesWritable(value.getRow()), value); } }; }
From source file:input_format.MultiHFileOutputFormat.java
License:Open Source License
public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(final TaskAttemptContext context) throws IOException, InterruptedException { return new RecordWriter<ImmutableBytesWritable, KeyValue>() { @Override/* w w w. j ava 2 s .c o m*/ public void close(TaskAttemptContext context) throws IOException, InterruptedException { for (RecordWriter<ImmutableBytesWritable, KeyValue> writer : writers.values()) { writer.close(context); } } @Override public void write(ImmutableBytesWritable key, KeyValue value) throws IOException, InterruptedException { RecordWriter<ImmutableBytesWritable, KeyValue> writer = writers.get(key); if (writer == null) { final Path outputPath = new Path( FileOutputFormat.getOutputPath(context).toString() + "/" + Bytes.toString(key.get())); writer = new RecordWriter<ImmutableBytesWritable, KeyValue>() { final FileOutputCommitter committer = new FileOutputCommitter(outputPath, context); final Path outputdir = committer.getWorkPath(); final Configuration conf = context.getConfiguration(); final FileSystem fs = outputdir.getFileSystem(conf); final long maxsize = conf.getLong("hbase.hregion.max.filesize", HConstants.DEFAULT_MAX_FILE_SIZE); final int blocksize = conf.getInt("hfile.min.blocksize.size", HFile.DEFAULT_BLOCKSIZE); // Invented config. Add to hbase-*.xml if other than default compression. final String compression = conf.get("hfile.compression", Compression.Algorithm.NONE.getName()); // Map of families to writers and how much has been output on the writer. final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>( Bytes.BYTES_COMPARATOR); byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY; final byte[] now = Bytes.toBytes(System.currentTimeMillis()); boolean rollRequested = false; public void write(ImmutableBytesWritable row, KeyValue kv) throws IOException { // null input == user explicitly wants to flush if (row == null && kv == null) { rollWriters(); return; } byte[] rowKey = kv.getRow(); long length = kv.getLength(); byte[] family = kv.getFamily(); WriterLength wl = this.writers.get(family); // If this is a new column family, verify that the directory exists if (wl == null) { fs.mkdirs(new Path(outputdir, Bytes.toString(family))); } // If any of the HFiles for the column families has reached // maxsize, we need to roll all the writers if (wl != null && wl.written + length >= maxsize) { this.rollRequested = true; } // This can only happen once a row is finished though if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) { rollWriters(); } // create a new HLog writer, if necessary if (wl == null || wl.writer == null) { wl = getNewWriter(family); } // we now have the proper HLog writer. full steam ahead kv.updateLatestStamp(this.now); wl.writer.append(kv); wl.written += length; // Copy the row so we know when a row transition. this.previousRow = rowKey; } private void rollWriters() throws IOException { for (WriterLength wl : this.writers.values()) { if (wl.writer != null) { close(wl.writer); } wl.writer = null; wl.written = 0; } this.rollRequested = false; } private HFile.Writer getNewWriter(final HFile.Writer writer, final Path familydir, Configuration conf) throws IOException { if (writer != null) { close(writer); } return HFile.getWriterFactory(conf).createWriter(fs, StoreFile.getUniqueFile(fs, familydir), blocksize, compression, KeyValue.KEY_COMPARATOR); // return new HFile.Writer(fs, StoreFile.getUniqueFile(fs, familydir), // blocksize, compression, KeyValue.KEY_COMPARATOR); } private WriterLength getNewWriter(byte[] family) throws IOException { WriterLength wl = new WriterLength(); Path familydir = new Path(outputdir, Bytes.toString(family)); wl.writer = getNewWriter(wl.writer, familydir, conf); this.writers.put(family, wl); return wl; } private void close(final HFile.Writer w) throws IOException { if (w != null) { w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis())); w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY, Bytes.toBytes(context.getTaskAttemptID().toString())); w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true)); w.close(); } } public void close(TaskAttemptContext c) throws IOException, InterruptedException { for (WriterLength wl : this.writers.values()) { close(wl.writer); } committer.commitTask(c); } }; writers.put(key, writer); } writer.write(new ImmutableBytesWritable(value.getRow()), value); } }; }
From source file:kafka.bridge.hadoop.KafkaOutputFormat.java
License:Apache License
@Override public OutputCommitter getOutputCommitter(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException { // Is there a programmatic way to get the temp dir? I see it hardcoded everywhere in Hadoop, Hive, and Pig. return new FileOutputCommitter( new Path("/tmp/" + taskAttemptContext.getTaskAttemptID().getJobID().toString()), taskAttemptContext);// www . j a v a 2 s . co m }
From source file:mlbench.bayes.test.BayesTest.java
License:Apache License
@SuppressWarnings("deprecation") public static void main(String[] args) throws MPI_D_Exception, IOException, MPIException { parseArgs(args);/*from w w w . j a va2 s . c o m*/ HashMap<String, String> conf = new HashMap<String, String>(); initConf(conf); MPI_D.Init(args, MPI_D.Mode.Common, conf); if (MPI_D.COMM_BIPARTITE_O != null) { rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O); size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O); NaiveBayesModel model = NaiveBayesModel.materialize(modelPath, config); classifier = new StandardNaiveBayesClassifier(model); MPI_D.COMM_BIPARTITE_O.Barrier(); FileSplit[] inputs = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O, (JobConf) config, inDir, rank); for (int i = 0; i < inputs.length; i++) { FileSplit fsplit = inputs[i]; SequenceFileRecordReader<Text, VectorWritable> kvrr = new SequenceFileRecordReader<>(config, fsplit); Text key = kvrr.createKey(); VectorWritable value = kvrr.createValue(); while (kvrr.next(key, value)) { Vector result = classifier.classifyFull(value.get()); MPI_D.Send(new Text(SLASH.split(key.toString())[1]), new VectorWritable(result)); } } } else if (MPI_D.COMM_BIPARTITE_A != null) { int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A); config.set(MAPRED_OUTPUT_DIR, outDir); config.set("mapred.task.id", DataMPIUtil.getHadoopTaskAttemptID().toString().toString()); ((JobConf) config).setOutputKeyClass(Text.class); ((JobConf) config).setOutputValueClass(VectorWritable.class); TaskAttemptContext taskContext = new TaskAttemptContextImpl(config, DataMPIUtil.getHadoopTaskAttemptID()); SequenceFileOutputFormat<Text, VectorWritable> outfile = new SequenceFileOutputFormat<>(); FileSystem fs = FileSystem.get(config); Path output = new Path(config.get(MAPRED_OUTPUT_DIR)); FileOutputCommitter fcommitter = new FileOutputCommitter(output, taskContext); RecordWriter<Text, VectorWritable> outrw = null; try { fcommitter.setupJob(taskContext); outrw = outfile.getRecordWriter(fs, (JobConf) config, getOutputName(rank), null); } catch (IOException e) { e.printStackTrace(); System.err.println("ERROR: Please set the HDFS configuration properly\n"); System.exit(-1); } Text key = null; VectorWritable point = null; Vector vector = null; Object[] vals = MPI_D.Recv(); while (vals != null) { key = (Text) vals[0]; point = (VectorWritable) vals[1]; if (key != null && point != null) { vector = point.get(); outrw.write(key, new VectorWritable(vector)); } vals = MPI_D.Recv(); } outrw.close(null); if (fcommitter.needsTaskCommit(taskContext)) { fcommitter.commitTask(taskContext); } MPI_D.COMM_BIPARTITE_A.Barrier(); if (rank == 0) { // load the labels Map<Integer, String> labelMap = BayesUtils.readLabelIndex(config, labPath); // loop over the results and create the confusion matrix SequenceFileDirIterable<Text, VectorWritable> dirIterable = new SequenceFileDirIterable<Text, VectorWritable>( output, PathType.LIST, PathFilters.partFilter(), config); ResultAnalyzer analyzer = new ResultAnalyzer(labelMap.values(), "DEFAULT"); analyzeResults(labelMap, dirIterable, analyzer); } } MPI_D.Finalize(); }