List of usage examples for org.apache.hadoop.mapreduce.RecordWriter
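All of the examples below follow the same pattern: an OutputFormat implementation overrides getRecordWriter(TaskAttemptContext) and returns a RecordWriter whose write(key, value) method emits each record and whose close(context) method releases the underlying resources. As a minimal sketch of that pattern (the class name TextLineOutputFormat and the ".txt" extension are illustrative, not taken from any of the examples below):

import java.io.IOException;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Hypothetical minimal OutputFormat: writes each value as one line of text.
public class TextLineOutputFormat extends FileOutputFormat<NullWritable, Text> {

    @Override
    public RecordWriter<NullWritable, Text> getRecordWriter(TaskAttemptContext context)
            throws IOException, InterruptedException {
        // getDefaultWorkFile places the output under the output committer's work path.
        Path file = getDefaultWorkFile(context, ".txt");
        final FSDataOutputStream out = file.getFileSystem(context.getConfiguration()).create(file);
        return new RecordWriter<NullWritable, Text>() {
            @Override
            public void write(NullWritable key, Text value) throws IOException {
                // one record per call; the framework invokes this for every task output record
                out.write(value.getBytes(), 0, value.getLength());
                out.write('\n');
            }

            @Override
            public void close(TaskAttemptContext context) throws IOException {
                // called once per task; flush and release the stream
                out.close();
            }
        };
    }
}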
From source file:cn.edu.hfut.dmic.webcollector.fetcher.FetcherOutputFormat.java
@Override
public RecordWriter<Text, Writable> getRecordWriter(TaskAttemptContext tac)
        throws IOException, InterruptedException {
    Configuration conf = tac.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    String outputPath = conf.get("mapred.output.dir");
    Path fetchPath = new Path(outputPath, "fetch/info");
    Path contentPath = new Path(outputPath, "content/info");
    Path parseDataPath = new Path(outputPath, "parse/info");
    Path redirectPath = new Path(outputPath, "redirect/info");
    final SequenceFile.Writer fetchOut = new SequenceFile.Writer(fs, conf, fetchPath, Text.class,
            CrawlDatum.class);
    final SequenceFile.Writer contentOut = new SequenceFile.Writer(fs, conf, contentPath, Text.class,
            Content.class);
    final SequenceFile.Writer parseDataOut = new SequenceFile.Writer(fs, conf, parseDataPath, Text.class,
            CrawlDatum.class);
    final SequenceFile.Writer redirectOut = new SequenceFile.Writer(fs, conf, redirectPath, CrawlDatum.class,
            Text.class);
    return new RecordWriter<Text, Writable>() {
        @Override
        public void write(Text k, Writable v) throws IOException, InterruptedException {
            if (v instanceof CrawlDatum) {
                fetchOut.append(k, v);
            } else if (v instanceof Content) {
                contentOut.append(k, v);
            } else if (v instanceof ParseData) {
                ParseData parseData = (ParseData) v;
                CrawlDatums next = parseData.next;
                for (CrawlDatum datum : next) {
                    parseDataOut.append(new Text(datum.getKey()), datum);
                }
            } else if (v instanceof Redirect) {
                Redirect redirect = (Redirect) v;
                redirectOut.append(redirect.datum, new Text(redirect.realUrl));
            }
        }

        @Override
        public void close(TaskAttemptContext tac) throws IOException, InterruptedException {
            fetchOut.close();
            contentOut.close();
            parseDataOut.close();
            redirectOut.close();
        }
    };
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputFormat.java
License:Apache License
/**
 * Create a composite record writer that can write key/value data to different output files.
 *
 * @return a composite record writer
 * @throws IOException
 */
@Override
public RecordWriter<K, V> getRecordWriter(final TaskAttemptContext job) throws IOException {
    final String outputName = FileOutputFormat.getOutputName(job);
    Configuration configuration = job.getConfiguration();
    Class<? extends DynamicPartitioner> partitionerClass = configuration.getClass(
            PartitionedFileSetArguments.DYNAMIC_PARTITIONER_CLASS_NAME, null, DynamicPartitioner.class);

    @SuppressWarnings("unchecked")
    final DynamicPartitioner<K, V> dynamicPartitioner = new InstantiatorFactory(false)
            .get(TypeToken.of(partitionerClass)).create();

    MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(configuration);
    final BasicMapReduceTaskContext<K, V> taskContext = classLoader.getTaskContextProvider().get(job);

    String outputDatasetName = configuration.get(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET);
    PartitionedFileSet outputDataset = taskContext.getDataset(outputDatasetName);
    final Partitioning partitioning = outputDataset.getPartitioning();

    dynamicPartitioner.initialize(taskContext);

    return new RecordWriter<K, V>() {

        // a cache storing the record writers for different output files.
        Map<PartitionKey, RecordWriter<K, V>> recordWriters = new HashMap<>();

        public void write(K key, V value) throws IOException, InterruptedException {
            PartitionKey partitionKey = dynamicPartitioner.getPartitionKey(key, value);
            RecordWriter<K, V> rw = this.recordWriters.get(partitionKey);
            if (rw == null) {
                String relativePath = PartitionedFileSetDataset.getOutputPath(partitionKey, partitioning);
                String finalPath = relativePath + "/" + outputName;
                // if we don't have the record writer yet for the final path, create one and add it to the cache
                rw = getBaseRecordWriter(getTaskAttemptContext(job, finalPath));
                this.recordWriters.put(partitionKey, rw);
            }
            rw.write(key, value);
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            try {
                List<RecordWriter<?, ?>> recordWriters = new ArrayList<>();
                recordWriters.addAll(this.recordWriters.values());
                MultipleOutputs.closeRecordWriters(recordWriters, context);

                taskContext.flushOperations();
            } catch (Exception e) {
                throw new IOException(e);
            } finally {
                dynamicPartitioner.destroy();
            }
        }
    };
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.UnsupportedOutputFormat.java
License:Apache License
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) {
    return new RecordWriter<K, V>() {
        public void write(K key, V value) {
            throw new UnsupportedOperationException("Writing to output is not supported.");
        }

        public void close(TaskAttemptContext context) {
        }
    };
}
From source file:com.asakusafw.runtime.stage.output.EmptyFileOutputFormat.java
License:Apache License
@Override
public RecordWriter<Object, Object> getRecordWriter(TaskAttemptContext job)
        throws IOException, InterruptedException {
    return new RecordWriter<Object, Object>() {
        @Override
        public void write(Object key, Object value) throws IOException, InterruptedException {
            return;
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            return;
        }
    };
}
From source file:com.asakusafw.runtime.stage.output.TemporaryOutputFormat.java
License:Apache License
/**
 * Creates a new {@link RecordWriter} to output temporary data.
 * @param <V> value type
 * @param context current context
 * @param name output name
 * @param dataType value type
 * @return the created writer
 * @throws IOException if failed to create a new {@link RecordWriter}
 * @throws InterruptedException if interrupted
 * @throws IllegalArgumentException if some parameters were {@code null}
 */
public <V> RecordWriter<NullWritable, V> createRecordWriter(TaskAttemptContext context, String name,
        Class<V> dataType) throws IOException, InterruptedException {
    if (context == null) {
        throw new IllegalArgumentException("context must not be null"); //$NON-NLS-1$
    }
    if (name == null) {
        throw new IllegalArgumentException("name must not be null"); //$NON-NLS-1$
    }
    if (dataType == null) {
        throw new IllegalArgumentException("dataType must not be null"); //$NON-NLS-1$
    }
    CompressionCodec codec = null;
    Configuration conf = context.getConfiguration();
    if (FileOutputFormat.getCompressOutput(context)) {
        Class<?> codecClass = FileOutputFormat.getOutputCompressorClass(context, DefaultCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
    }
    FileOutputCommitter committer = getOutputCommitter(context);
    final Path file = new Path(committer.getWorkPath(), FileOutputFormat.getUniqueFile(context, name, "")); //$NON-NLS-1$
    final ModelOutput<V> out = TemporaryStorage.openOutput(conf, dataType, file, codec);
    return new RecordWriter<NullWritable, V>() {
        @Override
        public void write(NullWritable key, V value) throws IOException {
            out.write(value);
        }

        @Override
        public void close(TaskAttemptContext ignored) throws IOException {
            out.close();
        }

        @Override
        public String toString() {
            return String.format("TemporaryOutput(%s)", file); //$NON-NLS-1$
        }
    };
}
From source file:com.ci.backports.avro.mapreduce.AvroOutputFormat.java
License:Apache License
@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException {
    Schema schema = AvroJob.getOutputSchema(context.getConfiguration());
    if (schema == null) {
        throw new RuntimeException("AvroOutputFormat requires an output schema.");
    }

    final DataFileWriter<T> writer = new DataFileWriter<T>(new SpecificDatumWriter<T>());

    if (FileOutputFormat.getCompressOutput(context)) {
        int level = context.getConfiguration().getInt(
                org.apache.avro.mapred.AvroOutputFormat.DEFLATE_LEVEL_KEY,
                org.apache.avro.mapred.AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
        writer.setCodec(CodecFactory.deflateCodec(level));
    }

    Path path = getDefaultWorkFile(context, org.apache.avro.mapred.AvroOutputFormat.EXT);
    writer.create(schema, path.getFileSystem(context.getConfiguration()).create(path));

    return new RecordWriter<AvroWrapper<T>, NullWritable>() {
        public void write(AvroWrapper<T> record, NullWritable ignore) throws IOException {
            writer.append(record.datum());
        }

        public void close(TaskAttemptContext context) throws IOException {
            writer.close();
        }
    };
}
From source file:com.ci.backports.hadoop.hbase.ZHFileOutputFormat.java
License:Apache License
public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(final TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Get the path of the temporary output file
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
    Configuration conf = context.getConfiguration();
    final FileSystem fs = outputdir.getFileSystem(conf);
    // These configs. are from hbase-*.xml
    final long maxsize = conf.getLong("hbase.hregion.max.filesize", HConstants.DEFAULT_MAX_FILE_SIZE);
    final int blocksize = conf.getInt("hfile.min.blocksize.size", HFile.DEFAULT_BLOCKSIZE);
    // Invented config. Add to hbase-*.xml if other than default compression.
    final String compression = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());

    return new RecordWriter<ImmutableBytesWritable, KeyValue>() {
        // Map of families to writers and how much has been output on the writer.
        private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                Bytes.BYTES_COMPARATOR);
        private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
        private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
        private boolean rollRequested = false;

        public void write(ImmutableBytesWritable row, KeyValue kv) throws IOException {
            // null input == user explicitly wants to flush
            if (row == null && kv == null) {
                rollWriters();
                return;
            }

            byte[] rowKey = kv.getRow();
            long length = kv.getLength();
            byte[] family = kv.getFamily();
            WriterLength wl = this.writers.get(family);

            // If this is a new column family, verify that the directory exists
            if (wl == null) {
                fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
            }

            // If any of the HFiles for the column families has reached
            // maxsize, we need to roll all the writers
            if (wl != null && wl.written + length >= maxsize) {
                this.rollRequested = true;
            }

            // This can only happen once a row is finished though
            if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                rollWriters();
            }

            // create a new HLog writer, if necessary
            if (wl == null || wl.writer == null) {
                wl = getNewWriter(family);
            }

            // we now have the proper HLog writer. full steam ahead
            kv.updateLatestStamp(this.now);
            wl.writer.append(kv);
            wl.written += length;

            // Copy the row so we know when a row transition.
            this.previousRow = rowKey;
        }

        private void rollWriters() throws IOException {
            for (WriterLength wl : this.writers.values()) {
                if (wl.writer != null) {
                    LOG.info("Writer=" + wl.writer.getPath()
                            + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
                    close(wl.writer);
                }
                wl.writer = null;
                wl.written = 0;
            }
            this.rollRequested = false;
        }

        /* Create a new HFile.Writer.
         * @param family
         * @return A WriterLength, containing a new HFile.Writer.
         * @throws IOException
         */
        private WriterLength getNewWriter(byte[] family) throws IOException {
            WriterLength wl = new WriterLength();
            Path familydir = new Path(outputdir, Bytes.toString(family));
            wl.writer = new HFile.Writer(fs, StoreFile.getUniqueFile(fs, familydir), blocksize, compression,
                    KeyValue.KEY_COMPARATOR);
            this.writers.put(family, wl);
            return wl;
        }

        private void close(final HFile.Writer w) throws IOException {
            if (w != null) {
                w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
                w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                        Bytes.toBytes(context.getTaskAttemptID().toString()));
                w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                w.close();
            }
        }

        public void close(TaskAttemptContext c) throws IOException, InterruptedException {
            for (WriterLength wl : this.writers.values()) {
                close(wl.writer);
            }
        }
    };
}
From source file:com.citic.zxyjs.zwlscx.mapreduce.lib.input.HFileOutputFormatBase.java
License:Apache License
public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(final TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Get the path of the temporary output file
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
    final Path ignoreOutputPath = new Path(outputPath + "_ignore");
    final Configuration conf = context.getConfiguration();
    final FileSystem fs = outputdir.getFileSystem(conf);
    // These configs. are from hbase-*.xml
    final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
    // Invented config. Add to hbase-*.xml if other than default compression.
    final String defaultCompression = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
    final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
            false);

    if (fs.exists(ignoreOutputPath)) {
        LOG.info("Deleted " + ignoreOutputPath.toString() + " success.");
        fs.delete(ignoreOutputPath, true);
    }

    // create a map from column family to the compression algorithm
    final Map<byte[], String> compressionMap = createFamilyCompressionMap(conf);
    final Map<byte[], String> bloomTypeMap = createFamilyBloomMap(conf);
    final Map<byte[], String> blockSizeMap = createFamilyBlockSizeMap(conf);

    String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_CONF_KEY);
    final HFileDataBlockEncoder encoder;
    if (dataBlockEncodingStr == null) {
        encoder = NoOpDataBlockEncoder.INSTANCE;
    } else {
        try {
            encoder = new HFileDataBlockEncoderImpl(DataBlockEncoding.valueOf(dataBlockEncodingStr));
        } catch (IllegalArgumentException ex) {
            throw new RuntimeException("Invalid data block encoding type configured for the param "
                    + DATABLOCK_ENCODING_CONF_KEY + " : " + dataBlockEncodingStr);
        }
    }

    return new RecordWriter<ImmutableBytesWritable, KeyValue>() {
        // Map of families to writers and how much has been output on the writer.
        private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                Bytes.BYTES_COMPARATOR);
        private final FSDataOutputStream dos = fs.create(ignoreOutputPath);
        private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
        private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
        private boolean rollRequested = false;

        public void write(ImmutableBytesWritable row, KeyValue kv) throws IOException {
            // null input == user explicitly wants to flush
            if (row == null && kv == null) {
                rollWriters();
                return;
            }

            byte[] rowKey = kv.getRow();
            long length = kv.getLength();
            byte[] family = kv.getFamily();

            if (ignore(kv)) {
                byte[] readBuf = rowKey;
                dos.write(readBuf, 0, readBuf.length);
                dos.write(Bytes.toBytes("\n"));
                return;
            }

            WriterLength wl = this.writers.get(family);

            // If this is a new column family, verify that the directory exists
            if (wl == null) {
                Path path = null;
                path = new Path(outputdir, Bytes.toString(family));
                fs.mkdirs(path);
            }

            // If any of the HFiles for the column families has reached
            // maxsize, we need to roll all the writers
            if (wl != null && wl.written + length >= maxsize) {
                this.rollRequested = true;
            }

            // This can only happen once a row is finished though
            if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                rollWriters();
            }

            // create a new HLog writer, if necessary
            if (wl == null || wl.writer == null) {
                wl = getNewWriter(family, conf);
            }

            // we now have the proper HLog writer. full steam ahead
            kv.updateLatestStamp(this.now);
            wl.writer.append(kv);
            wl.written += length;

            // Copy the row so we know when a row transition.
            this.previousRow = rowKey;
        }

        private void rollWriters() throws IOException {
            for (WriterLength wl : this.writers.values()) {
                if (wl.writer != null) {
                    LOG.info("Writer=" + wl.writer.getPath()
                            + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
                    close(wl.writer);
                }
                wl.writer = null;
                wl.written = 0;
            }
            this.rollRequested = false;
        }

        /*
         * Create a new StoreFile.Writer.
         * @param family
         * @return A WriterLength, containing a new StoreFile.Writer.
         * @throws IOException
         */
        private WriterLength getNewWriter(byte[] family, Configuration conf) throws IOException {
            WriterLength wl = new WriterLength();
            Path familydir = new Path(outputdir, Bytes.toString(family));
            String compression = compressionMap.get(family);
            compression = compression == null ? defaultCompression : compression;
            String bloomTypeStr = bloomTypeMap.get(family);
            BloomType bloomType = BloomType.NONE;
            if (bloomTypeStr != null) {
                bloomType = BloomType.valueOf(bloomTypeStr);
            }
            String blockSizeString = blockSizeMap.get(family);
            int blockSize = blockSizeString == null ? HConstants.DEFAULT_BLOCKSIZE
                    : Integer.parseInt(blockSizeString);
            Configuration tempConf = new Configuration(conf);
            tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
            wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs, blockSize)
                    .withOutputDir(familydir)
                    .withCompression(AbstractHFileWriter.compressionByName(compression))
                    .withBloomType(bloomType).withComparator(KeyValue.COMPARATOR).withDataBlockEncoder(encoder)
                    .withChecksumType(HStore.getChecksumType(conf))
                    .withBytesPerChecksum(HStore.getBytesPerChecksum(conf)).build();
            this.writers.put(family, wl);
            return wl;
        }

        private void close(final StoreFile.Writer w) throws IOException {
            if (w != null) {
                w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
                w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                        Bytes.toBytes(context.getTaskAttemptID().toString()));
                w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY,
                        Bytes.toBytes(compactionExclude));
                w.appendTrackedTimestampsToMetadata();
                w.close();
            }
        }

        public void close(TaskAttemptContext c) throws IOException, InterruptedException {
            dos.flush();
            dos.close();
            for (WriterLength wl : this.writers.values()) {
                close(wl.writer);
            }
        }
    };
}
From source file:com.cloudera.crunch.type.avro.AvroOutputFormat.java
License:Apache License
@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    Schema schema = AvroJob.getOutputSchema(context.getConfiguration());

    final DataFileWriter<T> WRITER = new DataFileWriter<T>(new GenericDatumWriter<T>());
    Path path = getDefaultWorkFile(context, org.apache.avro.mapred.AvroOutputFormat.EXT);
    WRITER.create(schema, path.getFileSystem(context.getConfiguration()).create(path));

    return new RecordWriter<AvroWrapper<T>, NullWritable>() {
        @Override
        public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
            WRITER.append(wrapper.datum());
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            WRITER.close();
        }
    };
}
From source file:com.cloudera.sqoop.mapreduce.AvroOutputFormat.java
License:Apache License
@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    Schema schema = AvroJob.getMapOutputSchema(context.getConfiguration());

    final DataFileWriter<T> WRITER = new DataFileWriter<T>(new GenericDatumWriter<T>());
    Path path = getDefaultWorkFile(context, org.apache.avro.mapred.AvroOutputFormat.EXT);
    WRITER.create(schema, path.getFileSystem(context.getConfiguration()).create(path));

    return new RecordWriter<AvroWrapper<T>, NullWritable>() {
        @Override
        public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
            WRITER.append(wrapper.datum());
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            WRITER.close();
        }
    };
}
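For context, user code rarely calls getRecordWriter directly; the MapReduce framework obtains the RecordWriter from whichever OutputFormat is registered on the Job and drives write/close itself. The following driver sketch shows that wiring; the Driver class, input/output paths, and TextLineOutputFormat (the hypothetical format sketched at the top of this page) are illustrative, not taken from any of the examples above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Driver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "record-writer-demo");
        job.setJarByClass(Driver.class);
        // Substitute any OutputFormat from the examples above for the hypothetical TextLineOutputFormat.
        job.setOutputFormatClass(TextLineOutputFormat.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // The framework calls getRecordWriter(...) once per task, then write(key, value)
        // for every output record, and finally close(context).
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}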