Example usage for the org.apache.hadoop.mapreduce.RecordWriter constructor

Introduction

On this page you can find example usages of the org.apache.hadoop.mapreduce.RecordWriter constructor, collected from open-source projects.

Prototype

RecordWriter()
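
Every example below follows the same pattern: an OutputFormat's getRecordWriter method builds some underlying writer and wraps it in an anonymous subclass of RecordWriter that implements write(K, V) and close(TaskAttemptContext). For orientation, here is a minimal, hedged sketch of such a subclass; the PrintStream target and the Text/NullWritable types are illustrative choices, not taken from any of the projects below.

import java.io.IOException;
import java.io.PrintStream;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Minimal sketch: a RecordWriter that prints each key to a stream.
// The stream and the key/value types are illustrative assumptions.
public class SketchRecordWriter extends RecordWriter<Text, NullWritable> {

    private final PrintStream out;

    public SketchRecordWriter(PrintStream out) {
        this.out = out;
    }

    @Override
    public void write(Text key, NullWritable value) throws IOException, InterruptedException {
        out.println(key.toString());
    }

    @Override
    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
        out.close();
    }
}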

Usage

From source file:cn.edu.hfut.dmic.webcollector.fetcher.FetcherOutputFormat.java

@Override
public RecordWriter<Text, Writable> getRecordWriter(TaskAttemptContext tac)
        throws IOException, InterruptedException {
    Configuration conf = tac.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    String outputPath = conf.get("mapred.output.dir");

    Path fetchPath = new Path(outputPath, "fetch/info");
    Path contentPath = new Path(outputPath, "content/info");
    Path parseDataPath = new Path(outputPath, "parse/info");
    Path redirectPath = new Path(outputPath, "redirect/info");
    final SequenceFile.Writer fetchOut = new SequenceFile.Writer(fs, conf, fetchPath, Text.class,
            CrawlDatum.class);
    final SequenceFile.Writer contentOut = new SequenceFile.Writer(fs, conf, contentPath, Text.class,
            Content.class);
    final SequenceFile.Writer parseDataOut = new SequenceFile.Writer(fs, conf, parseDataPath, Text.class,
            CrawlDatum.class);
    final SequenceFile.Writer redirectOut = new SequenceFile.Writer(fs, conf, redirectPath, CrawlDatum.class,
            Text.class);

    return new RecordWriter<Text, Writable>() {

        @Override
        public void write(Text k, Writable v) throws IOException, InterruptedException {
            if (v instanceof CrawlDatum) {
                fetchOut.append(k, v);
            } else if (v instanceof Content) {
                contentOut.append(k, v);
            } else if (v instanceof ParseData) {

                ParseData parseData = (ParseData) v;
                CrawlDatums next = parseData.next;
                for (CrawlDatum datum : next) {
                    parseDataOut.append(new Text(datum.getKey()), datum);
                }

            } else if (v instanceof Redirect) {
                Redirect redirect = (Redirect) v;
                redirectOut.append(redirect.datum, new Text(redirect.realUrl));
            }
        }

        @Override
        public void close(TaskAttemptContext tac) throws IOException, InterruptedException {
            fetchOut.close();
            contentOut.close();
            parseDataOut.close();
            redirectOut.close();
        }
    };

}
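
In normal use the framework, not application code, calls getRecordWriter: the format is registered on the Job and Hadoop obtains one writer per task attempt. As a rough, hedged sketch of that wiring (assuming FetcherOutputFormat can be registered like any other OutputFormat, as its getRecordWriter override implies; the job name and output path below are illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import cn.edu.hfut.dmic.webcollector.fetcher.FetcherOutputFormat;

public class FetcherJobSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "fetch");                // job name is illustrative
        job.setJarByClass(FetcherJobSketch.class);
        job.setOutputFormatClass(FetcherOutputFormat.class);     // the format shown above
        // The format resolves its output directory from the job configuration;
        // "crawl-output" here is an illustrative path.
        FileOutputFormat.setOutputPath(job, new Path("crawl-output"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}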

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputFormat.java

License:Apache License

/**
 * Create a composite record writer that can write key/value data to different output files.
 *
 * @return a composite record writer
 * @throws IOException
 */
@Override
public RecordWriter<K, V> getRecordWriter(final TaskAttemptContext job) throws IOException {
    final String outputName = FileOutputFormat.getOutputName(job);

    Configuration configuration = job.getConfiguration();
    Class<? extends DynamicPartitioner> partitionerClass = configuration.getClass(
            PartitionedFileSetArguments.DYNAMIC_PARTITIONER_CLASS_NAME, null, DynamicPartitioner.class);

    @SuppressWarnings("unchecked")
    final DynamicPartitioner<K, V> dynamicPartitioner = new InstantiatorFactory(false)
            .get(TypeToken.of(partitionerClass)).create();

    MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(configuration);
    final BasicMapReduceTaskContext<K, V> taskContext = classLoader.getTaskContextProvider().get(job);

    String outputDatasetName = configuration.get(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET);
    PartitionedFileSet outputDataset = taskContext.getDataset(outputDatasetName);
    final Partitioning partitioning = outputDataset.getPartitioning();

    dynamicPartitioner.initialize(taskContext);

    return new RecordWriter<K, V>() {

        // a cache storing the record writers for different output files.
        Map<PartitionKey, RecordWriter<K, V>> recordWriters = new HashMap<>();

        public void write(K key, V value) throws IOException, InterruptedException {
            PartitionKey partitionKey = dynamicPartitioner.getPartitionKey(key, value);
            RecordWriter<K, V> rw = this.recordWriters.get(partitionKey);
            if (rw == null) {
                String relativePath = PartitionedFileSetDataset.getOutputPath(partitionKey, partitioning);
                String finalPath = relativePath + "/" + outputName;

                // if we don't have the record writer yet for the final path, create one and add it to the cache
                rw = getBaseRecordWriter(getTaskAttemptContext(job, finalPath));
                this.recordWriters.put(partitionKey, rw);
            }
            rw.write(key, value);
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            try {
                List<RecordWriter<?, ?>> recordWriters = new ArrayList<>();
                recordWriters.addAll(this.recordWriters.values());
                MultipleOutputs.closeRecordWriters(recordWriters, context);

                taskContext.flushOperations();
            } catch (Exception e) {
                throw new IOException(e);
            } finally {
                dynamicPartitioner.destroy();
            }
        }
    };
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.UnsupportedOutputFormat.java

License:Apache License

@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) {
    return new RecordWriter<K, V>() {
        public void write(K key, V value) {
            throw new UnsupportedOperationException("Writing to output is not supported.");
        }

        public void close(TaskAttemptContext context) {
        }
    };
}

From source file:com.asakusafw.runtime.stage.output.EmptyFileOutputFormat.java

License:Apache License

@Override
public RecordWriter<Object, Object> getRecordWriter(TaskAttemptContext job)
        throws IOException, InterruptedException {
    return new RecordWriter<Object, Object>() {
        @Override
        public void write(Object key, Object value) throws IOException, InterruptedException {
            // intentionally a no-op: this format produces no output records
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            // nothing to clean up
        }
    };
}

From source file:com.asakusafw.runtime.stage.output.TemporaryOutputFormat.java

License:Apache License

/**
 * Creates a new {@link RecordWriter} to output temporary data.
 * @param <V> value type
 * @param context current context
 * @param name output name
 * @param dataType value type
 * @return the created writer
 * @throws IOException if failed to create a new {@link RecordWriter}
 * @throws InterruptedException if interrupted
 * @throws IllegalArgumentException if some parameters were {@code null}
 */
public <V> RecordWriter<NullWritable, V> createRecordWriter(TaskAttemptContext context, String name,
        Class<V> dataType) throws IOException, InterruptedException {
    if (context == null) {
        throw new IllegalArgumentException("context must not be null"); //$NON-NLS-1$
    }
    if (name == null) {
        throw new IllegalArgumentException("name must not be null"); //$NON-NLS-1$
    }
    if (dataType == null) {
        throw new IllegalArgumentException("dataType must not be null"); //$NON-NLS-1$
    }
    CompressionCodec codec = null;
    Configuration conf = context.getConfiguration();
    if (FileOutputFormat.getCompressOutput(context)) {
        Class<?> codecClass = FileOutputFormat.getOutputCompressorClass(context, DefaultCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
    }
    FileOutputCommitter committer = getOutputCommitter(context);
    final Path file = new Path(committer.getWorkPath(), FileOutputFormat.getUniqueFile(context, name, "")); //$NON-NLS-1$
    final ModelOutput<V> out = TemporaryStorage.openOutput(conf, dataType, file, codec);
    return new RecordWriter<NullWritable, V>() {

        @Override
        public void write(NullWritable key, V value) throws IOException {
            out.write(value);
        }

        @Override
        public void close(TaskAttemptContext ignored) throws IOException {
            out.close();
        }

        @Override
        public String toString() {
            return String.format("TemporaryOutput(%s)", file); //$NON-NLS-1$
        }
    };
}

From source file:com.ci.backports.avro.mapreduce.AvroOutputFormat.java

License:Apache License

@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException {
    Schema schema = AvroJob.getOutputSchema(context.getConfiguration());
    if (schema == null) {
        throw new RuntimeException("AvroOutputFormat requires an output schema.");
    }

    final DataFileWriter<T> writer = new DataFileWriter<T>(new SpecificDatumWriter<T>());

    if (FileOutputFormat.getCompressOutput(context)) {
        int level = context.getConfiguration().getInt(org.apache.avro.mapred.AvroOutputFormat.DEFLATE_LEVEL_KEY,
                org.apache.avro.mapred.AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
        writer.setCodec(CodecFactory.deflateCodec(level));
    }

    Path path = getDefaultWorkFile(context, org.apache.avro.mapred.AvroOutputFormat.EXT);
    writer.create(schema, path.getFileSystem(context.getConfiguration()).create(path));

    return new RecordWriter<AvroWrapper<T>, NullWritable>() {
        public void write(AvroWrapper<T> record, NullWritable ignore) throws IOException {
            writer.append(record.datum());
        }

        public void close(TaskAttemptContext context) throws IOException {
            writer.close();
        }
    };
}

From source file:com.ci.backports.hadoop.hbase.ZHFileOutputFormat.java

License:Apache License

public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(final TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Get the path of the temporary output file
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
    Configuration conf = context.getConfiguration();
    final FileSystem fs = outputdir.getFileSystem(conf);
    // These configs. are from hbase-*.xml
    final long maxsize = conf.getLong("hbase.hregion.max.filesize", HConstants.DEFAULT_MAX_FILE_SIZE);
    final int blocksize = conf.getInt("hfile.min.blocksize.size", HFile.DEFAULT_BLOCKSIZE);
    // Invented config.  Add to hbase-*.xml if other than default compression.
    final String compression = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());

    return new RecordWriter<ImmutableBytesWritable, KeyValue>() {
        // Map of families to writers and how much has been output on the writer.
        private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                Bytes.BYTES_COMPARATOR);
        private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
        private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
        private boolean rollRequested = false;

        public void write(ImmutableBytesWritable row, KeyValue kv) throws IOException {
            // null input == user explicitly wants to flush
            if (row == null && kv == null) {
                rollWriters();
                return;
            }

            byte[] rowKey = kv.getRow();
            long length = kv.getLength();
            byte[] family = kv.getFamily();
            WriterLength wl = this.writers.get(family);

            // If this is a new column family, ensure its output directory exists
            if (wl == null) {
                fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
            }

            // If any of the HFiles for the column families has reached
            // maxsize, we need to roll all the writers
            if (wl != null && wl.written + length >= maxsize) {
                this.rollRequested = true;
            }

            // This can only happen once a row is finished though
            if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                rollWriters();
            }

            // create a new HFile writer, if necessary
            if (wl == null || wl.writer == null) {
                wl = getNewWriter(family);
            }

            // we now have the proper HFile writer. full steam ahead
            kv.updateLatestStamp(this.now);
            wl.writer.append(kv);
            wl.written += length;

            // Remember the row so we know when a row transitions.
            this.previousRow = rowKey;
        }

        private void rollWriters() throws IOException {
            for (WriterLength wl : this.writers.values()) {
                if (wl.writer != null) {
                    LOG.info("Writer=" + wl.writer.getPath()
                            + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
                    close(wl.writer);
                }
                wl.writer = null;
                wl.written = 0;
            }
            this.rollRequested = false;
        }

        /* Create a new HFile.Writer.
         * @param family
         * @return A WriterLength, containing a new HFile.Writer.
         * @throws IOException
         */
        private WriterLength getNewWriter(byte[] family) throws IOException {
            WriterLength wl = new WriterLength();
            Path familydir = new Path(outputdir, Bytes.toString(family));
            wl.writer = new HFile.Writer(fs, StoreFile.getUniqueFile(fs, familydir), blocksize, compression,
                    KeyValue.KEY_COMPARATOR);
            this.writers.put(family, wl);
            return wl;
        }

        private void close(final HFile.Writer w) throws IOException {
            if (w != null) {
                w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
                w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                        Bytes.toBytes(context.getTaskAttemptID().toString()));
                w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                w.close();
            }
        }

        public void close(TaskAttemptContext c) throws IOException, InterruptedException {
            for (WriterLength wl : this.writers.values()) {
                close(wl.writer);
            }
        }
    };
}

From source file:com.citic.zxyjs.zwlscx.mapreduce.lib.input.HFileOutputFormatBase.java

License:Apache License

public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(final TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Get the path of the temporary output file
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
    final Path ignoreOutputPath = new Path(outputPath + "_ignore");

    final Configuration conf = context.getConfiguration();
    final FileSystem fs = outputdir.getFileSystem(conf);
    // These configs. are from hbase-*.xml
    final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
    // Invented config. Add to hbase-*.xml if other than default
    // compression.
    final String defaultCompression = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
    final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
            false);

    if (fs.exists(ignoreOutputPath)) {
        fs.delete(ignoreOutputPath, true);
        LOG.info("Deleted " + ignoreOutputPath + " successfully.");
    }

    // create a map from column family to the compression algorithm
    final Map<byte[], String> compressionMap = createFamilyCompressionMap(conf);
    final Map<byte[], String> bloomTypeMap = createFamilyBloomMap(conf);
    final Map<byte[], String> blockSizeMap = createFamilyBlockSizeMap(conf);

    String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_CONF_KEY);
    final HFileDataBlockEncoder encoder;
    if (dataBlockEncodingStr == null) {
        encoder = NoOpDataBlockEncoder.INSTANCE;
    } else {
        try {
            encoder = new HFileDataBlockEncoderImpl(DataBlockEncoding.valueOf(dataBlockEncodingStr));
        } catch (IllegalArgumentException ex) {
            throw new RuntimeException("Invalid data block encoding type configured for the param "
                    + DATABLOCK_ENCODING_CONF_KEY + " : " + dataBlockEncodingStr);
        }
    }

    return new RecordWriter<ImmutableBytesWritable, KeyValue>() {
        // Map of families to writers and how much has been output on the
        // writer.
        private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                Bytes.BYTES_COMPARATOR);
        private final FSDataOutputStream dos = fs.create(ignoreOutputPath);
        private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
        private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
        private boolean rollRequested = false;

        public void write(ImmutableBytesWritable row, KeyValue kv) throws IOException {
            // null input == user explicitly wants to flush
            if (row == null && kv == null) {
                rollWriters();
                return;
            }

            byte[] rowKey = kv.getRow();
            long length = kv.getLength();
            byte[] family = kv.getFamily();

            if (ignore(kv)) {
                byte[] readBuf = rowKey;
                dos.write(readBuf, 0, readBuf.length);
                dos.write(Bytes.toBytes("\n"));
                return;
            }
            WriterLength wl = this.writers.get(family);

            // If this is a new column family, ensure its output directory exists
            if (wl == null) {
                fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
            }

            // If any of the HFiles for the column families has reached
            // maxsize, we need to roll all the writers
            if (wl != null && wl.written + length >= maxsize) {
                this.rollRequested = true;
            }

            // This can only happen once a row is finished though
            if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                rollWriters();
            }

            // create a new StoreFile writer, if necessary
            if (wl == null || wl.writer == null) {
                wl = getNewWriter(family, conf);
            }

            // we now have the proper StoreFile writer. full steam ahead
            kv.updateLatestStamp(this.now);
            wl.writer.append(kv);
            wl.written += length;

            // Remember the row so we know when a row transitions.
            this.previousRow = rowKey;
        }

        private void rollWriters() throws IOException {
            for (WriterLength wl : this.writers.values()) {
                if (wl.writer != null) {
                    LOG.info("Writer=" + wl.writer.getPath()
                            + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
                    close(wl.writer);
                }
                wl.writer = null;
                wl.written = 0;
            }
            this.rollRequested = false;
        }

        /*
         * Create a new StoreFile.Writer.
         * @param family
         * @return A WriterLength, containing a new StoreFile.Writer.
         * @throws IOException
         */
        private WriterLength getNewWriter(byte[] family, Configuration conf) throws IOException {
            WriterLength wl = new WriterLength();
            Path familydir = new Path(outputdir, Bytes.toString(family));
            String compression = compressionMap.get(family);
            compression = compression == null ? defaultCompression : compression;
            String bloomTypeStr = bloomTypeMap.get(family);
            BloomType bloomType = BloomType.NONE;
            if (bloomTypeStr != null) {
                bloomType = BloomType.valueOf(bloomTypeStr);
            }
            String blockSizeString = blockSizeMap.get(family);
            int blockSize = blockSizeString == null ? HConstants.DEFAULT_BLOCKSIZE
                    : Integer.parseInt(blockSizeString);
            Configuration tempConf = new Configuration(conf);
            tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
            wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs, blockSize)
                    .withOutputDir(familydir)
                    .withCompression(AbstractHFileWriter.compressionByName(compression))
                    .withBloomType(bloomType).withComparator(KeyValue.COMPARATOR).withDataBlockEncoder(encoder)
                    .withChecksumType(HStore.getChecksumType(conf))
                    .withBytesPerChecksum(HStore.getBytesPerChecksum(conf)).build();

            this.writers.put(family, wl);
            return wl;
        }

        private void close(final StoreFile.Writer w) throws IOException {
            if (w != null) {
                w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
                w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                        Bytes.toBytes(context.getTaskAttemptID().toString()));
                w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
                w.appendTrackedTimestampsToMetadata();
                w.close();
            }
        }

        public void close(TaskAttemptContext c) throws IOException, InterruptedException {
            dos.flush();
            dos.close();
            for (WriterLength wl : this.writers.values()) {
                close(wl.writer);
            }
        }
    };
}

From source file:com.cloudera.crunch.type.avro.AvroOutputFormat.java

License:Apache License

@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {

    Schema schema = AvroJob.getOutputSchema(context.getConfiguration());

    final DataFileWriter<T> WRITER = new DataFileWriter<T>(new GenericDatumWriter<T>());

    Path path = getDefaultWorkFile(context, org.apache.avro.mapred.AvroOutputFormat.EXT);
    WRITER.create(schema, path.getFileSystem(context.getConfiguration()).create(path));

    return new RecordWriter<AvroWrapper<T>, NullWritable>() {
        @Override
        public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
            WRITER.append(wrapper.datum());
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            WRITER.close();
        }
    };
}

From source file:com.cloudera.sqoop.mapreduce.AvroOutputFormat.java

License:Apache License

@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {

    Schema schema = AvroJob.getMapOutputSchema(context.getConfiguration());

    final DataFileWriter<T> WRITER = new DataFileWriter<T>(new GenericDatumWriter<T>());

    Path path = getDefaultWorkFile(context, org.apache.avro.mapred.AvroOutputFormat.EXT);
    WRITER.create(schema, path.getFileSystem(context.getConfiguration()).create(path));

    return new RecordWriter<AvroWrapper<T>, NullWritable>() {
        @Override
        public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
            WRITER.append(wrapper.datum());
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            WRITER.close();
        }
    };
}