Example usage for org.apache.hadoop.mapreduce RecordWriter write

Introduction

On this page you can find example usages of the org.apache.hadoop.mapreduce RecordWriter write method.

Prototype

public abstract void write(K key, V value) throws IOException, InterruptedException;

Document

Writes a key/value pair.
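
As a quick orientation before the excerpts below, here is a minimal, self-contained sketch (not taken from any of the projects listed under Usage) of how a RecordWriter is usually obtained from an OutputFormat and how write is then called once per key/value pair. The class name RecordWriterWriteDemo, the output path /tmp/recordwriter-demo, and the sample keys/values are placeholders; inside a real map or reduce task the framework creates the TaskAttemptContext and RecordWriter for you.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class RecordWriterWriteDemo {

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        // Placeholder output directory; adjust for your environment.
        FileOutputFormat.setOutputPath(job, new Path("/tmp/recordwriter-demo"));

        // Outside of a running task, a TaskAttemptContext can be constructed directly.
        TaskAttemptContext context =
                new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());

        TextOutputFormat<Text, IntWritable> outputFormat = new TextOutputFormat<>();
        RecordWriter<Text, IntWritable> writer = outputFormat.getRecordWriter(context);
        try {
            // Each write call emits one key/value pair to the task's output file.
            writer.write(new Text("apple"), new IntWritable(3));
            writer.write(new Text("banana"), new IntWritable(5));
        } finally {
            // Closing the writer flushes buffered records and releases the underlying stream.
            writer.close(context);
        }
    }
}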

Usage

From source file: co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputFormat.java

License: Apache License

/**
 * Create a composite record writer that can write key/value data to different output files.
 *
 * @return a composite record writer
 * @throws IOException
 */
@Override
public RecordWriter<K, V> getRecordWriter(final TaskAttemptContext job) throws IOException {
    final String outputName = FileOutputFormat.getOutputName(job);

    Configuration configuration = job.getConfiguration();
    Class<? extends DynamicPartitioner> partitionerClass = configuration.getClass(
            PartitionedFileSetArguments.DYNAMIC_PARTITIONER_CLASS_NAME, null, DynamicPartitioner.class);

    @SuppressWarnings("unchecked")
    final DynamicPartitioner<K, V> dynamicPartitioner = new InstantiatorFactory(false)
            .get(TypeToken.of(partitionerClass)).create();

    MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(configuration);
    final BasicMapReduceTaskContext<K, V> taskContext = classLoader.getTaskContextProvider().get(job);

    String outputDatasetName = configuration.get(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET);
    PartitionedFileSet outputDataset = taskContext.getDataset(outputDatasetName);
    final Partitioning partitioning = outputDataset.getPartitioning();

    dynamicPartitioner.initialize(taskContext);

    return new RecordWriter<K, V>() {

        // a cache storing the record writers for different output files.
        Map<PartitionKey, RecordWriter<K, V>> recordWriters = new HashMap<>();

        @Override
        public void write(K key, V value) throws IOException, InterruptedException {
            PartitionKey partitionKey = dynamicPartitioner.getPartitionKey(key, value);
            RecordWriter<K, V> rw = this.recordWriters.get(partitionKey);
            if (rw == null) {
                String relativePath = PartitionedFileSetDataset.getOutputPath(partitionKey, partitioning);
                String finalPath = relativePath + "/" + outputName;

                // if we don't have the record writer yet for the final path, create one and add it to the cache
                rw = getBaseRecordWriter(getTaskAttemptContext(job, finalPath));
                this.recordWriters.put(partitionKey, rw);
            }
            rw.write(key, value);
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            try {
                List<RecordWriter<?, ?>> recordWriters = new ArrayList<>();
                recordWriters.addAll(this.recordWriters.values());
                MultipleOutputs.closeRecordWriters(recordWriters, context);

                taskContext.flushOperations();
            } catch (Exception e) {
                throw new IOException(e);
            } finally {
                dynamicPartitioner.destroy();
            }
        }
    };
}

From source file: co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.MultiWriter.java

License: Apache License

public void write(K key, V value) throws IOException, InterruptedException {
    PartitionKey partitionKey = dynamicPartitioner.getPartitionKey(key, value);
    RecordWriter<K, V> rw = this.recordWriters.get(partitionKey);
    if (rw == null) {
        // if we don't have the record writer yet for the final path, create one and add it to the cache
        TaskAttemptContext taskAttemptContext = getKeySpecificContext(partitionKey);
        rw = getBaseRecordWriter(taskAttemptContext);
        this.recordWriters.put(partitionKey, rw);
        this.contexts.put(partitionKey, taskAttemptContext);
    }
    rw.write(key, value);
}

From source file: com.facebook.hiveio.output.OutputCmd.java

License: Apache License

/**
 * Write output
 *
 * @param context Context
 * @throws Exception
 */
public void write(Context context) throws Exception {
    PerThread threadLocal = context.perThread.get();

    HiveApiOutputCommitter outputCommitter = context.outputFormat.getOutputCommitter(threadLocal.taskContext());

    outputCommitter.setupTask(threadLocal.taskContext());

    RecordWriter<WritableComparable, HiveWritableRecord> recordWriter = context.outputFormat
            .getRecordWriter(threadLocal.taskContext());

    HiveWritableRecord record = HiveRecordFactory.newWritableRecord(context.schema);

    // TODO: allow type promotions: see https://github.com/facebook/hive-io-experimental/issues/15
    record.set(0, 11L);
    record.set(1, 22.22);
    record.set(2, true);
    record.set(3, "foo");
    recordWriter.write(NullWritable.get(), record);

    record.set(0, 33L);
    record.set(1, 44.44);
    record.set(2, false);
    record.set(3, "bar");
    recordWriter.write(NullWritable.get(), record);

    recordWriter.close(threadLocal.taskContext());

    if (outputCommitter.needsTaskCommit(threadLocal.taskContext())) {
        outputCommitter.commitTask(threadLocal.taskContext());
    }
}

From source file: com.github.dryangkun.hbase.tidx.hive.HiveHFileOutputFormat.java

License: Apache License

@Override
public RecordWriter getHiveRecordWriter(final JobConf jc, final Path finalOutPath,
        Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties,
        final Progressable progressable) throws IOException {

    // Read configuration for the target path, first from jobconf, then from table properties
    String hfilePath = getFamilyPath(jc, tableProperties);
    if (hfilePath == null) {
        throw new RuntimeException("Please set " + HFILE_FAMILY_PATH + " to target location for HFiles");
    }

    // Target path's last component is also the column family name.
    final Path columnFamilyPath = new Path(hfilePath);
    final String columnFamilyName = columnFamilyPath.getName();
    final byte[] columnFamilyNameBytes = Bytes.toBytes(columnFamilyName);
    final Job job = new Job(jc);
    setCompressOutput(job, isCompressed);
    setOutputPath(job, finalOutPath);

    // Create the HFile writer
    final org.apache.hadoop.mapreduce.TaskAttemptContext tac = ShimLoader.getHadoopShims()
            .newTaskAttemptContext(job.getConfiguration(), progressable);

    final Path outputdir = FileOutputFormat.getOutputPath(tac);
    final org.apache.hadoop.mapreduce.RecordWriter<ImmutableBytesWritable, KeyValue> fileWriter = getFileWriter(
            tac);

    // Individual columns are going to be pivoted to HBase cells,
    // and for each row, they need to be written out in order
    // of column name, so sort the column names now, creating a
    // mapping to their column position.  However, the first
    // column is interpreted as the row key.
    String columnList = tableProperties.getProperty("columns");
    String[] columnArray = columnList.split(",");
    final SortedMap<byte[], Integer> columnMap = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
    int i = 0;
    for (String columnName : columnArray) {
        if (i != 0) {
            columnMap.put(Bytes.toBytes(columnName), i);
        }
        ++i;
    }

    return new RecordWriter() {

        @Override
        public void close(boolean abort) throws IOException {
            try {
                fileWriter.close(null);
                if (abort) {
                    return;
                }
                // Move the hfiles file(s) from the task output directory to the
                // location specified by the user.
                FileSystem fs = outputdir.getFileSystem(jc);
                fs.mkdirs(columnFamilyPath);
                Path srcDir = outputdir;
                for (;;) {
                    FileStatus[] files = fs.listStatus(srcDir, FileUtils.STAGING_DIR_PATH_FILTER);
                    if ((files == null) || (files.length == 0)) {
                        throw new IOException("No family directories found in " + srcDir);
                    }
                    if (files.length != 1) {
                        throw new IOException("Multiple family directories found in " + srcDir);
                    }
                    srcDir = files[0].getPath();
                    if (srcDir.getName().equals(columnFamilyName)) {
                        break;
                    }
                }
                for (FileStatus regionFile : fs.listStatus(srcDir, FileUtils.STAGING_DIR_PATH_FILTER)) {
                    fs.rename(regionFile.getPath(), new Path(columnFamilyPath, regionFile.getPath().getName()));
                }
                // Hive actually wants a file as task output (not a directory), so
                // replace the empty directory with an empty file to keep it happy.
                fs.delete(outputdir, true);
                fs.createNewFile(outputdir);
            } catch (InterruptedException ex) {
                throw new IOException(ex);
            }
        }

        private void writeText(Text text) throws IOException {
            // Decompose the incoming text row into fields.
            String s = text.toString();
            String[] fields = s.split("\u0001");
            assert (fields.length <= (columnMap.size() + 1));
            // First field is the row key.
            byte[] rowKeyBytes = Bytes.toBytes(fields[0]);
            // Remaining fields are cells addressed by column name within row.
            for (Map.Entry<byte[], Integer> entry : columnMap.entrySet()) {
                byte[] columnNameBytes = entry.getKey();
                int iColumn = entry.getValue();
                String val;
                if (iColumn >= fields.length) {
                    // trailing blank field
                    val = "";
                } else {
                    val = fields[iColumn];
                    if ("\\N".equals(val)) {
                        // omit nulls
                        continue;
                    }
                }
                byte[] valBytes = Bytes.toBytes(val);
                KeyValue kv = new KeyValue(rowKeyBytes, columnFamilyNameBytes, columnNameBytes, valBytes);
                try {
                    fileWriter.write(null, kv);
                } catch (IOException e) {
                    LOG.error("Failed while writing row: " + s);
                    throw e;
                } catch (InterruptedException ex) {
                    throw new IOException(ex);
                }
            }
        }

        private void writePut(PutWritable put) throws IOException {
            ImmutableBytesWritable row = new ImmutableBytesWritable(put.getPut().getRow());
            SortedMap<byte[], List<Cell>> cells = put.getPut().getFamilyCellMap();
            for (Map.Entry<byte[], List<Cell>> entry : cells.entrySet()) {
                Collections.sort(entry.getValue(), new CellComparator());
                for (Cell c : entry.getValue()) {
                    try {
                        fileWriter.write(row, KeyValueUtil.copyToNewKeyValue(c));
                    } catch (InterruptedException e) {
                        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
                    }
                }
            }
        }

        @Override
        public void write(Writable w) throws IOException {
            if (w instanceof Text) {
                writeText((Text) w);
            } else if (w instanceof PutWritable) {
                writePut((PutWritable) w);
            } else {
                throw new IOException("Unexpected writable " + w);
            }
        }
    };
}

From source file: com.hadoop.mapreduce.TestLzoTextInputFormat.java

License: Open Source License

/**
 * Creates an lzo file with random data.
 *
 * @param outputDir Output directory.
 * @param fs File system we're using.
 * @param attemptContext Task attempt context, contains task id etc.
 * @param charsToOutput Approximate number of characters to generate.
 * @throws IOException
 * @throws InterruptedException
 */
private byte[] createTestInput(Path outputDir, FileSystem fs, TaskAttemptContext attemptContext,
        int charsToOutput) throws IOException, InterruptedException {

    TextOutputFormat<Text, Text> output = new TextOutputFormat<Text, Text>();
    RecordWriter<Text, Text> rw = null;

    md5.reset();

    try {
        rw = output.getRecordWriter(attemptContext);

        char[] chars = "abcdefghijklmnopqrstuvwxyz\u00E5\u00E4\u00F6".toCharArray();

        Random r = new Random(System.currentTimeMillis());
        Text key = new Text();
        Text value = new Text();
        int charsMax = chars.length - 1;
        for (int i = 0; i < charsToOutput;) {
            i += fillText(chars, r, charsMax, key);
            i += fillText(chars, r, charsMax, value);
            rw.write(key, value);
            md5.update(key.getBytes(), 0, key.getLength());
            // text output format writes tab between the key and value
            md5.update("\t".getBytes("UTF-8"));
            md5.update(value.getBytes(), 0, value.getLength());
        }
    } finally {
        if (rw != null) {
            rw.close(attemptContext);
            OutputCommitter committer = output.getOutputCommitter(attemptContext);
            committer.commitTask(attemptContext);
            committer.cleanupJob(attemptContext);
        }
    }

    byte[] result = md5.digest();
    md5.reset();
    return result;
}

From source file: com.linkedin.pinot.hadoop.io.PinotOutputFormatTest.java

License: Apache License

private Map<Integer, Emp> addTestData() throws IOException, InterruptedException {
    int days = 2000;
    int sal = 20;
    RecordWriter<Object, Emp> writer = outputFormat.getRecordWriter(fakeTaskAttemptContext);
    Map<Integer, Emp> inputMap = new HashMap<>();
    for (int i = 0; i < 10; i++) {
        String name = "name " + i;
        Emp e = new Emp(i, name, days + i, sal + i);
        writer.write(null, e);
        inputMap.put(i, e);
    }
    writer.close(fakeTaskAttemptContext);
    return inputMap;
}

From source file: com.metamx.milano.hadoop.MilanoProtoFileOutputFormatTests.java

License: Apache License

@Test
public void testBuildAndReadProtoFile() throws Exception {
    MilanoProtoFileOutputFormat outputFormat = new MilanoProtoFileOutputFormat();

    MilanoTypeMetadata.TypeMetadata.Builder metadata = MilanoTool
            .with(Testing.TestItem.getDescriptor().getName(), Testing.getDescriptor()).getMetadata()
            .toBuilder();

    metadata.addFileMetadata(MilanoTypeMetadata.FileMetadata.newBuilder().setKey("Key 1")
            .setValue(ByteString.copyFromUtf8("Value 1")));

    metadata.addFileMetadata(MilanoTypeMetadata.FileMetadata.newBuilder().setKey("Key 2")
            .setValue(ByteString.copyFromUtf8("Value 2")));

    outputFormat.setMetadata(metadata.build());

    TaskAttemptContext context = protoTestObjects.getContext();
    Configuration conf = context.getConfiguration();

    @SuppressWarnings("unchecked")
    RecordWriter<String, Message> writer = outputFormat.getRecordWriter(context);

    for (int i = 0; i < protoTestObjects.getTestItems().size(); i++) {
        writer.write("dummy", protoTestObjects.getTestItem(i));
    }

    writer.close(protoTestObjects.getContext());
}

From source file: com.moz.fiji.mapreduce.output.TestFijiHFileOutputFormat.java

License: Apache License

@Test
public void testMaxHFileSizeSameRow() throws Exception {
    final HFileKeyValue entry1 = entry("row-key", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    final HFileKeyValue entry2 = entry("row-key", mDefaultLGId, "b", 1L, makeBytes(0, 1024));

    mConf.setInt(FijiHFileOutputFormat.CONF_HREGION_MAX_FILESIZE, entry1.getLength() + 1);

    final TaskAttemptID taskAttemptId = FijiMRPlatformBridge.get().newTaskAttemptID("jobTracker_jtPort", 314,
            TaskType.MAP, 159, 2);
    final TaskAttemptContext context = FijiMRPlatformBridge.get().newTaskAttemptContext(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, FijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);
    writer.write(entry1, NW);
    writer.write(entry2, NW);
    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertFalse(fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), entry1.getKeyValue(), entry2.getKeyValue());
    assertFalse(fs.exists(new Path(defaultDir, "00001")));

    mFormat.getOutputCommitter(context).commitTask(context);
}

From source file: com.moz.fiji.mapreduce.output.TestFijiHFileOutputFormat.java

License: Apache License

@Test
public void testMaxHFileSizeNewRow() throws Exception {
    final HFileKeyValue entry1 = entry("row-key1", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    final HFileKeyValue entry2 = entry("row-key2", mDefaultLGId, "b", 1L, makeBytes(0, 1024));

    mConf.setInt(FijiHFileOutputFormat.CONF_HREGION_MAX_FILESIZE, entry1.getLength() + 1);

    final TaskAttemptID taskAttemptId = FijiMRPlatformBridge.get().newTaskAttemptID("jobTracker_jtPort", 314,
            TaskType.MAP, 159, 2);
    final TaskAttemptContext context = FijiMRPlatformBridge.get().newTaskAttemptContext(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, FijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);
    writer.write(entry1, NW);
    writer.write(entry2, NW);
    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertFalse(fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), entry1.getKeyValue());
    assertHFileContent(new Path(defaultDir, "00001"), entry2.getKeyValue());
    assertFalse(fs.exists(new Path(defaultDir, "00002")));

    mFormat.getOutputCommitter(context).commitTask(context);
}

From source file: com.moz.fiji.mapreduce.output.TestFijiHFileOutputFormat.java

License: Apache License

@Test
public void testMultipleLayouts() throws Exception {
    final TaskAttemptID taskAttemptId = FijiMRPlatformBridge.get().newTaskAttemptID("jobTracker_jtPort", 314,
            TaskType.MAP, 159, 2);
    final TaskAttemptContext context = FijiMRPlatformBridge.get().newTaskAttemptContext(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, FijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);

    final HFileKeyValue defaultEntry = entry("row-key", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    writer.write(defaultEntry, NW);
    final HFileKeyValue inMemoryEntry = entry("row-key", mInMemoryLGId, "a", 1L, makeBytes(2, 1024));
    writer.write(inMemoryEntry, NW);

    try {
        // Test with an invalid locality group ID:
        final ColumnId invalid = new ColumnId(1234);
        assertTrue(!mLayout.getLocalityGroupIdNameMap().containsKey(invalid));
        writer.write(entry("row-key", invalid, "a", 1L, HConstants.EMPTY_BYTE_ARRAY), NW);
        fail("Output format did not fail on unknown locality group IDs.");
    } catch (IllegalArgumentException iae) {
        LOG.info("Expected error: " + iae);
    }

    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertTrue(fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), defaultEntry.getKeyValue());
    assertHFileContent(new Path(inMemoryDir, "00000"), inMemoryEntry.getKeyValue());

    mFormat.getOutputCommitter(context).commitTask(context);
}