Example usage for org.apache.hadoop.mapred RecordWriter close

List of usage examples for org.apache.hadoop.mapred RecordWriter close

Introduction

On this page you can find example usage for org.apache.hadoop.mapred RecordWriter close.

Prototype

void close(Reporter reporter) throws IOException;

Document

Close this RecordWriter to future operations.
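
Below is a minimal, illustrative sketch (not taken from the examples that follow) of the close contract: a RecordWriter accepts records through write, and close(Reporter) is where buffered output is flushed and the underlying stream released. The class name and file path are hypothetical.

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.Reporter;

// Hypothetical RecordWriter that writes key/value pairs to a local text file.
public class LocalFileRecordWriter implements RecordWriter<LongWritable, Text> {

    private final BufferedWriter out;

    public LocalFileRecordWriter(String path) throws IOException {
        this.out = new BufferedWriter(new FileWriter(path));
    }

    @Override
    public void write(LongWritable key, Text value) throws IOException {
        out.write(key + "\t" + value);
        out.newLine();
    }

    @Override
    public void close(Reporter reporter) throws IOException {
        // Flush buffered records and release the stream; the writer must not
        // be used after this call.
        out.close();
    }

    public static void main(String[] args) throws IOException {
        RecordWriter<LongWritable, Text> writer =
                new LocalFileRecordWriter("/tmp/recordwriter-close-demo.txt");
        try {
            writer.write(new LongWritable(1), new Text("first record"));
            writer.write(new LongWritable(2), new Text("second record"));
        } finally {
            writer.close(Reporter.NULL);
        }
    }
}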

Usage

From source file:HiveKeyIgnoringBAMOutputFormat.java

License:Open Source License

@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf job, Path finalOutPath,
        final Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties,
        Progressable progress) throws IOException {
    setSAMHeaderFrom(job);

    final FakeTaskAttemptContext ctx = new FakeTaskAttemptContext(job);

    final org.apache.hadoop.mapreduce.RecordWriter<Writable, SAMRecordWritable> wrappedRecordWriter = wrappedOutputFormat
            .getRecordWriter(ctx, finalOutPath);

    return new FileSinkOperator.RecordWriter() {
        @Override
        public void write(Writable rec) throws IOException {
            try {
                wrappedRecordWriter.write(null, (SAMRecordWritable) rec);
            } catch (InterruptedException e) {
                throw new RuntimeException(e);
            }
        }

        @Override
        public void close(boolean abort) throws IOException {
            try {
                wrappedRecordWriter.close(ctx);
            } catch (InterruptedException e) {
                throw new RuntimeException(e);
            }
        }
    };
}

From source file:HiveKeyIgnoringBAMOutputFormat.java

License:Open Source License

@Override
public RecordWriter<Writable, SAMRecordWritable> getRecordWriter(FileSystem fs, JobConf job, String name,
        Progressable progress) throws IOException {
    setSAMHeaderFrom(job);

    final FakeTaskAttemptContext ctx = new FakeTaskAttemptContext(job);

    final org.apache.hadoop.mapreduce.RecordWriter<Writable, SAMRecordWritable> wrappedRecordWriter = wrappedOutputFormat
            .getRecordWriter(ctx, FileOutputFormat.getTaskOutputPath(job, name));

    return new RecordWriter<Writable, SAMRecordWritable>() {
        @Override
        public void write(Writable ignored, SAMRecordWritable rec) throws IOException {
            try {
                wrappedRecordWriter.write(ignored, rec);
            } catch (InterruptedException e) {
                throw new RuntimeException(e);
            }
        }

        @Override
        public void close(Reporter reporter) throws IOException {
            try {
                wrappedRecordWriter.close(ctx);
            } catch (InterruptedException e) {
                throw new RuntimeException(e);
            }
        }
    };
}
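
Both HiveKeyIgnoringBAMOutputFormat methods above use the same adapter pattern: a new-API org.apache.hadoop.mapreduce.RecordWriter is wrapped behind the old-API interface, and close(Reporter) is translated into close(TaskAttemptContext). A generic sketch of that adapter is shown below; the class name is illustrative and not part of the source.

import java.io.IOException;

import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Hypothetical adapter exposing a new-API (mapreduce) RecordWriter through
// the old-API (mapred) RecordWriter interface.
public class MapredRecordWriterAdapter<K, V> implements RecordWriter<K, V> {

    private final org.apache.hadoop.mapreduce.RecordWriter<K, V> delegate;
    private final TaskAttemptContext context;

    public MapredRecordWriterAdapter(
            org.apache.hadoop.mapreduce.RecordWriter<K, V> delegate,
            TaskAttemptContext context) {
        this.delegate = delegate;
        this.context = context;
    }

    @Override
    public void write(K key, V value) throws IOException {
        try {
            delegate.write(key, value);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new IOException("interrupted while writing", e);
        }
    }

    @Override
    public void close(Reporter reporter) throws IOException {
        try {
            // The old-API Reporter is not needed here; the wrapped writer's
            // close takes the task attempt context instead.
            delegate.close(context);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new IOException("interrupted while closing", e);
        }
    }
}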

From source file:cn.spark.Case.MyMultipleOutputFormat.java

License:Apache License

/**
 * Create a composite record writer that can write key/value data to
 * different output files
 * 
 * @param fs
 *            the file system to use
 * @param job
 *            the job conf for the job
 * @param name
 *            the leaf file name for the output file (such as "part-00000")
 * @param arg3
 *            a progressable for reporting progress.
 * @return a composite record writer
 * @throws IOException
 */
public RecordWriter<K, V> getRecordWriter(FileSystem fs, JobConf job, String name, Progressable arg3)
        throws IOException {

    final FileSystem myFS = fs;
    final String myName = generateLeafFileName(name);
    final JobConf myJob = job;
    final Progressable myProgressable = arg3;

    return new RecordWriter<K, V>() {

        // a cache storing the record writers for different output files.
        TreeMap<String, RecordWriter<K, V>> recordWriters = new TreeMap<String, RecordWriter<K, V>>();

        public void write(K key, V value) throws IOException {

            // get the file name based on the key
            String keyBasedPath = generateFileNameForKeyValue(key, value, myName);

            // get the file name based on the input file name
            String finalPath = getInputFileBasedOutputFileName(myJob, keyBasedPath);

            // derive the actual key and value to write (note that null, not
            // the original key, is passed to generateActualKey here)
            K actualKey = generateActualKey(null, value);
            V actualValue = generateActualValue(key, value);

            RecordWriter<K, V> rw = this.recordWriters.get(finalPath);
            if (rw == null) {
                // if we don't have the record writer yet for the final
                // path, create
                // one
                // and add it to the cache
                rw = getBaseRecordWriter(myFS, myJob, finalPath, myProgressable);
                this.recordWriters.put(finalPath, rw);
            }
            rw.write(actualKey, actualValue);
        };

        public void close(Reporter reporter) throws IOException {
            Iterator<String> keys = this.recordWriters.keySet().iterator();
            while (keys.hasNext()) {
                RecordWriter<K, V> rw = this.recordWriters.get(keys.next());
                rw.close(reporter);
            }
            this.recordWriters.clear();
        };
    };
}
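
Hadoop ships this same composite-writer pattern in org.apache.hadoop.mapred.lib.MultipleOutputFormat and MultipleTextOutputFormat: one RecordWriter is cached per derived file name, and close(Reporter) closes them all. A minimal sketch of routing records by key with the built-in class (the subclass name here is illustrative):

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat;

// Hypothetical subclass: write each record into a subdirectory named after
// its key; the composite writer created by the base class closes every
// cached per-file RecordWriter in close(Reporter).
public class KeyNamedTextOutputFormat extends MultipleTextOutputFormat<Text, Text> {
    @Override
    protected String generateFileNameForKeyValue(Text key, Text value, String name) {
        return key.toString() + "/" + name;
    }
}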

From source file:com.aliyun.openservices.tablestore.hive.TableStoreOutputFormat.java

License:Apache License

@Override
public RecordWriter<Writable, BatchWriteWritable> getRecordWriter(FileSystem ignored, JobConf job, String name,
        Progressable progress) throws IOException {
    String table = job.get(TableStoreConsts.TABLE_NAME);
    Configuration conf = translateConfig(job);
    SyncClientInterface ots = TableStore.newOtsClient(conf);
    final org.apache.hadoop.mapreduce.RecordWriter<Writable, BatchWriteWritable> writer = new TableStoreRecordWriter(
            ots, table);
    return new org.apache.hadoop.mapred.RecordWriter<Writable, BatchWriteWritable>() {
        @Override
        public void write(Writable any, BatchWriteWritable rows) throws IOException {
            try {
                writer.write(any, rows);
            } catch (InterruptedException ex) {
                throw new IOException("interrupted");
            }
        }

        @Override
        public void close(Reporter reporter) throws IOException {
            try {
                writer.close(null);
            } catch (InterruptedException ex) {
                throw new IOException("interrupted");
            }
        }
    };
}

From source file:com.facebook.hive.orc.TestInputOutputFormat.java

License:Apache License

@Test
public void testMROutput() throws Exception {
    JobConf job = new JobConf(conf);
    Properties properties = new Properties();
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(NestedRow.class,
                ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    SerDe serde = new OrcSerde();
    OutputFormat<?, ?> outFormat = new OrcOutputFormat();
    RecordWriter writer = outFormat.getRecordWriter(fs, conf, testFilePath.toString(), Reporter.NULL);
    writer.write(NullWritable.get(), serde.serialize(new NestedRow(1, 2, 3), inspector));
    writer.write(NullWritable.get(), serde.serialize(new NestedRow(4, 5, 6), inspector));
    writer.write(NullWritable.get(), serde.serialize(new NestedRow(7, 8, 9), inspector));
    writer.close(Reporter.NULL);
    serde = new OrcSerde();
    properties.setProperty("columns", "z,r");
    properties.setProperty("columns.types", "int:struct<x:int,y:int>");
    serde.initialize(conf, properties);
    inspector = (StructObjectInspector) serde.getObjectInspector();
    InputFormat<?, ?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);
    conf.set("hive.io.file.readcolumn.ids", "1");
    org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Object value = reader.createValue();
    int rowNum = 0;
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    StructObjectInspector inner = (StructObjectInspector) fields.get(1).getFieldObjectInspector();
    List<? extends StructField> inFields = inner.getAllStructFieldRefs();
    IntObjectInspector intInspector = (IntObjectInspector) inFields.get(0).getFieldObjectInspector();
    while (reader.next(key, value)) {
        assertEquals(null, inspector.getStructFieldData(value, fields.get(0)));
        Object sub = inspector.getStructFieldData(value, fields.get(1));
        assertEquals(3 * rowNum + 1, intInspector.get(inner.getStructFieldData(sub, inFields.get(0))));
        assertEquals(3 * rowNum + 2, intInspector.get(inner.getStructFieldData(sub, inFields.get(1))));
        rowNum += 1;
    }
    assertEquals(3, rowNum);
    reader.close();
}

From source file:com.facebook.hive.orc.TestInputOutputFormat.java

License:Apache License

@Test
public void testMROutput2() throws Exception {
    JobConf job = new JobConf(conf);
    // Test that you can set the output directory using this config
    job.set("mapred.work.output.dir", testFilePath.getParent().toString());
    Properties properties = new Properties();
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(StringRow.class,
                ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    SerDe serde = new OrcSerde();
    OutputFormat<?, ?> outFormat = new OrcOutputFormat();
    RecordWriter writer = outFormat.getRecordWriter(fs, job, testFilePath.getName(), Reporter.NULL);
    writer.write(NullWritable.get(), serde.serialize(new StringRow("a"), inspector));
    writer.close(Reporter.NULL);
    serde = new OrcSerde();
    properties.setProperty("columns", "col");
    properties.setProperty("columns.types", "string");
    serde.initialize(conf, properties);
    inspector = (StructObjectInspector) serde.getObjectInspector();
    InputFormat<?, ?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);
    org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Object value = reader.createValue();
    int rowNum = 0;
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    reader.next(key, value);
    assertEquals("a", ((StringObjectInspector) fields.get(0).getFieldObjectInspector())
            .getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    reader.close();

}

From source file:com.facebook.hive.orc.TestInputOutputFormat.java

License:Apache License

/**
 * Tests that passing null as the file system to getRecordWriter works, this is
 * to be compatible with the way Sequence and RC file tolerate nulls.
 * @throws Exception
 */
@Test
public void testNullFileSystem() throws Exception {
    conf.set("mapred.work.output.dir", testFilePath.getParent().toString());
    JobConf job = new JobConf(conf);
    Properties properties = new Properties();
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(StringRow.class,
                ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    OrcSerde serde = new OrcSerde();
    OrcOutputFormat outFormat = new OrcOutputFormat();
    RecordWriter<NullWritable, OrcSerdeRow> writer = outFormat.getRecordWriter(null, conf,
            testFilePath.getName(), Reporter.NULL);

    writer.write(NullWritable.get(), (OrcSerdeRow) serde.serialize(new StringRow("a"), inspector));
    writer.write(NullWritable.get(), (OrcSerdeRow) serde.serialize(new StringRow("b"), inspector));
    writer.write(NullWritable.get(), (OrcSerdeRow) serde.serialize(new StringRow("c"), inspector));
    writer.close(Reporter.NULL);
    serde = new OrcSerde();
    properties.setProperty("columns", "str,str2");
    serde.initialize(conf, properties);
    inspector = (StructObjectInspector) serde.getObjectInspector();
    OrcInputFormat in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);

    // read the whole file
    org.apache.hadoop.mapred.RecordReader<NullWritable, OrcLazyRow> reader = in.getRecordReader(splits[0], conf,
            Reporter.NULL);
    NullWritable key = reader.createKey();
    OrcLazyRow value = (OrcLazyRow) reader.createValue();
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    StringObjectInspector strInspector = (StringObjectInspector) fields.get(0).getFieldObjectInspector();
    assertEquals(true, reader.next(key, value));
    assertEquals("a", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("b", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("c", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(false, reader.next(key, value));
    reader.close();
}

From source file:com.facebook.presto.hive.AbstractTestHiveFileFormats.java

License:Apache License

public FileSplit createTestFile(String filePath, HiveOutputFormat<?, ?> outputFormat,
        @SuppressWarnings("deprecation") SerDe serDe, String compressionCodec) throws Exception {
    JobConf jobConf = new JobConf();
    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", COLUMN_NAMES_STRING);
    tableProperties.setProperty("columns.types", COLUMN_TYPES);
    serDe.initialize(new Configuration(), tableProperties);

    if (compressionCodec != null) {
        CompressionCodec codec = new CompressionCodecFactory(new Configuration())
                .getCodecByName(compressionCodec);
        jobConf.set(COMPRESS_CODEC, codec.getClass().getName());
        jobConf.set(COMPRESS_TYPE, SequenceFile.CompressionType.BLOCK.toString());
    }

    RecordWriter recordWriter = outputFormat.getHiveRecordWriter(jobConf, new Path(filePath), Text.class,
            compressionCodec != null, tableProperties, new Progressable() {
                @Override
                public void progress() {
                }
            });

    try {
        serDe.initialize(new Configuration(), tableProperties);

        SettableStructObjectInspector objectInspector = getStandardStructObjectInspector(COLUMN_NAMES,
                FIELD_INSPECTORS);
        Object row = objectInspector.create();

        List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());

        for (int rowNumber = 0; rowNumber < NUM_ROWS; rowNumber++) {
            for (int i = 0; i < TEST_VALUES.size(); i++) {
                Object key = TEST_VALUES.get(i).getKey();
                if (key instanceof Slice) {
                    key = ((Slice) key).getBytes();
                }
                objectInspector.setStructFieldData(row, fields.get(i), key);
            }

            Writable record = serDe.serialize(row, objectInspector);
            recordWriter.write(record);
        }
    } finally {
        recordWriter.close(false);
    }

    Path path = new Path(filePath);
    path.getFileSystem(new Configuration()).setVerifyChecksum(true);
    File file = new File(filePath);
    return new FileSplit(path, 0, file.length(), new String[0]);
}

From source file:com.facebook.presto.hive.BenchmarkHiveFileFormats.java

License:Apache License

public static DataSize writeLineItems(File outputFile, HiveOutputFormat<?, ?> outputFormat,
        @SuppressWarnings("deprecation") Serializer serializer, CompressionType compressionType,
        List<? extends TpchColumn<?>> columns) throws Exception {
    RecordWriter recordWriter = createRecordWriter(columns, outputFile, outputFormat, compressionType);

    SettableStructObjectInspector objectInspector = getStandardStructObjectInspector(
            transform(columns, input -> input.getColumnName()),
            transform(columns, input -> getObjectInspector(input)));

    Object row = objectInspector.create();

    List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());

    for (LineItem lineItem : new LineItemGenerator(1, 1, 1)) {
        objectInspector.setStructFieldData(row, fields.get(0), lineItem.getOrderKey());
        objectInspector.setStructFieldData(row, fields.get(1), lineItem.getPartKey());
        objectInspector.setStructFieldData(row, fields.get(2), lineItem.getSupplierKey());
        objectInspector.setStructFieldData(row, fields.get(3), lineItem.getLineNumber());
        objectInspector.setStructFieldData(row, fields.get(4), lineItem.getQuantity());
        objectInspector.setStructFieldData(row, fields.get(5), lineItem.getExtendedPrice());
        objectInspector.setStructFieldData(row, fields.get(6), lineItem.getDiscount());
        objectInspector.setStructFieldData(row, fields.get(7), lineItem.getTax());
        objectInspector.setStructFieldData(row, fields.get(8), lineItem.getReturnFlag());
        objectInspector.setStructFieldData(row, fields.get(9), lineItem.getStatus());
        objectInspector.setStructFieldData(row, fields.get(10), lineItem.getShipDate());
        objectInspector.setStructFieldData(row, fields.get(11), lineItem.getCommitDate());
        objectInspector.setStructFieldData(row, fields.get(12), lineItem.getReceiptDate());
        objectInspector.setStructFieldData(row, fields.get(13), lineItem.getShipInstructions());
        objectInspector.setStructFieldData(row, fields.get(14), lineItem.getShipMode());
        objectInspector.setStructFieldData(row, fields.get(15), lineItem.getComment());

        Writable record = serializer.serialize(row, objectInspector);
        recordWriter.write(record);
    }

    recordWriter.close(false);
    return getFileSize(outputFile);
}

From source file:com.facebook.presto.hive.parquet.ParquetTester.java

License:Apache License

private static DataSize writeParquetColumn(JobConf jobConf, File outputFile,
        CompressionCodecName compressionCodecName, ObjectInspector columnObjectInspector, Iterator<?> values)
        throws Exception {
    RecordWriter recordWriter = new MapredParquetOutputFormat().getHiveRecordWriter(jobConf,
            new Path(outputFile.toURI()), Text.class, compressionCodecName != UNCOMPRESSED,
            createTableProperties("test", columnObjectInspector.getTypeName()), () -> {
            });
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test",
            columnObjectInspector);
    Object row = objectInspector.create();

    List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());

    int i = 0;
    while (values.hasNext()) {
        Object value = values.next();
        objectInspector.setStructFieldData(row, fields.get(0), value);

        ParquetHiveSerDe serde = new ParquetHiveSerDe();
        serde.initialize(jobConf, createTableProperties("test", columnObjectInspector.getTypeName()), null);
        Writable record = serde.serialize(row, objectInspector);
        recordWriter.write(record);
        i++;
    }

    recordWriter.close(false);
    return succinctBytes(outputFile.length());
}