List of usage examples for org.apache.hadoop.mapred RecordWriter close
void close(Reporter reporter) throws IOException;
Closes this RecordWriter to future operations.
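Before the project-specific examples below, here is a minimal sketch of how close(Reporter) is typically paired with getRecordWriter-style construction and write in the old mapred API. The class name TextPairRecordWriter, the output path, and the surrounding main method are illustrative assumptions, not code from any of the source files listed.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.Reporter;

public class TextPairWriterExample {

    // Hypothetical RecordWriter that writes tab-separated pairs and releases
    // its stream when close(Reporter) is called.
    static class TextPairRecordWriter implements RecordWriter<Text, LongWritable> {
        private final FSDataOutputStream out;

        TextPairRecordWriter(FSDataOutputStream out) {
            this.out = out;
        }

        @Override
        public void write(Text key, LongWritable value) throws IOException {
            out.writeBytes(key + "\t" + value + "\n");
        }

        @Override
        public void close(Reporter reporter) throws IOException {
            // close(Reporter) is the hook for flushing buffers and releasing
            // resources; the framework calls it after the last record is written.
            out.close();
        }
    }

    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.getLocal(conf);
        Path path = new Path("/tmp/recordwriter-close-demo.txt"); // illustrative path

        RecordWriter<Text, LongWritable> writer = new TextPairRecordWriter(fs.create(path, true));
        try {
            writer.write(new Text("rows"), new LongWritable(42));
        } finally {
            // Outside a real task there is no Reporter, so Reporter.NULL is passed,
            // just as the ORC and Presto test examples below do.
            writer.close(Reporter.NULL);
        }
    }
}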
From source file: HiveKeyIgnoringBAMOutputFormat.java
License: Open Source License
@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf job, Path finalOutPath,
        final Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties,
        Progressable progress) throws IOException {
    setSAMHeaderFrom(job);

    final FakeTaskAttemptContext ctx = new FakeTaskAttemptContext(job);

    final org.apache.hadoop.mapreduce.RecordWriter<Writable, SAMRecordWritable> wrappedRecordWriter =
            wrappedOutputFormat.getRecordWriter(ctx, finalOutPath);

    return new FileSinkOperator.RecordWriter() {
        @Override
        public void write(Writable rec) throws IOException {
            try {
                wrappedRecordWriter.write(null, (SAMRecordWritable) rec);
            } catch (InterruptedException e) {
                throw new RuntimeException(e);
            }
        }

        @Override
        public void close(boolean abort) throws IOException {
            try {
                wrappedRecordWriter.close(ctx);
            } catch (InterruptedException e) {
                throw new RuntimeException(e);
            }
        }
    };
}
From source file: HiveKeyIgnoringBAMOutputFormat.java
License: Open Source License
@Override
public RecordWriter<Writable, SAMRecordWritable> getRecordWriter(FileSystem fs, JobConf job, String name,
        Progressable progress) throws IOException {
    setSAMHeaderFrom(job);

    final FakeTaskAttemptContext ctx = new FakeTaskAttemptContext(job);

    final org.apache.hadoop.mapreduce.RecordWriter<Writable, SAMRecordWritable> wrappedRecordWriter =
            wrappedOutputFormat.getRecordWriter(ctx, FileOutputFormat.getTaskOutputPath(job, name));

    return new RecordWriter<Writable, SAMRecordWritable>() {
        @Override
        public void write(Writable ignored, SAMRecordWritable rec) throws IOException {
            try {
                wrappedRecordWriter.write(ignored, rec);
            } catch (InterruptedException e) {
                throw new RuntimeException(e);
            }
        }

        @Override
        public void close(Reporter reporter) throws IOException {
            try {
                wrappedRecordWriter.close(ctx);
            } catch (InterruptedException e) {
                throw new RuntimeException(e);
            }
        }
    };
}
From source file: cn.spark.Case.MyMultipleOutputFormat.java
License: Apache License
/**
 * Create a composite record writer that can write key/value data to
 * different output files.
 *
 * @param fs
 *            the file system to use
 * @param job
 *            the job conf for the job
 * @param name
 *            the leaf file name for the output file (such as "part-00000")
 * @param arg3
 *            a progressable for reporting progress.
 * @return a composite record writer
 * @throws IOException
 */
public RecordWriter<K, V> getRecordWriter(FileSystem fs, JobConf job, String name, Progressable arg3)
        throws IOException {
    final FileSystem myFS = fs;
    final String myName = generateLeafFileName(name);
    final JobConf myJob = job;
    final Progressable myProgressable = arg3;

    return new RecordWriter<K, V>() {
        // a cache storing the record writers for different output files.
        TreeMap<String, RecordWriter<K, V>> recordWriters = new TreeMap<String, RecordWriter<K, V>>();

        public void write(K key, V value) throws IOException {
            // get the file name based on the key
            String keyBasedPath = generateFileNameForKeyValue(key, value, myName);

            // get the file name based on the input file name
            String finalPath = getInputFileBasedOutputFileName(myJob, keyBasedPath);

            // get the actual key and value to write
            K actualKey = generateActualKey(null, value);
            V actualValue = generateActualValue(key, value);

            RecordWriter<K, V> rw = this.recordWriters.get(finalPath);
            if (rw == null) {
                // if we don't have the record writer yet for the final path,
                // create one and add it to the cache
                rw = getBaseRecordWriter(myFS, myJob, finalPath, myProgressable);
                this.recordWriters.put(finalPath, rw);
            }
            rw.write(actualKey, actualValue);
        }

        public void close(Reporter reporter) throws IOException {
            Iterator<String> keys = this.recordWriters.keySet().iterator();
            while (keys.hasNext()) {
                RecordWriter<K, V> rw = this.recordWriters.get(keys.next());
                rw.close(reporter);
            }
            this.recordWriters.clear();
        }
    };
}
From source file: com.aliyun.openservices.tablestore.hive.TableStoreOutputFormat.java
License: Apache License
@Override
public RecordWriter<Writable, BatchWriteWritable> getRecordWriter(FileSystem ignored, JobConf job, String name,
        Progressable progress) throws IOException {
    String table = job.get(TableStoreConsts.TABLE_NAME);
    Configuration conf = translateConfig(job);
    SyncClientInterface ots = TableStore.newOtsClient(conf);
    final org.apache.hadoop.mapreduce.RecordWriter<Writable, BatchWriteWritable> writer =
            new TableStoreRecordWriter(ots, table);

    return new org.apache.hadoop.mapred.RecordWriter<Writable, BatchWriteWritable>() {
        @Override
        public void write(Writable any, BatchWriteWritable rows) throws IOException {
            try {
                writer.write(any, rows);
            } catch (InterruptedException ex) {
                throw new IOException("interrupted");
            }
        }

        @Override
        public void close(Reporter reporter) throws IOException {
            try {
                writer.close(null);
            } catch (InterruptedException ex) {
                throw new IOException("interrupted");
            }
        }
    };
}
From source file: com.facebook.hive.orc.TestInputOutputFormat.java
License: Apache License
@Test
public void testMROutput() throws Exception {
    JobConf job = new JobConf(conf);
    Properties properties = new Properties();
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(NestedRow.class,
                ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    SerDe serde = new OrcSerde();
    OutputFormat<?, ?> outFormat = new OrcOutputFormat();
    RecordWriter writer = outFormat.getRecordWriter(fs, conf, testFilePath.toString(), Reporter.NULL);
    writer.write(NullWritable.get(), serde.serialize(new NestedRow(1, 2, 3), inspector));
    writer.write(NullWritable.get(), serde.serialize(new NestedRow(4, 5, 6), inspector));
    writer.write(NullWritable.get(), serde.serialize(new NestedRow(7, 8, 9), inspector));
    writer.close(Reporter.NULL);

    serde = new OrcSerde();
    properties.setProperty("columns", "z,r");
    properties.setProperty("columns.types", "int:struct<x:int,y:int>");
    serde.initialize(conf, properties);
    inspector = (StructObjectInspector) serde.getObjectInspector();
    InputFormat<?, ?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);

    conf.set("hive.io.file.readcolumn.ids", "1");
    org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Object value = reader.createValue();
    int rowNum = 0;
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    StructObjectInspector inner = (StructObjectInspector) fields.get(1).getFieldObjectInspector();
    List<? extends StructField> inFields = inner.getAllStructFieldRefs();
    IntObjectInspector intInspector = (IntObjectInspector) inFields.get(0).getFieldObjectInspector();
    while (reader.next(key, value)) {
        assertEquals(null, inspector.getStructFieldData(value, fields.get(0)));
        Object sub = inspector.getStructFieldData(value, fields.get(1));
        assertEquals(3 * rowNum + 1, intInspector.get(inner.getStructFieldData(sub, inFields.get(0))));
        assertEquals(3 * rowNum + 2, intInspector.get(inner.getStructFieldData(sub, inFields.get(1))));
        rowNum += 1;
    }
    assertEquals(3, rowNum);
    reader.close();
}
From source file: com.facebook.hive.orc.TestInputOutputFormat.java
License: Apache License
@Test
public void testMROutput2() throws Exception {
    JobConf job = new JobConf(conf);
    // Test that you can set the output directory using this config
    job.set("mapred.work.output.dir", testFilePath.getParent().toString());
    Properties properties = new Properties();
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(StringRow.class,
                ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    SerDe serde = new OrcSerde();
    OutputFormat<?, ?> outFormat = new OrcOutputFormat();
    RecordWriter writer = outFormat.getRecordWriter(fs, job, testFilePath.getName(), Reporter.NULL);
    writer.write(NullWritable.get(), serde.serialize(new StringRow("a"), inspector));
    writer.close(Reporter.NULL);

    serde = new OrcSerde();
    properties.setProperty("columns", "col");
    properties.setProperty("columns.types", "string");
    serde.initialize(conf, properties);
    inspector = (StructObjectInspector) serde.getObjectInspector();
    InputFormat<?, ?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);

    org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Object value = reader.createValue();
    int rowNum = 0;
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    reader.next(key, value);
    assertEquals("a", ((StringObjectInspector) fields.get(0).getFieldObjectInspector())
            .getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    reader.close();
}
From source file: com.facebook.hive.orc.TestInputOutputFormat.java
License: Apache License
/**
 * Tests that passing null as the file system to getRecordWriter works; this is
 * to be compatible with the way Sequence and RC file tolerate nulls.
 * @throws Exception
 */
@Test
public void testNullFileSystem() throws Exception {
    conf.set("mapred.work.output.dir", testFilePath.getParent().toString());
    JobConf job = new JobConf(conf);
    Properties properties = new Properties();
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(StringRow.class,
                ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    OrcSerde serde = new OrcSerde();
    OrcOutputFormat outFormat = new OrcOutputFormat();
    RecordWriter<NullWritable, OrcSerdeRow> writer = outFormat.getRecordWriter(null, conf,
            testFilePath.getName(), Reporter.NULL);
    writer.write(NullWritable.get(), (OrcSerdeRow) serde.serialize(new StringRow("a"), inspector));
    writer.write(NullWritable.get(), (OrcSerdeRow) serde.serialize(new StringRow("b"), inspector));
    writer.write(NullWritable.get(), (OrcSerdeRow) serde.serialize(new StringRow("c"), inspector));
    writer.close(Reporter.NULL);

    serde = new OrcSerde();
    properties.setProperty("columns", "str,str2");
    serde.initialize(conf, properties);
    inspector = (StructObjectInspector) serde.getObjectInspector();
    OrcInputFormat in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);

    // read the whole file
    org.apache.hadoop.mapred.RecordReader<NullWritable, OrcLazyRow> reader = in.getRecordReader(splits[0],
            conf, Reporter.NULL);
    NullWritable key = reader.createKey();
    OrcLazyRow value = (OrcLazyRow) reader.createValue();
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    StringObjectInspector strInspector = (StringObjectInspector) fields.get(0).getFieldObjectInspector();
    assertEquals(true, reader.next(key, value));
    assertEquals("a", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("b", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("c", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(false, reader.next(key, value));
    reader.close();
}
From source file: com.facebook.presto.hive.AbstractTestHiveFileFormats.java
License: Apache License
public FileSplit createTestFile(String filePath, HiveOutputFormat<?, ?> outputFormat,
        @SuppressWarnings("deprecation") SerDe serDe, String compressionCodec) throws Exception {
    JobConf jobConf = new JobConf();
    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", COLUMN_NAMES_STRING);
    tableProperties.setProperty("columns.types", COLUMN_TYPES);
    serDe.initialize(new Configuration(), tableProperties);

    if (compressionCodec != null) {
        CompressionCodec codec = new CompressionCodecFactory(new Configuration())
                .getCodecByName(compressionCodec);
        jobConf.set(COMPRESS_CODEC, codec.getClass().getName());
        jobConf.set(COMPRESS_TYPE, SequenceFile.CompressionType.BLOCK.toString());
    }

    RecordWriter recordWriter = outputFormat.getHiveRecordWriter(jobConf, new Path(filePath), Text.class,
            compressionCodec != null, tableProperties, new Progressable() {
                @Override
                public void progress() {
                }
            });

    try {
        serDe.initialize(new Configuration(), tableProperties);

        SettableStructObjectInspector objectInspector = getStandardStructObjectInspector(COLUMN_NAMES,
                FIELD_INSPECTORS);
        Object row = objectInspector.create();

        List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());

        for (int rowNumber = 0; rowNumber < NUM_ROWS; rowNumber++) {
            for (int i = 0; i < TEST_VALUES.size(); i++) {
                Object key = TEST_VALUES.get(i).getKey();
                if (key instanceof Slice) {
                    key = ((Slice) key).getBytes();
                }
                objectInspector.setStructFieldData(row, fields.get(i), key);
            }

            Writable record = serDe.serialize(row, objectInspector);
            recordWriter.write(record);
        }
    } finally {
        recordWriter.close(false);
    }

    Path path = new Path(filePath);
    path.getFileSystem(new Configuration()).setVerifyChecksum(true);
    File file = new File(filePath);
    return new FileSplit(path, 0, file.length(), new String[0]);
}
From source file: com.facebook.presto.hive.BenchmarkHiveFileFormats.java
License: Apache License
public static DataSize writeLineItems(File outputFile, HiveOutputFormat<?, ?> outputFormat,
        @SuppressWarnings("deprecation") Serializer serializer, CompressionType compressionType,
        List<? extends TpchColumn<?>> columns) throws Exception {
    RecordWriter recordWriter = createRecordWriter(columns, outputFile, outputFormat, compressionType);

    SettableStructObjectInspector objectInspector = getStandardStructObjectInspector(
            transform(columns, input -> input.getColumnName()),
            transform(columns, input -> getObjectInspector(input)));

    Object row = objectInspector.create();

    List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());

    for (LineItem lineItem : new LineItemGenerator(1, 1, 1)) {
        objectInspector.setStructFieldData(row, fields.get(0), lineItem.getOrderKey());
        objectInspector.setStructFieldData(row, fields.get(1), lineItem.getPartKey());
        objectInspector.setStructFieldData(row, fields.get(2), lineItem.getSupplierKey());
        objectInspector.setStructFieldData(row, fields.get(3), lineItem.getLineNumber());
        objectInspector.setStructFieldData(row, fields.get(4), lineItem.getQuantity());
        objectInspector.setStructFieldData(row, fields.get(5), lineItem.getExtendedPrice());
        objectInspector.setStructFieldData(row, fields.get(6), lineItem.getDiscount());
        objectInspector.setStructFieldData(row, fields.get(7), lineItem.getTax());
        objectInspector.setStructFieldData(row, fields.get(8), lineItem.getReturnFlag());
        objectInspector.setStructFieldData(row, fields.get(9), lineItem.getStatus());
        objectInspector.setStructFieldData(row, fields.get(10), lineItem.getShipDate());
        objectInspector.setStructFieldData(row, fields.get(11), lineItem.getCommitDate());
        objectInspector.setStructFieldData(row, fields.get(12), lineItem.getReceiptDate());
        objectInspector.setStructFieldData(row, fields.get(13), lineItem.getShipInstructions());
        objectInspector.setStructFieldData(row, fields.get(14), lineItem.getShipMode());
        objectInspector.setStructFieldData(row, fields.get(15), lineItem.getComment());

        Writable record = serializer.serialize(row, objectInspector);
        recordWriter.write(record);
    }

    recordWriter.close(false);
    return getFileSize(outputFile);
}
From source file: com.facebook.presto.hive.parquet.ParquetTester.java
License: Apache License
private static DataSize writeParquetColumn(JobConf jobConf, File outputFile,
        CompressionCodecName compressionCodecName, ObjectInspector columnObjectInspector, Iterator<?> values)
        throws Exception {
    RecordWriter recordWriter = new MapredParquetOutputFormat().getHiveRecordWriter(jobConf,
            new Path(outputFile.toURI()), Text.class, compressionCodecName != UNCOMPRESSED,
            createTableProperties("test", columnObjectInspector.getTypeName()), () -> {
            });

    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test",
            columnObjectInspector);
    Object row = objectInspector.create();

    List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());

    int i = 0;
    while (values.hasNext()) {
        Object value = values.next();
        objectInspector.setStructFieldData(row, fields.get(0), value);

        ParquetHiveSerDe serde = new ParquetHiveSerDe();
        serde.initialize(jobConf, createTableProperties("test", columnObjectInspector.getTypeName()), null);
        Writable record = serde.serialize(row, objectInspector);
        recordWriter.write(record);
        i++;
    }

    recordWriter.close(false);
    return succinctBytes(outputFile.length());
}