List of usage examples for org.apache.hadoop.mapred.FileOutputFormat#getTaskOutputPath
public static Path getTaskOutputPath(JobConf conf, String name) throws IOException
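Helper that creates the task's temporary output directory and returns the path to the task's output file; the job's OutputCommitter later promotes that file into the final output directory. Every example below follows the same basic pattern: resolve the task-local path with getTaskOutputPath, open a stream on it, and wrap the stream in a RecordWriter.

Before the project-specific sources, here is a minimal sketch of that pattern. The class name PlainTextOutputFormat and the tab-separated writer body are illustrative assumptions, not from any of the projects listed; only the Hadoop types and the getTaskOutputPath call mirror the examples below.

import java.io.IOException;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.Progressable;

// Hypothetical minimal OutputFormat; shows only the recurring getTaskOutputPath pattern.
public class PlainTextOutputFormat extends FileOutputFormat<Text, Text> {

    @Override
    public RecordWriter<Text, Text> getRecordWriter(FileSystem ignored, JobConf job, String name,
            Progressable progress) throws IOException {
        // Resolve the task-attempt-local output file; the committer moves it
        // to the final output directory when the task attempt is committed.
        Path file = FileOutputFormat.getTaskOutputPath(job, name);
        FileSystem fs = file.getFileSystem(job);
        final FSDataOutputStream out = fs.create(file, progress);

        return new RecordWriter<Text, Text>() {
            public void write(Text key, Text value) throws IOException {
                // Text already stores UTF-8 bytes: write key, tab, value, newline.
                out.write(key.getBytes(), 0, key.getLength());
                out.write('\t');
                out.write(value.getBytes(), 0, value.getLength());
                out.write('\n');
            }

            public void close(Reporter reporter) throws IOException {
                out.close();
            }
        };
    }
}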
From source file: net.iponweb.hadoop.streaming.avro.AvroAsJsonOutputFormat.java
License: Apache License

@Override
public RecordWriter<Text, NullWritable> getRecordWriter(FileSystem ignore, JobConf job, String name,
        Progressable prog) throws IOException {
    Schema schema;
    Schema.Parser p = new Schema.Parser();
    String strSchema = job.get("iow.streaming.output.schema");
    if (strSchema == null) {
        String schemaFile = job.get("iow.streaming.output.schema.file", "streaming_output_schema");
        if (job.getBoolean("iow.streaming.schema.use.prefix", false)) {
            // Derive the schema from the output name, which is expected to be
            // "schema:filename"; the special prefix "default" ("default:filename")
            // keeps the configured schema file.
            String[] str = name.split(":");
            if (!str[0].equals("default"))
                schemaFile = str[0];
            name = str[1];
        }
        LOG.info(this.getClass().getSimpleName() + ": Using schema from file: " + schemaFile);
        File f = new File(schemaFile);
        schema = p.parse(f);
    } else {
        LOG.info(this.getClass().getSimpleName() + ": Using schema from jobconf.");
        schema = p.parse(strSchema);
    }

    if (schema == null) {
        throw new IOException("Can't find proper output schema");
    }

    DataFileWriter<GenericRecord> writer = new DataFileWriter<GenericRecord>(
            new GenericDatumWriter<GenericRecord>());
    configureDataFileWriter(writer, job);

    // Resolve the task-local output path, appending the standard Avro file extension.
    Path path = FileOutputFormat.getTaskOutputPath(job, name + org.apache.avro.mapred.AvroOutputFormat.EXT);
    writer.create(schema, path.getFileSystem(job).create(path));

    return createRecordWriter(writer, schema);
}
From source file: newprotobuf.mapred.ProtobufOutputFormat.java
License: Open Source License

public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, String name,
        Progressable progress) throws IOException {
    Path file = FileOutputFormat.getTaskOutputPath(job, name);
    FileSystem fs = file.getFileSystem(job);
    FSDataOutputStream fileOut = fs.create(file, progress);
    return new ProtobufRecordWriter<K, V>(fileOut);
}
From source file: org.apache.avro.mapred.AvroOutputFormat.java
License: Apache License

@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(FileSystem ignore, JobConf job,
        String name, Progressable prog) throws IOException {
    // Map-only jobs write the map output schema; jobs with reducers write the final output schema.
    boolean isMapOnly = job.getNumReduceTasks() == 0;
    Schema schema = isMapOnly ? AvroJob.getMapOutputSchema(job) : AvroJob.getOutputSchema(job);
    GenericData dataModel = AvroJob.createDataModel(job);

    final DataFileWriter<T> writer = new DataFileWriter<T>(dataModel.createDatumWriter(null));
    configureDataFileWriter(writer, job);

    Path path = FileOutputFormat.getTaskOutputPath(job, name + EXT);
    writer.create(schema, path.getFileSystem(job).create(path));

    return new RecordWriter<AvroWrapper<T>, NullWritable>() {
        public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
            writer.append(wrapper.datum());
        }

        public void close(Reporter reporter) throws IOException {
            writer.close();
        }
    };
}
From source file: org.apache.avro.mapred.AvroTextOutputFormat.java
License: Apache License

@Override
public RecordWriter<K, V> getRecordWriter(FileSystem ignore, JobConf job, String name,
        Progressable prog) throws IOException {
    Schema schema = Schema.create(Schema.Type.BYTES);
    final byte[] keyValueSeparator = job.get("mapreduce.output.textoutputformat.separator", "\t")
            .getBytes(UTF8);

    final DataFileWriter<ByteBuffer> writer = new DataFileWriter<ByteBuffer>(
            new ReflectDatumWriter<ByteBuffer>());
    AvroOutputFormat.configureDataFileWriter(writer, job);

    Path path = FileOutputFormat.getTaskOutputPath(job, name + EXT);
    writer.create(schema, path.getFileSystem(job).create(path));

    return new AvroTextRecordWriter(writer, keyValueSeparator);
}
From source file: org.apache.avro.mapred.tether.TetherOutputFormat.java
License: Apache License

@SuppressWarnings("unchecked")
public RecordWriter<TetherData, NullWritable> getRecordWriter(FileSystem ignore, JobConf job,
        String name, Progressable prog) throws IOException {
    Schema schema = AvroJob.getOutputSchema(job);

    final DataFileWriter writer = new DataFileWriter(new GenericDatumWriter());

    // Map the generic mapred output-compression flag onto Avro's deflate codec.
    if (FileOutputFormat.getCompressOutput(job)) {
        int level = job.getInt(AvroOutputFormat.DEFLATE_LEVEL_KEY, AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
        writer.setCodec(CodecFactory.deflateCodec(level));
    }

    Path path = FileOutputFormat.getTaskOutputPath(job, name + AvroOutputFormat.EXT);
    writer.create(schema, path.getFileSystem(job).create(path));

    return new RecordWriter<TetherData, NullWritable>() {
        public void write(TetherData datum, NullWritable ignore) throws IOException {
            writer.appendEncoded(datum.buffer());
        }

        public void close(Reporter reporter) throws IOException {
            writer.close();
        }
    };
}
From source file: org.apache.flink.batch.connectors.hive.HiveTableOutputFormat.java
License: Apache License

private HivePartitionWriter writerForLocation(String location) throws IOException {
    JobConf clonedConf = new JobConf(jobConf);
    clonedConf.set(OUTDIR, location);

    // Instantiate the Hive table's configured output format via reflection.
    OutputFormat outputFormat;
    try {
        StorageDescriptor sd = hiveTablePartition.getStorageDescriptor();
        Class outputFormatClz = Class.forName(sd.getOutputFormat(), true,
                Thread.currentThread().getContextClassLoader());
        outputFormatClz = HiveFileFormatUtils.getOutputFormatSubstitute(outputFormatClz);
        outputFormat = (OutputFormat) outputFormatClz.newInstance();
    } catch (InstantiationException | IllegalAccessException | ClassNotFoundException e) {
        throw new FlinkRuntimeException("Unable to instantiate the hadoop output format", e);
    }
    ReflectionUtils.setConf(outputFormat, clonedConf);

    OutputCommitter outputCommitter = clonedConf.getOutputCommitter();
    JobContext jobContext = new JobContextImpl(clonedConf, new JobID());
    outputCommitter.setupJob(jobContext);

    // Propagate Hive's compression settings into the cloned job configuration.
    final boolean isCompressed = clonedConf.getBoolean(HiveConf.ConfVars.COMPRESSRESULT.varname, false);
    if (isCompressed) {
        String codecStr = clonedConf.get(HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC.varname);
        if (!StringUtils.isNullOrWhitespaceOnly(codecStr)) {
            try {
                Class<? extends CompressionCodec> codec = (Class<? extends CompressionCodec>) Class
                        .forName(codecStr, true, Thread.currentThread().getContextClassLoader());
                FileOutputFormat.setOutputCompressorClass(clonedConf, codec);
            } catch (ClassNotFoundException e) {
                throw new RuntimeException(e);
            }
        }
        String typeStr = clonedConf.get(HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE.varname);
        if (!StringUtils.isNullOrWhitespaceOnly(typeStr)) {
            SequenceFile.CompressionType style = SequenceFile.CompressionType.valueOf(typeStr);
            SequenceFileOutputFormat.setOutputCompressionType(clonedConf, style);
        }
    }

    // Name the task output after the task partition number and resolve its temporary path.
    String taskPartition = String.valueOf(clonedConf.getInt("mapreduce.task.partition", -1));
    Path taskPath = FileOutputFormat.getTaskOutputPath(clonedConf, taskPartition);
    FileSinkOperator.RecordWriter recordWriter;
    try {
        recordWriter = HiveFileFormatUtils.getRecordWriter(clonedConf, outputFormat, outputClass,
                isCompressed, tblProperties, taskPath, Reporter.NULL);
    } catch (HiveException e) {
        throw new IOException(e);
    }
    return new HivePartitionWriter(clonedConf, outputFormat, recordWriter, outputCommitter);
}
From source file: org.apache.lens.lib.query.LensFileOutputFormat.java
License: Apache License

public RecordWriter<NullWritable, Text> getRecordWriter(FileSystem ignored, JobConf job, String name,
        Progressable progress) throws IOException {
    return createRecordWriter(job, FileOutputFormat.getTaskOutputPath(job, name), progress,
            getCompressOutput(job), getOuptutFileExtn(job), getResultEncoding(job));
}
From source file: org.apache.sysml.runtime.matrix.data.UnPaddedOutputFormat.java
License: Apache License

@Override
public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, String name,
        Progressable progress) throws IOException {
    Path file = FileOutputFormat.getTaskOutputPath(job, name);
    FileSystem fs = file.getFileSystem(job);
    FSDataOutputStream fileOut = fs.create(file, true,
            job.getInt(MRConfigurationNames.IO_FILE_BUFFER_SIZE, 4096), progress);
    return new UnpaddedRecordWriter<>(fileOut);
}
From source file: org.apache.sysml.runtime.matrix.sort.CompactOutputFormat.java
License: Apache License

@Override
public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, String name,
        Progressable progress) throws IOException {
    Path file = FileOutputFormat.getTaskOutputPath(job, name);
    FileSystem fs = file.getFileSystem(job);
    FSDataOutputStream fileOut = fs.create(file, progress);
    return new FixedLengthRecordWriter<>(fileOut, job);
}
From source file: org.apache.trevni.avro.AvroTrevniOutputFormat.java
License: Apache License

@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(FileSystem ignore, final JobConf job,
        final String name, Progressable prog) throws IOException {
    boolean isMapOnly = job.getNumReduceTasks() == 0;
    final Schema schema = isMapOnly ? AvroJob.getMapOutputSchema(job) : AvroJob.getOutputSchema(job);

    final ColumnFileMetaData meta = filterMetadata(job);

    // Here the task output path is used as a directory: each flushed column file
    // becomes a numbered part file inside it.
    final Path dir = FileOutputFormat.getTaskOutputPath(job, name);
    final FileSystem fs = dir.getFileSystem(job);
    if (!fs.mkdirs(dir))
        throw new IOException("Failed to create directory: " + dir);
    final long blockSize = fs.getDefaultBlockSize();

    return new RecordWriter<AvroWrapper<T>, NullWritable>() {
        private int part = 0;

        private AvroColumnWriter<T> writer = new AvroColumnWriter<T>(schema, meta, ReflectData.get());

        private void flush() throws IOException {
            OutputStream out = fs.create(new Path(dir, "part-" + (part++) + EXT));
            try {
                writer.writeTo(out);
            } finally {
                out.close();
            }
            writer = new AvroColumnWriter<T>(schema, meta, ReflectData.get());
        }

        public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
            writer.write(wrapper.datum());
            if (writer.sizeEstimate() >= blockSize) // block full; roll to a new part file
                flush();
        }

        public void close(Reporter reporter) throws IOException {
            flush();
        }
    };
}