Example usage for org.apache.hadoop.mapred.FileOutputFormat.getTaskOutputPath

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.FileOutputFormat.getTaskOutputPath.

Prototype

public static Path getTaskOutputPath(JobConf conf, String name) throws IOException 

Document

Helper function to create the task's temporary output directory and return the path to the task's output file.
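
The snippets below all follow the same pattern: resolve a per-task file via getTaskOutputPath, open a stream on it, and wrap that stream in a RecordWriter. As a minimal, self-contained sketch of that pattern (the class name PlainTextOutputFormat and the Text key/value types are hypothetical, chosen only for illustration):

import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.Progressable;

public class PlainTextOutputFormat extends FileOutputFormat<Text, Text> {

    @Override
    public RecordWriter<Text, Text> getRecordWriter(FileSystem ignored, JobConf job, String name,
            Progressable progress) throws IOException {

        // Resolve this task's file inside the job's temporary task output directory;
        // the output committer promotes it to the final output directory on task commit.
        Path file = FileOutputFormat.getTaskOutputPath(job, name);
        final FSDataOutputStream out = file.getFileSystem(job).create(file, progress);

        return new RecordWriter<Text, Text>() {
            public void write(Text key, Text value) throws IOException {
                out.write((key + "\t" + value + "\n").getBytes(StandardCharsets.UTF_8));
            }

            public void close(Reporter reporter) throws IOException {
                out.close();
            }
        };
    }
}

Writing through the path returned here, rather than directly under the final output directory, lets the output committer discard files from failed or speculative task attempts.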

Usage

From source file: net.iponweb.hadoop.streaming.avro.AvroAsJsonOutputFormat.java

License: Apache License

@Override
public RecordWriter<Text, NullWritable> getRecordWriter(FileSystem ignore, JobConf job, String name,
        Progressable prog) throws IOException {

    Schema schema;
    Schema.Parser p = new Schema.Parser();
    String strSchema = job.get("iow.streaming.output.schema");
    if (strSchema == null) {

        String schemaFile = job.get("iow.streaming.output.schema.file", "streaming_output_schema");

        if (job.getBoolean("iow.streaming.schema.use.prefix", false)) {
            // guess schema from file name
            // format is: schema:filename
            // with special keyword default - 'default:filename'

            String str[] = name.split(":");
            if (!str[0].equals("default"))
                schemaFile = str[0];

            name = str[1];
        }

        LOG.info(this.getClass().getSimpleName() + ": Using schema from file: " + schemaFile);
        File f = new File(schemaFile);
        schema = p.parse(f);
    } else {
        LOG.info(this.getClass().getSimpleName() + ": Using schema from jobconf.");
        schema = p.parse(strSchema);
    }

    if (schema == null) {
        throw new IOException("Can't find proper output schema");
    }

    DataFileWriter<GenericRecord> writer = new DataFileWriter<GenericRecord>(
            new GenericDatumWriter<GenericRecord>());

    configureDataFileWriter(writer, job);

    Path path = FileOutputFormat.getTaskOutputPath(job, name + org.apache.avro.mapred.AvroOutputFormat.EXT);
    writer.create(schema, path.getFileSystem(job).create(path));

    return createRecordWriter(writer, schema);
}

From source file: newprotobuf.mapred.ProtobufOutputFormat.java

License: Open Source License

public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress)
        throws IOException {

    Path file = FileOutputFormat.getTaskOutputPath(job, name);
    FileSystem fs = file.getFileSystem(job);
    FSDataOutputStream fileOut = fs.create(file, progress);
    return new ProtobufRecordWriter<K, V>(fileOut);
}

From source file: org.apache.avro.mapred.AvroOutputFormat.java

License: Apache License

@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(FileSystem ignore, JobConf job, String name,
        Progressable prog) throws IOException {

    boolean isMapOnly = job.getNumReduceTasks() == 0;
    Schema schema = isMapOnly ? AvroJob.getMapOutputSchema(job) : AvroJob.getOutputSchema(job);
    GenericData dataModel = AvroJob.createDataModel(job);

    final DataFileWriter<T> writer = new DataFileWriter<T>(dataModel.createDatumWriter(null));

    configureDataFileWriter(writer, job);

    Path path = FileOutputFormat.getTaskOutputPath(job, name + EXT);
    writer.create(schema, path.getFileSystem(job).create(path));

    return new RecordWriter<AvroWrapper<T>, NullWritable>() {
        public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
            writer.append(wrapper.datum());
        }

        public void close(Reporter reporter) throws IOException {
            writer.close();
        }
    };
}

From source file: org.apache.avro.mapred.AvroTextOutputFormat.java

License: Apache License

@Override
public RecordWriter<K, V> getRecordWriter(FileSystem ignore, JobConf job, String name, Progressable prog)
        throws IOException {

    Schema schema = Schema.create(Schema.Type.BYTES);

    final byte[] keyValueSeparator = job.get("mapreduce.output.textoutputformat.separator", "\t")
            .getBytes(UTF8);

    final DataFileWriter<ByteBuffer> writer = new DataFileWriter<ByteBuffer>(
            new ReflectDatumWriter<ByteBuffer>());

    AvroOutputFormat.configureDataFileWriter(writer, job);

    Path path = FileOutputFormat.getTaskOutputPath(job, name + EXT);
    writer.create(schema, path.getFileSystem(job).create(path));

    return new AvroTextRecordWriter(writer, keyValueSeparator);
}

From source file: org.apache.avro.mapred.tether.TetherOutputFormat.java

License: Apache License

@SuppressWarnings("unchecked")
public RecordWriter<TetherData, NullWritable> getRecordWriter(FileSystem ignore, JobConf job, String name,
        Progressable prog) throws IOException {

    Schema schema = AvroJob.getOutputSchema(job);

    final DataFileWriter writer = new DataFileWriter(new GenericDatumWriter());

    if (FileOutputFormat.getCompressOutput(job)) {
        int level = job.getInt(AvroOutputFormat.DEFLATE_LEVEL_KEY, AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
        writer.setCodec(CodecFactory.deflateCodec(level));
    }

    Path path = FileOutputFormat.getTaskOutputPath(job, name + AvroOutputFormat.EXT);
    writer.create(schema, path.getFileSystem(job).create(path));

    return new RecordWriter<TetherData, NullWritable>() {
        public void write(TetherData datum, NullWritable ignore) throws IOException {
            writer.appendEncoded(datum.buffer());
        }

        public void close(Reporter reporter) throws IOException {
            writer.close();
        }
    };
}

From source file: org.apache.flink.batch.connectors.hive.HiveTableOutputFormat.java

License: Apache License

private HivePartitionWriter writerForLocation(String location) throws IOException {
    JobConf clonedConf = new JobConf(jobConf);
    clonedConf.set(OUTDIR, location);
    OutputFormat outputFormat;
    try {
        StorageDescriptor sd = hiveTablePartition.getStorageDescriptor();
        Class outputFormatClz = Class.forName(sd.getOutputFormat(), true,
                Thread.currentThread().getContextClassLoader());
        outputFormatClz = HiveFileFormatUtils.getOutputFormatSubstitute(outputFormatClz);
        outputFormat = (OutputFormat) outputFormatClz.newInstance();
    } catch (InstantiationException | IllegalAccessException | ClassNotFoundException e) {
        throw new FlinkRuntimeException("Unable to instantiate the hadoop output format", e);
    }
    ReflectionUtils.setConf(outputFormat, clonedConf);
    OutputCommitter outputCommitter = clonedConf.getOutputCommitter();
    JobContext jobContext = new JobContextImpl(clonedConf, new JobID());
    outputCommitter.setupJob(jobContext);
    final boolean isCompressed = clonedConf.getBoolean(HiveConf.ConfVars.COMPRESSRESULT.varname, false);
    if (isCompressed) {
        String codecStr = clonedConf.get(HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC.varname);
        if (!StringUtils.isNullOrWhitespaceOnly(codecStr)) {
            try {
                Class<? extends CompressionCodec> codec = (Class<? extends CompressionCodec>) Class
                        .forName(codecStr, true, Thread.currentThread().getContextClassLoader());
                FileOutputFormat.setOutputCompressorClass(clonedConf, codec);
            } catch (ClassNotFoundException e) {
                throw new RuntimeException(e);
            }
        }
        String typeStr = clonedConf.get(HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE.varname);
        if (!StringUtils.isNullOrWhitespaceOnly(typeStr)) {
            SequenceFile.CompressionType style = SequenceFile.CompressionType.valueOf(typeStr);
            SequenceFileOutputFormat.setOutputCompressionType(clonedConf, style);
        }
    }
    String taskPartition = String.valueOf(clonedConf.getInt("mapreduce.task.partition", -1));
    Path taskPath = FileOutputFormat.getTaskOutputPath(clonedConf, taskPartition);
    FileSinkOperator.RecordWriter recordWriter;
    try {
        recordWriter = HiveFileFormatUtils.getRecordWriter(clonedConf, outputFormat, outputClass, isCompressed,
                tblProperties, taskPath, Reporter.NULL);
    } catch (HiveException e) {
        throw new IOException(e);
    }
    return new HivePartitionWriter(clonedConf, outputFormat, recordWriter, outputCommitter);
}

From source file: org.apache.lens.lib.query.LensFileOutputFormat.java

License: Apache License

public RecordWriter<NullWritable, Text> getRecordWriter(FileSystem ignored, JobConf job, String name,
        Progressable progress) throws IOException {
    return createRecordWriter(job, FileOutputFormat.getTaskOutputPath(job, name), progress,
            getCompressOutput(job), getOuptutFileExtn(job), getResultEncoding(job));
}

From source file: org.apache.sysml.runtime.matrix.data.UnPaddedOutputFormat.java

License: Apache License

@Override
public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress)
        throws IOException {
    Path file = FileOutputFormat.getTaskOutputPath(job, name);
    FileSystem fs = file.getFileSystem(job);
    FSDataOutputStream fileOut = fs.create(file, true,
            job.getInt(MRConfigurationNames.IO_FILE_BUFFER_SIZE, 4096), progress);
    return new UnpaddedRecordWriter<>(fileOut);
}

From source file: org.apache.sysml.runtime.matrix.sort.CompactOutputFormat.java

License: Apache License

@Override
public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress)
        throws IOException {

    Path file = FileOutputFormat.getTaskOutputPath(job, name);
    FileSystem fs = file.getFileSystem(job);
    FSDataOutputStream fileOut = fs.create(file, progress);
    return new FixedLengthRecordWriter<>(fileOut, job);
}

From source file: org.apache.trevni.avro.AvroTrevniOutputFormat.java

License: Apache License

@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(FileSystem ignore, final JobConf job,
        final String name, Progressable prog) throws IOException {

    boolean isMapOnly = job.getNumReduceTasks() == 0;
    final Schema schema = isMapOnly ? AvroJob.getMapOutputSchema(job) : AvroJob.getOutputSchema(job);

    final ColumnFileMetaData meta = filterMetadata(job);

    final Path dir = FileOutputFormat.getTaskOutputPath(job, name);
    final FileSystem fs = dir.getFileSystem(job);
    if (!fs.mkdirs(dir))
        throw new IOException("Failed to create directory: " + dir);
    final long blockSize = fs.getDefaultBlockSize();

    return new RecordWriter<AvroWrapper<T>, NullWritable>() {
        private int part = 0;

        private AvroColumnWriter<T> writer = new AvroColumnWriter<T>(schema, meta, ReflectData.get());

        private void flush() throws IOException {
            OutputStream out = fs.create(new Path(dir, "part-" + (part++) + EXT));
            try {
                writer.writeTo(out);
            } finally {
                out.close();
            }
            writer = new AvroColumnWriter<T>(schema, meta, ReflectData.get());
        }

        public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
            writer.write(wrapper.datum());
            if (writer.sizeEstimate() >= blockSize) // block full
                flush();
        }

        public void close(Reporter reporter) throws IOException {
            flush();
        }
    };
}