List of usage examples for org.apache.hadoop.mapred.FileOutputFormat#getCompressOutput
public static boolean getCompressOutput(JobConf conf)
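Before the per-project examples, here is a minimal, self-contained sketch of the typical setter/getter pairing around this method. The class name is hypothetical; the FileOutputFormat, ReflectionUtils, and codec calls are the standard mapred API.

import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.ReflectionUtils;

public class CompressOutputSketch {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // setCompressOutput writes mapred.output.compress=true; getCompressOutput reads it back
        FileOutputFormat.setCompressOutput(conf, true);
        FileOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);

        if (FileOutputFormat.getCompressOutput(conf)) {
            // The follow-up pattern seen in the examples below: resolve the codec class
            // (defaulting to gzip) and instantiate it through ReflectionUtils
            Class<? extends CompressionCodec> codecClass =
                    FileOutputFormat.getOutputCompressorClass(conf, GzipCodec.class);
            CompressionCodec codec = ReflectionUtils.newInstance(codecClass, conf);
            System.out.println("Compressed output, extension: " + codec.getDefaultExtension());
        }
    }
}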
From source file:com.datascience.hadoop.CsvOutputFormat.java
License:Apache License
@Override
public RecordWriter<LongWritable, ListWritable<Text>> getRecordWriter(FileSystem fileSystem, JobConf conf,
        String name, Progressable progress) throws IOException {
    String charsetName = conf.get(CHARSET);
    Charset charset = charsetName != null ? Charset.forName(charsetName) : StandardCharsets.UTF_8;
    Path path;
    CompressionCodec codec = null;
    if (FileOutputFormat.getCompressOutput(conf)) {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(conf,
                GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);
        path = FileOutputFormat.getTaskOutputPath(conf, name + codec.getDefaultExtension());
    } else {
        path = FileOutputFormat.getTaskOutputPath(conf, name);
    }
    OutputStream out = path.getFileSystem(conf).create(path, progress);
    if (codec != null) {
        // Wrap the raw stream so the data is actually compressed, not merely named *.gz
        out = codec.createOutputStream(out);
    }
    return new CsvRecordWriter(new OutputStreamWriter(out, charset), createFormat(conf));
}
From source file:com.ricemap.spateDB.core.GridRecordWriter.java
License:Apache License
/**
 * Creates an output stream that will be used to write the final cell file
 * @param cellFilePath
 * @return
 * @throws IOException
 */
protected OutputStream createFinalCellStream(Path cellFilePath) throws IOException {
    OutputStream cellStream;
    boolean isCompressed = jobConf != null && FileOutputFormat.getCompressOutput(jobConf);
    if (!isCompressed) {
        // Create new file
        cellStream = fileSystem.create(cellFilePath, true,
                fileSystem.getConf().getInt("io.file.buffer.size", 4096),
                fileSystem.getDefaultReplication(outDir), this.blockSize);
    } else {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(jobConf,
                GzipCodec.class);
        // Create the named codec
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, jobConf);
        // Open a stream to the output file
        cellStream = fileSystem.create(cellFilePath, true,
                fileSystem.getConf().getInt("io.file.buffer.size", 4096),
                fileSystem.getDefaultReplication(outDir), this.blockSize);
        // Encode the output stream using the codec
        cellStream = new DataOutputStream(codec.createOutputStream(cellStream));
    }
    return cellStream;
}
From source file:com.ricemap.spateDB.core.GridRecordWriter.java
License:Apache License
/**
 * Returns the path of the file in which the final cell will be written.
 * @param cellIndex the index of the cell whose output path is retrieved
 * @return
 * @throws IOException
 */
protected Path getFinalCellPath(int cellIndex) throws IOException {
    Path path = null;
    do {
        String filename = counter == 0 ? String.format("data_%05d", cellIndex)
                : String.format("data_%05d_%d", cellIndex, counter);
        boolean isCompressed = jobConf != null && FileOutputFormat.getCompressOutput(jobConf);
        if (isCompressed) {
            Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(jobConf,
                    GzipCodec.class);
            // Create the named codec to obtain its default file extension
            CompressionCodec codec = ReflectionUtils.newInstance(codecClass, jobConf);
            filename += codec.getDefaultExtension();
        }
        path = getFilePath(filename);
        counter++;
    } while (fileSystem.exists(path));
    return path;
}
From source file:com.tomslabs.grid.avro.TextTypedBytesToAvroOutputFormat.java
License:Apache License
public RecordWriter<TypedBytesWritable, TypedBytesWritable> getRecordWriter(FileSystem ignore, JobConf job,
        String name, Progressable prog) throws IOException {
    boolean isMapOnly = job.getNumReduceTasks() == 0;
    Schema schema = isMapOnly ? AvroJob.getMapOutputSchema(job) : AvroJob.getOutputSchema(job);
    final DataFileWriter<GenericRecord> writer = new DataFileWriter<GenericRecord>(
            new GenericDatumWriter<GenericRecord>(schema));
    if (FileOutputFormat.getCompressOutput(job)) {
        int level = job.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        writer.setCodec(CodecFactory.deflateCodec(level));
    }
    // Copy metadata from the job into the Avro file header
    for (Map.Entry<String, String> e : job) {
        if (e.getKey().startsWith(AvroJob.TEXT_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue());
        if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
                    URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1"));
    }
    Path path = FileOutputFormat.getTaskOutputPath(job, name + EXT);
    writer.create(schema, path.getFileSystem(job).create(path));
    return new AvroRecordWriter(writer, schema);
}
From source file:edu.umn.cs.spatialHadoop.core.GridRecordWriter.java
License:Open Source License
/**
 * Creates an output stream that will be used to write the final cell file
 * @param cellFilePath
 * @return
 * @throws IOException
 */
protected OutputStream createFinalCellStream(Path cellFilePath) throws IOException {
    OutputStream cellStream;
    boolean isCompressed = jobConf != null && FileOutputFormat.getCompressOutput(jobConf);
    if (!isCompressed) {
        // Create new file
        cellStream = fileSystem.create(cellFilePath, true,
                fileSystem.getConf().getInt("io.file.buffer.size", 4096),
                fileSystem.getDefaultReplication(cellFilePath), this.blockSize);
    } else {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(jobConf,
                GzipCodec.class);
        // Create the named codec
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, jobConf);
        // Open a stream to the output file
        cellStream = fileSystem.create(cellFilePath, true,
                fileSystem.getConf().getInt("io.file.buffer.size", 4096),
                fileSystem.getDefaultReplication(cellFilePath), this.blockSize);
        // Encode the output stream using the codec
        cellStream = new DataOutputStream(codec.createOutputStream(cellStream));
    }
    return cellStream;
}
From source file:edu.umn.cs.spatialHadoop.core.GridRecordWriter.java
License:Open Source License
/**
 * Returns the path of the file in which the final cell will be written.
 * @param cellIndex The index of the cell to retrieve its output path.
 * @return
 * @throws IOException
 */
protected Path getFinalCellPath(int cellIndex) throws IOException {
    Path path;
    do {
        String filename = counter == 0 ? String.format("data_%05d", cellIndex)
                : String.format("data_%05d_%d", cellIndex, counter);
        boolean isCompressed = jobConf != null && FileOutputFormat.getCompressOutput(jobConf);
        if (isCompressed) {
            Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(jobConf,
                    GzipCodec.class);
            // Create the named codec to obtain its default file extension
            CompressionCodec codec = ReflectionUtils.newInstance(codecClass, jobConf);
            filename += codec.getDefaultExtension();
        }
        path = getFilePath(filename);
        counter++;
    } while (fileSystem.exists(path));
    return path;
}
From source file:net.iponweb.hadoop.streaming.avro.AvroAsJsonOutputFormat.java
License:Apache License
static <K> void configureDataFileWriter(DataFileWriter<K> writer, JobConf job)
        throws UnsupportedEncodingException {
    if (FileOutputFormat.getCompressOutput(job)) {
        int level = job.getInt(org.apache.avro.mapred.AvroOutputFormat.DEFLATE_LEVEL_KEY,
                org.apache.avro.mapred.AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
        String codecName = job.get(AvroJob.OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC) ? CodecFactory.deflateCodec(level)
                : CodecFactory.fromString(codecName);
        writer.setCodec(factory);
    }
    writer.setSyncInterval(
            job.getInt(org.apache.avro.mapred.AvroOutputFormat.SYNC_INTERVAL_KEY, DEFAULT_SYNC_INTERVAL));
    // Copy metadata from the job into the Avro file header
    for (Map.Entry<String, String> e : job) {
        if (e.getKey().startsWith(AvroJob.TEXT_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue());
        if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
                    URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1"));
    }
}
From source file:org.apache.avro.mapred.AvroOutputFormat.java
License:Apache License
/**
 * This will select the correct compression codec from the JobConf.
 * The order of selection is as follows:
 * <ul>
 *   <li>If mapred.output.compress is true then look for a codec, otherwise no compression</li>
 *   <li>Use avro.output.codec if populated</li>
 *   <li>Next use mapred.output.compression.codec if populated</li>
 *   <li>If not, default to the deflate codec</li>
 * </ul>
 */
static CodecFactory getCodecFactory(JobConf job) {
    CodecFactory factory = null;
    if (FileOutputFormat.getCompressOutput(job)) {
        int level = job.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = job.get(AvroJob.OUTPUT_CODEC);
        if (codecName == null) {
            String codecClassName = job.get("mapred.output.compression.codec", null);
            String avroCodecName = HadoopCodecFactory.getAvroCodecName(codecClassName);
            if (codecClassName != null && avroCodecName != null) {
                factory = HadoopCodecFactory.fromHadoopString(codecClassName);
                job.set(AvroJob.OUTPUT_CODEC, avroCodecName);
                return factory;
            } else {
                return CodecFactory.deflateCodec(level);
            }
        } else {
            if (codecName.equals(DEFLATE_CODEC)) {
                factory = CodecFactory.deflateCodec(level);
            } else {
                factory = CodecFactory.fromString(codecName);
            }
        }
    }
    return factory;
}
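For context, a minimal driver-side sketch of how the properties consulted above could be set. The wrapper class name and the choice of snappy are illustrative assumptions, not part of the source; AvroJob.OUTPUT_CODEC and DataFileConstants are the standard Avro mapred API.

import org.apache.avro.file.DataFileConstants;
import org.apache.avro.mapred.AvroJob;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class AvroCodecConfigSketch {
    public static void configure(JobConf job) {
        // Sets mapred.output.compress=true; without it getCodecFactory(job) returns null
        FileOutputFormat.setCompressOutput(job, true);
        // avro.output.codec is consulted before mapred.output.compression.codec
        job.set(AvroJob.OUTPUT_CODEC, DataFileConstants.SNAPPY_CODEC);
    }
}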
From source file:org.apache.avro.mapred.tether.TetherOutputFormat.java
License:Apache License
@SuppressWarnings("unchecked")
public RecordWriter<TetherData, NullWritable> getRecordWriter(FileSystem ignore, JobConf job, String name,
        Progressable prog) throws IOException {
    Schema schema = AvroJob.getOutputSchema(job);
    final DataFileWriter writer = new DataFileWriter(new GenericDatumWriter());
    if (FileOutputFormat.getCompressOutput(job)) {
        int level = job.getInt(AvroOutputFormat.DEFLATE_LEVEL_KEY, AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
        writer.setCodec(CodecFactory.deflateCodec(level));
    }
    Path path = FileOutputFormat.getTaskOutputPath(job, name + AvroOutputFormat.EXT);
    writer.create(schema, path.getFileSystem(job).create(path));
    return new RecordWriter<TetherData, NullWritable>() {
        public void write(TetherData datum, NullWritable ignore) throws IOException {
            writer.appendEncoded(datum.buffer());
        }

        public void close(Reporter reporter) throws IOException {
            writer.close();
        }
    };
}
From source file:org.apache.pig.impl.util.avro.AvroRecordWriter.java
License:Apache License
static void configureDataFileWriter(DataFileWriter<GenericData.Record> writer, JobConf job)
        throws UnsupportedEncodingException {
    if (FileOutputFormat.getCompressOutput(job)) {
        int level = job.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = job.get(AvroJob.OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC) ? CodecFactory.deflateCodec(level)
                : CodecFactory.fromString(codecName);
        writer.setCodec(factory);
    }
    // Take the max because core-default.xml sets io.file.buffer.size to 4K
    writer.setSyncInterval(job.getInt(SYNC_INTERVAL_KEY,
            Math.max(job.getInt("io.file.buffer.size", DEFAULT_SYNC_INTERVAL), DEFAULT_SYNC_INTERVAL)));
    // Copy metadata from the job into the Avro file header
    for (Map.Entry<String, String> e : job) {
        if (e.getKey().startsWith(AvroJob.TEXT_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue());
        if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
                    URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1"));
    }
}