Example usage for org.apache.hadoop.mapreduce.lib.output FileOutputFormat getCompressOutput

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.lib.output.FileOutputFormat#getCompressOutput, collected from open-source projects.

Prototype

public static boolean getCompressOutput(JobContext job) 

Document

Is the job output compressed?
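
For orientation, here is a minimal, hypothetical driver sketch (not taken from any of the examples below) showing how output compression is switched on so that getCompressOutput returns true. The job name and the GzipCodec choice are illustrative assumptions, not requirements of the API.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class CompressOutputSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // The job name "compress-output-sketch" is illustrative.
        Job job = Job.getInstance(conf, "compress-output-sketch");

        // Turn on output compression and pick a codec; GzipCodec is an arbitrary example.
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

        // getCompressOutput reads the flag back from the job configuration.
        System.out.println(FileOutputFormat.getCompressOutput(job)); // prints "true"
    }
}

Without the setCompressOutput call (or with the flag set to false), getCompressOutput returns false, which is the branch the examples below take to skip codec construction.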

Usage

From source file:com.asakusafw.runtime.stage.output.TemporaryOutputFormat.java

License:Apache License

/**
 * Creates a new {@link RecordWriter} to output temporary data.
 * @param <V> value type
 * @param context current context
 * @param name output name
 * @param dataType value type
 * @return the created writer
 * @throws IOException if failed to create a new {@link RecordWriter}
 * @throws InterruptedException if interrupted
 * @throws IllegalArgumentException if some parameters were {@code null}
 */
public <V> RecordWriter<NullWritable, V> createRecordWriter(TaskAttemptContext context, String name,
        Class<V> dataType) throws IOException, InterruptedException {
    if (context == null) {
        throw new IllegalArgumentException("context must not be null"); //$NON-NLS-1$
    }
    if (name == null) {
        throw new IllegalArgumentException("name must not be null"); //$NON-NLS-1$
    }
    if (dataType == null) {
        throw new IllegalArgumentException("dataType must not be null"); //$NON-NLS-1$
    }
    CompressionCodec codec = null;
    Configuration conf = context.getConfiguration();
    if (FileOutputFormat.getCompressOutput(context)) {
        Class<?> codecClass = FileOutputFormat.getOutputCompressorClass(context, DefaultCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
    }
    FileOutputCommitter committer = getOutputCommitter(context);
    final Path file = new Path(committer.getWorkPath(), FileOutputFormat.getUniqueFile(context, name, "")); //$NON-NLS-1$
    final ModelOutput<V> out = TemporaryStorage.openOutput(conf, dataType, file, codec);
    return new RecordWriter<NullWritable, V>() {

        @Override
        public void write(NullWritable key, V value) throws IOException {
            out.write(value);
        }

        @Override
        public void close(TaskAttemptContext ignored) throws IOException {
            out.close();
        }

        @Override
        public String toString() {
            return String.format("TemporaryOutput(%s)", file); //$NON-NLS-1$
        }
    };
}

From source file:com.bigfishgames.biginsights.upsight.mapreduce.MyAvroOutputFormatBase.java

License:Apache License

/**
 * Gets the configured compression codec from the task context.
 *
 * @param context The task attempt context.
 * @return The compression codec to use for the output Avro container file.
 */
protected static CodecFactory getCompressionCodec(TaskAttemptContext context) {
    if (FileOutputFormat.getCompressOutput(context)) {
        // Default to deflate compression.
        int deflateLevel = context.getConfiguration().getInt(
                org.apache.avro.mapred.AvroOutputFormat.DEFLATE_LEVEL_KEY, CodecFactory.DEFAULT_DEFLATE_LEVEL);
        int xzLevel = context.getConfiguration().getInt(org.apache.avro.mapred.AvroOutputFormat.XZ_LEVEL_KEY,
                CodecFactory.DEFAULT_XZ_LEVEL);

        String outputCodec = context.getConfiguration().get(AvroJob.CONF_OUTPUT_CODEC);

        if (outputCodec == null) {
            String compressionCodec = context.getConfiguration().get("mapred.output.compression.codec");
            String avroCodecName = HadoopCodecFactory.getAvroCodecName(compressionCodec);
            if (avroCodecName != null) {
                context.getConfiguration().set(AvroJob.CONF_OUTPUT_CODEC, avroCodecName);
                return HadoopCodecFactory.fromHadoopString(compressionCodec);
            } else {
                return CodecFactory.deflateCodec(deflateLevel);
            }
        } else if (DataFileConstants.DEFLATE_CODEC.equals(outputCodec)) {
            return CodecFactory.deflateCodec(deflateLevel);
        } else if (DataFileConstants.XZ_CODEC.equals(outputCodec)) {
            return CodecFactory.xzCodec(xzLevel);
        } else {
            return CodecFactory.fromString(outputCodec);
        }

    }

    // No compression.
    return CodecFactory.nullCodec();
}

From source file:com.ci.backports.avro.mapreduce.AvroOutputFormat.java

License:Apache License

@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException {
    Schema schema = AvroJob.getOutputSchema(context.getConfiguration());
    if (schema == null) {
        throw new RuntimeException("AvroOutputFormat requires an output schema.");
    }

    final DataFileWriter<T> writer = new DataFileWriter<T>(new SpecificDatumWriter<T>());

    if (FileOutputFormat.getCompressOutput(context)) {
        int level = context.getConfiguration().getInt(org.apache.avro.mapred.AvroOutputFormat.DEFLATE_LEVEL_KEY,
                org.apache.avro.mapred.AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
        writer.setCodec(CodecFactory.deflateCodec(level));
    }

    Path path = getDefaultWorkFile(context, org.apache.avro.mapred.AvroOutputFormat.EXT);
    writer.create(schema, path.getFileSystem(context.getConfiguration()).create(path));

    return new RecordWriter<AvroWrapper<T>, NullWritable>() {
        public void write(AvroWrapper<T> record, NullWritable ignore) throws IOException {
            writer.append(record.datum());
        }

        public void close(TaskAttemptContext context) throws IOException {
            writer.close();
        }
    };
}

From source file:com.datasalt.pangool.tuplemr.avro.AvroOutputFormat.java

License:Apache License

static <T> void configureDataFileWriter(DataFileWriter<T> writer, TaskAttemptContext job, String codecName,
        int deflateLevel) throws UnsupportedEncodingException {
    Configuration conf = job.getConfiguration();
    if (FileOutputFormat.getCompressOutput(job)) {
        CodecFactory factory = codecName.equals(DEFLATE_CODEC) ? CodecFactory.deflateCodec(deflateLevel)
                : CodecFactory.fromString(codecName);
        writer.setCodec(factory);
    }

    writer.setSyncInterval(conf.getInt(SYNC_INTERVAL_KEY, DEFAULT_SYNC_INTERVAL));

    // copy metadata from job
    for (Map.Entry<String, String> e : conf) {
        if (e.getKey().startsWith(AvroJob.TEXT_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue());
        if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
                    URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1"));
    }
}

From source file:com.linkedin.cubert.pig.piggybank.storage.avro.PigAvroOutputFormat.java

License:Apache License

@Override
public RecordWriter<NullWritable, Object> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {

    if (schema == null)
        throw new IOException("Must provide a schema");

    Configuration conf = context.getConfiguration();

    DataFileWriter<Object> writer = new DataFileWriter<Object>(new PigAvroDatumWriter(schema));

    if (FileOutputFormat.getCompressOutput(context)) {
        int level = conf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = conf.get(OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC) ? CodecFactory.deflateCodec(level)
                : CodecFactory.fromString(codecName);
        writer.setCodec(factory);
    }

    // Do max as core-default.xml has io.file.buffer.size as 4K
    writer.setSyncInterval(conf.getInt(SYNC_INTERVAL_KEY,
            Math.max(conf.getInt("io.file.buffer.size", DEFAULT_SYNC_INTERVAL), DEFAULT_SYNC_INTERVAL)));

    Path path = getDefaultWorkFile(context, EXT);
    writer.create(schema, path.getFileSystem(conf).create(path));
    return new PigAvroRecordWriter(writer);
}

From source file:com.metamx.druid.indexer.Utils.java

License:Open Source License

public static OutputStream makePathAndOutputStream(JobContext job, Path outputPath, boolean deleteExisting)
        throws IOException {
    OutputStream retVal;
    FileSystem fs = outputPath.getFileSystem(job.getConfiguration());

    if (fs.exists(outputPath)) {
        if (deleteExisting) {
            fs.delete(outputPath, false);
        } else {
            throw new ISE("outputPath[%s] must not exist.", outputPath);
        }
    }

    if (!FileOutputFormat.getCompressOutput(job)) {
        retVal = fs.create(outputPath, false);
    } else {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(job,
                GzipCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        outputPath = new Path(outputPath.toString() + codec.getDefaultExtension());

        retVal = codec.createOutputStream(fs.create(outputPath, false));
    }

    return retVal;
}

From source file:io.druid.indexer.Utils.java

License:Apache License

public static OutputStream makePathAndOutputStream(JobContext job, Path outputPath, boolean deleteExisting)
        throws IOException {
    OutputStream retVal;
    FileSystem fs = outputPath.getFileSystem(job.getConfiguration());
    Class<? extends CompressionCodec> codecClass;
    CompressionCodec codec = null;

    if (FileOutputFormat.getCompressOutput(job)) {
        codecClass = FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        outputPath = new Path(outputPath.toString() + codec.getDefaultExtension());
    }

    if (fs.exists(outputPath)) {
        if (deleteExisting) {
            fs.delete(outputPath, false);
        } else {
            throw new ISE("outputPath[%s] must not exist.", outputPath);
        }
    }

    if (FileOutputFormat.getCompressOutput(job)) {
        retVal = codec.createOutputStream(fs.create(outputPath, false));
    } else {
        retVal = fs.create(outputPath, false);
    }
    return retVal;
}

From source file:io.druid.indexer.Utils.java

License:Apache License

public static boolean exists(JobContext job, FileSystem fs, Path inputPath) throws IOException {
    if (!FileOutputFormat.getCompressOutput(job)) {
        return fs.exists(inputPath);
    } else {
        // Compressed output is written with the codec's default extension appended,
        // so probe for the extended file name instead.
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(job,
                GzipCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        return fs.exists(new Path(inputPath.toString() + codec.getDefaultExtension()));
    }
}

From source file:io.druid.indexer.Utils.java

License:Apache License

public static InputStream openInputStream(JobContext job, Path inputPath, final FileSystem fileSystem)
        throws IOException {
    if (!FileOutputFormat.getCompressOutput(job)) {
        return fileSystem.open(inputPath);
    } else {
        // Compressed output carries the codec's default extension, so open the
        // extended path and wrap the stream in the codec's decompressor.
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(job,
                GzipCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        inputPath = new Path(inputPath.toString() + codec.getDefaultExtension());

        return codec.createInputStream(fileSystem.open(inputPath));
    }
}

From source file:it.crs4.seal.common.OutputStreamFactory.java

License:Open Source License

public DataOutputStream makeStream(Path path) throws IOException {
    Configuration conf = context.getConfiguration();
    boolean isCompressed = FileOutputFormat.getCompressOutput(context);

    CompressionCodec codec = null;
    String extension = "";

    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(context,
                GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }

    FileSystem fs = path.getFileSystem(conf);

    DataOutputStream output;

    if (isCompressed) {
        FSDataOutputStream fileOut = fs.create(path, false);
        output = new DataOutputStream(codec.createOutputStream(fileOut));
    } else {
        output = fs.create(path, false);
    }

    return output;
}