Usage examples for org.apache.hadoop.mapreduce.lib.output.FileOutputFormat#getCompressOutput
public static boolean getCompressOutput(JobContext job)
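The examples below show how output formats consult this flag at write time. For orientation, here is a minimal driver-side sketch (the class name and job name are made up for illustration) of the standard FileOutputFormat setters that cause getCompressOutput to return true in the task-side code that follows:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class CompressOutputSetup {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "compressed-output-example");

        // Enable output compression; getCompressOutput(job) now returns true
        // inside the OutputFormat / RecordWriter implementations shown below.
        FileOutputFormat.setCompressOutput(job, true);

        // Choose the codec that getOutputCompressorClass(job, <default>) resolves to.
        FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    }
}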
From source file: com.asakusafw.runtime.stage.output.TemporaryOutputFormat.java
License: Apache License
/**
 * Creates a new {@link RecordWriter} to output temporary data.
 * @param <V> value type
 * @param context current context
 * @param name output name
 * @param dataType value type
 * @return the created writer
 * @throws IOException if failed to create a new {@link RecordWriter}
 * @throws InterruptedException if interrupted
 * @throws IllegalArgumentException if some parameters were {@code null}
 */
public <V> RecordWriter<NullWritable, V> createRecordWriter(TaskAttemptContext context, String name,
        Class<V> dataType) throws IOException, InterruptedException {
    if (context == null) {
        throw new IllegalArgumentException("context must not be null"); //$NON-NLS-1$
    }
    if (name == null) {
        throw new IllegalArgumentException("name must not be null"); //$NON-NLS-1$
    }
    if (dataType == null) {
        throw new IllegalArgumentException("dataType must not be null"); //$NON-NLS-1$
    }
    CompressionCodec codec = null;
    Configuration conf = context.getConfiguration();
    if (FileOutputFormat.getCompressOutput(context)) {
        Class<?> codecClass = FileOutputFormat.getOutputCompressorClass(context, DefaultCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
    }
    FileOutputCommitter committer = getOutputCommitter(context);
    final Path file = new Path(committer.getWorkPath(), FileOutputFormat.getUniqueFile(context, name, "")); //$NON-NLS-1$
    final ModelOutput<V> out = TemporaryStorage.openOutput(conf, dataType, file, codec);
    return new RecordWriter<NullWritable, V>() {
        @Override
        public void write(NullWritable key, V value) throws IOException {
            out.write(value);
        }
        @Override
        public void close(TaskAttemptContext ignored) throws IOException {
            out.close();
        }
        @Override
        public String toString() {
            return String.format("TemporaryOutput(%s)", file); //$NON-NLS-1$
        }
    };
}
From source file: com.bigfishgames.biginsights.upsight.mapreduce.MyAvroOutputFormatBase.java
License: Apache License
/**
 * Gets the configured compression codec from the task context.
 *
 * @param context The task attempt context.
 * @return The compression codec to use for the output Avro container file.
 */
protected static CodecFactory getCompressionCodec(TaskAttemptContext context) {
    if (FileOutputFormat.getCompressOutput(context)) {
        // Default to deflate compression.
        int deflateLevel = context.getConfiguration().getInt(
                org.apache.avro.mapred.AvroOutputFormat.DEFLATE_LEVEL_KEY, CodecFactory.DEFAULT_DEFLATE_LEVEL);
        int xzLevel = context.getConfiguration().getInt(
                org.apache.avro.mapred.AvroOutputFormat.XZ_LEVEL_KEY, CodecFactory.DEFAULT_XZ_LEVEL);

        String outputCodec = context.getConfiguration().get(AvroJob.CONF_OUTPUT_CODEC);

        if (outputCodec == null) {
            String compressionCodec = context.getConfiguration().get("mapred.output.compression.codec");
            String avroCodecName = HadoopCodecFactory.getAvroCodecName(compressionCodec);
            if (avroCodecName != null) {
                context.getConfiguration().set(AvroJob.CONF_OUTPUT_CODEC, avroCodecName);
                return HadoopCodecFactory.fromHadoopString(compressionCodec);
            } else {
                return CodecFactory.deflateCodec(deflateLevel);
            }
        } else if (DataFileConstants.DEFLATE_CODEC.equals(outputCodec)) {
            return CodecFactory.deflateCodec(deflateLevel);
        } else if (DataFileConstants.XZ_CODEC.equals(outputCodec)) {
            return CodecFactory.xzCodec(xzLevel);
        } else {
            return CodecFactory.fromString(outputCodec);
        }
    }

    // No compression.
    return CodecFactory.nullCodec();
}
From source file: com.ci.backports.avro.mapreduce.AvroOutputFormat.java
License: Apache License
@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException {
    Schema schema = AvroJob.getOutputSchema(context.getConfiguration());
    if (schema == null) {
        throw new RuntimeException("AvroOutputFormat requires an output schema.");
    }

    final DataFileWriter<T> writer = new DataFileWriter<T>(new SpecificDatumWriter<T>());

    if (FileOutputFormat.getCompressOutput(context)) {
        int level = context.getConfiguration().getInt(
                org.apache.avro.mapred.AvroOutputFormat.DEFLATE_LEVEL_KEY,
                org.apache.avro.mapred.AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
        writer.setCodec(CodecFactory.deflateCodec(level));
    }

    Path path = getDefaultWorkFile(context, org.apache.avro.mapred.AvroOutputFormat.EXT);
    writer.create(schema, path.getFileSystem(context.getConfiguration()).create(path));

    return new RecordWriter<AvroWrapper<T>, NullWritable>() {
        public void write(AvroWrapper<T> record, NullWritable ignore) throws IOException {
            writer.append(record.datum());
        }

        public void close(TaskAttemptContext context) throws IOException {
            writer.close();
        }
    };
}
From source file: com.datasalt.pangool.tuplemr.avro.AvroOutputFormat.java
License: Apache License
static <T> void configureDataFileWriter(DataFileWriter<T> writer, TaskAttemptContext job, String codecName,
        int deflateLevel) throws UnsupportedEncodingException {
    Configuration conf = job.getConfiguration();
    if (FileOutputFormat.getCompressOutput(job)) {
        CodecFactory factory = codecName.equals(DEFLATE_CODEC) ? CodecFactory.deflateCodec(deflateLevel)
                : CodecFactory.fromString(codecName);
        writer.setCodec(factory);
    }

    writer.setSyncInterval(conf.getInt(SYNC_INTERVAL_KEY, DEFAULT_SYNC_INTERVAL));

    // copy metadata from job
    for (Map.Entry<String, String> e : conf) {
        if (e.getKey().startsWith(AvroJob.TEXT_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue());
        if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
                    URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1"));
    }
}
From source file: com.linkedin.cubert.pig.piggybank.storage.avro.PigAvroOutputFormat.java
License: Apache License
@Override
public RecordWriter<NullWritable, Object> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    if (schema == null)
        throw new IOException("Must provide a schema");

    Configuration conf = context.getConfiguration();

    DataFileWriter<Object> writer = new DataFileWriter<Object>(new PigAvroDatumWriter(schema));

    if (FileOutputFormat.getCompressOutput(context)) {
        int level = conf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = conf.get(OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC) ? CodecFactory.deflateCodec(level)
                : CodecFactory.fromString(codecName);
        writer.setCodec(factory);
    }

    // Do max as core-default.xml has io.file.buffer.size as 4K
    writer.setSyncInterval(conf.getInt(SYNC_INTERVAL_KEY,
            Math.max(conf.getInt("io.file.buffer.size", DEFAULT_SYNC_INTERVAL), DEFAULT_SYNC_INTERVAL)));

    Path path = getDefaultWorkFile(context, EXT);
    writer.create(schema, path.getFileSystem(conf).create(path));
    return new PigAvroRecordWriter(writer);
}
From source file: com.metamx.druid.indexer.Utils.java
License: Open Source License
public static OutputStream makePathAndOutputStream(JobContext job, Path outputPath, boolean deleteExisting)
        throws IOException {
    OutputStream retVal;
    FileSystem fs = outputPath.getFileSystem(job.getConfiguration());

    if (fs.exists(outputPath)) {
        if (deleteExisting) {
            fs.delete(outputPath, false);
        } else {
            throw new ISE("outputPath[%s] must not exist.", outputPath);
        }
    }

    if (!FileOutputFormat.getCompressOutput(job)) {
        retVal = fs.create(outputPath, false);
    } else {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(job,
                GzipCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        outputPath = new Path(outputPath.toString() + codec.getDefaultExtension());

        retVal = codec.createOutputStream(fs.create(outputPath, false));
    }

    return retVal;
}
From source file: io.druid.indexer.Utils.java
License: Apache License
public static OutputStream makePathAndOutputStream(JobContext job, Path outputPath, boolean deleteExisting)
        throws IOException {
    OutputStream retVal;
    FileSystem fs = outputPath.getFileSystem(job.getConfiguration());
    Class<? extends CompressionCodec> codecClass;
    CompressionCodec codec = null;

    if (FileOutputFormat.getCompressOutput(job)) {
        codecClass = FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        outputPath = new Path(outputPath.toString() + codec.getDefaultExtension());
    }

    if (fs.exists(outputPath)) {
        if (deleteExisting) {
            fs.delete(outputPath, false);
        } else {
            throw new ISE("outputPath[%s] must not exist.", outputPath);
        }
    }

    if (FileOutputFormat.getCompressOutput(job)) {
        retVal = codec.createOutputStream(fs.create(outputPath, false));
    } else {
        retVal = fs.create(outputPath, false);
    }

    return retVal;
}
From source file: io.druid.indexer.Utils.java
License: Apache License
public static boolean exists(JobContext job, FileSystem fs, Path inputPath) throws IOException {
    if (!FileOutputFormat.getCompressOutput(job)) {
        return fs.exists(inputPath);
    } else {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(job,
                GzipCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        return fs.exists(new Path(inputPath.toString() + codec.getDefaultExtension()));
    }
}
From source file: io.druid.indexer.Utils.java
License: Apache License
public static InputStream openInputStream(JobContext job, Path inputPath, final FileSystem fileSystem)
        throws IOException {
    if (!FileOutputFormat.getCompressOutput(job)) {
        return fileSystem.open(inputPath);
    } else {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(job,
                GzipCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        inputPath = new Path(inputPath.toString() + codec.getDefaultExtension());

        return codec.createInputStream(fileSystem.open(inputPath));
    }
}
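The three io.druid.indexer.Utils helpers above are meant to be used together: the write path appends the codec's default extension when compression is enabled, and the read path re-derives the same extension from the same job configuration. A hypothetical caller (the class name, path, and payload are made up for illustration) might look like:

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.JobContext;

import io.druid.indexer.Utils;

public class UtilsRoundTrip {
    // Writes a small payload and reads it back through the Utils helpers shown above.
    // Both calls consult the same job configuration, so they agree on whether the
    // file name carries a codec extension (e.g. ".gz").
    static void roundTrip(JobContext job, Path path) throws IOException {
        try (OutputStream out = Utils.makePathAndOutputStream(job, path, true)) {
            out.write("{\"rows\": 42}".getBytes(StandardCharsets.UTF_8));
        }
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        if (Utils.exists(job, fs, path)) {
            try (InputStream in = Utils.openInputStream(job, path, fs)) {
                // "in" is already decompressed when output compression is enabled.
            }
        }
    }
}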
From source file: it.crs4.seal.common.OutputStreamFactory.java
License: Open Source License
public DataOutputStream makeStream(Path path) throws IOException {
    Configuration conf = context.getConfiguration();
    boolean isCompressed = FileOutputFormat.getCompressOutput(context);
    CompressionCodec codec = null;
    String extension = "";

    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(context,
                GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }

    FileSystem fs = path.getFileSystem(conf);

    DataOutputStream output;

    if (isCompressed) {
        FSDataOutputStream fileOut = fs.create(path, false);
        output = new DataOutputStream(codec.createOutputStream(fileOut));
    } else
        output = fs.create(path, false);

    return output;
}