List of usage examples for org.apache.hadoop.mapred.FileOutputFormat#getCompressOutput
public static boolean getCompressOutput(JobConf conf)
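Before the per-project examples, here is a minimal, self-contained sketch of the typical setter/getter pairing around this method. The class name is hypothetical; the FileOutputFormat, ReflectionUtils, and codec calls are the standard mapred API.

import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.ReflectionUtils;

public class CompressOutputSketch {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // setCompressOutput writes mapred.output.compress=true; getCompressOutput reads it back
        FileOutputFormat.setCompressOutput(conf, true);
        FileOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);

        if (FileOutputFormat.getCompressOutput(conf)) {
            // The follow-up pattern seen in the examples below: resolve the codec class
            // (defaulting to gzip) and instantiate it through ReflectionUtils
            Class<? extends CompressionCodec> codecClass =
                    FileOutputFormat.getOutputCompressorClass(conf, GzipCodec.class);
            CompressionCodec codec = ReflectionUtils.newInstance(codecClass, conf);
            System.out.println("Compressed output, extension: " + codec.getDefaultExtension());
        }
    }
}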
From source file:com.datascience.hadoop.CsvOutputFormat.java
License:Apache License
@Override
public RecordWriter<LongWritable, ListWritable<Text>> getRecordWriter(FileSystem fileSystem, JobConf conf,
        String name, Progressable progress) throws IOException {
    String charsetName = conf.get(CHARSET);
    Charset charset = charsetName != null ? Charset.forName(charsetName) : StandardCharsets.UTF_8;
    Path path;
    CompressionCodec codec = null;
    if (FileOutputFormat.getCompressOutput(conf)) {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(conf,
                GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);
        path = FileOutputFormat.getTaskOutputPath(conf, name + codec.getDefaultExtension());
    } else {
        path = FileOutputFormat.getTaskOutputPath(conf, name);
    }
    OutputStream out = path.getFileSystem(conf).create(path, progress);
    if (codec != null) {
        // Wrap the raw stream so the data is actually compressed, not merely named *.gz
        out = codec.createOutputStream(out);
    }
    return new CsvRecordWriter(new OutputStreamWriter(out, charset), createFormat(conf));
}
From source file:com.ricemap.spateDB.core.GridRecordWriter.java
License:Apache License
/**
 * Creates an output stream that will be used to write the final cell file
 * @param cellFilePath
 * @return
 * @throws IOException
 */
protected OutputStream createFinalCellStream(Path cellFilePath) throws IOException {
    OutputStream cellStream;
    boolean isCompressed = jobConf != null && FileOutputFormat.getCompressOutput(jobConf);
    if (!isCompressed) {
        // Create new file
        cellStream = fileSystem.create(cellFilePath, true,
                fileSystem.getConf().getInt("io.file.buffer.size", 4096),
                fileSystem.getDefaultReplication(outDir), this.blockSize);
    } else {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(jobConf,
                GzipCodec.class);
        // Create the named codec
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, jobConf);
        // Open a stream to the output file
        cellStream = fileSystem.create(cellFilePath, true,
                fileSystem.getConf().getInt("io.file.buffer.size", 4096),
                fileSystem.getDefaultReplication(outDir), this.blockSize);
        // Encode the output stream using the codec
        cellStream = new DataOutputStream(codec.createOutputStream(cellStream));
    }
    return cellStream;
}
From source file:com.ricemap.spateDB.core.GridRecordWriter.java
License:Apache License
/**
 * Returns the path of the file in which the final cell will be written.
 * @param cellIndex the index of the cell whose output path is retrieved
 * @return
 * @throws IOException
 */
protected Path getFinalCellPath(int cellIndex) throws IOException {
    Path path = null;
    do {
        String filename = counter == 0 ? String.format("data_%05d", cellIndex)
                : String.format("data_%05d_%d", cellIndex, counter);
        boolean isCompressed = jobConf != null && FileOutputFormat.getCompressOutput(jobConf);
        if (isCompressed) {
            Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(jobConf,
                    GzipCodec.class);
            // Create the named codec to obtain its default file extension
            CompressionCodec codec = ReflectionUtils.newInstance(codecClass, jobConf);
            filename += codec.getDefaultExtension();
        }
        path = getFilePath(filename);
        counter++;
    } while (fileSystem.exists(path));
    return path;
}
From source file:com.tomslabs.grid.avro.TextTypedBytesToAvroOutputFormat.java
License:Apache License
public RecordWriter<TypedBytesWritable, TypedBytesWritable> getRecordWriter(FileSystem ignore, JobConf job,
        String name, Progressable prog) throws IOException {
    boolean isMapOnly = job.getNumReduceTasks() == 0;
    Schema schema = isMapOnly ? AvroJob.getMapOutputSchema(job) : AvroJob.getOutputSchema(job);
    final DataFileWriter<GenericRecord> writer = new DataFileWriter<GenericRecord>(
            new GenericDatumWriter<GenericRecord>(schema));
    if (FileOutputFormat.getCompressOutput(job)) {
        int level = job.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        writer.setCodec(CodecFactory.deflateCodec(level));
    }
    // Copy metadata from the job into the Avro file header
    for (Map.Entry<String, String> e : job) {
        if (e.getKey().startsWith(AvroJob.TEXT_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue());
        if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
                    URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1"));
    }
    Path path = FileOutputFormat.getTaskOutputPath(job, name + EXT);
    writer.create(schema, path.getFileSystem(job).create(path));
    return new AvroRecordWriter(writer, schema);
}
From source file:edu.umn.cs.spatialHadoop.core.GridRecordWriter.java
License:Open Source License
/**
 * Creates an output stream that will be used to write the final cell file
 * @param cellFilePath
 * @return
 * @throws IOException
 */
protected OutputStream createFinalCellStream(Path cellFilePath) throws IOException {
    OutputStream cellStream;
    boolean isCompressed = jobConf != null && FileOutputFormat.getCompressOutput(jobConf);
    if (!isCompressed) {
        // Create new file
        cellStream = fileSystem.create(cellFilePath, true,
                fileSystem.getConf().getInt("io.file.buffer.size", 4096),
                fileSystem.getDefaultReplication(cellFilePath), this.blockSize);
    } else {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(jobConf,
                GzipCodec.class);
        // Create the named codec
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, jobConf);
        // Open a stream to the output file
        cellStream = fileSystem.create(cellFilePath, true,
                fileSystem.getConf().getInt("io.file.buffer.size", 4096),
                fileSystem.getDefaultReplication(cellFilePath), this.blockSize);
        // Encode the output stream using the codec
        cellStream = new DataOutputStream(codec.createOutputStream(cellStream));
    }
    return cellStream;
}
From source file:edu.umn.cs.spatialHadoop.core.GridRecordWriter.java
License:Open Source License
/**
 * Returns the path of the file in which the final cell will be written.
 * @param cellIndex The index of the cell to retrieve its output path.
 * @return
 * @throws IOException
 */
protected Path getFinalCellPath(int cellIndex) throws IOException {
    Path path;
    do {
        String filename = counter == 0 ? String.format("data_%05d", cellIndex)
                : String.format("data_%05d_%d", cellIndex, counter);
        boolean isCompressed = jobConf != null && FileOutputFormat.getCompressOutput(jobConf);
        if (isCompressed) {
            Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(jobConf,
                    GzipCodec.class);
            // Create the named codec to obtain its default file extension
            CompressionCodec codec = ReflectionUtils.newInstance(codecClass, jobConf);
            filename += codec.getDefaultExtension();
        }
        path = getFilePath(filename);
        counter++;
    } while (fileSystem.exists(path));
    return path;
}
From source file:net.iponweb.hadoop.streaming.avro.AvroAsJsonOutputFormat.java
License:Apache License
static <K> void configureDataFileWriter(DataFileWriter<K> writer, JobConf job)
        throws UnsupportedEncodingException {
    if (FileOutputFormat.getCompressOutput(job)) {
        int level = job.getInt(org.apache.avro.mapred.AvroOutputFormat.DEFLATE_LEVEL_KEY,
                org.apache.avro.mapred.AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
        String codecName = job.get(AvroJob.OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC) ? CodecFactory.deflateCodec(level)
                : CodecFactory.fromString(codecName);
        writer.setCodec(factory);
    }
    writer.setSyncInterval(
            job.getInt(org.apache.avro.mapred.AvroOutputFormat.SYNC_INTERVAL_KEY, DEFAULT_SYNC_INTERVAL));
    // Copy metadata from the job into the Avro file header
    for (Map.Entry<String, String> e : job) {
        if (e.getKey().startsWith(AvroJob.TEXT_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue());
        if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
                    URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1"));
    }
}
From source file:org.apache.avro.mapred.AvroOutputFormat.java
License:Apache License
/**
 * This will select the correct compression codec from the JobConf.
 * The order of selection is as follows:
 * <ul>
 *   <li>If mapred.output.compress is true then look for a codec, otherwise no compression</li>
 *   <li>Use avro.output.codec if populated</li>
 *   <li>Next use mapred.output.compression.codec if populated</li>
 *   <li>If not, default to the deflate codec</li>
 * </ul>
 */
static CodecFactory getCodecFactory(JobConf job) {
    CodecFactory factory = null;
    if (FileOutputFormat.getCompressOutput(job)) {
        int level = job.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = job.get(AvroJob.OUTPUT_CODEC);
        if (codecName == null) {
            String codecClassName = job.get("mapred.output.compression.codec", null);
            String avroCodecName = HadoopCodecFactory.getAvroCodecName(codecClassName);
            if (codecClassName != null && avroCodecName != null) {
                factory = HadoopCodecFactory.fromHadoopString(codecClassName);
                job.set(AvroJob.OUTPUT_CODEC, avroCodecName);
                return factory;
            } else {
                return CodecFactory.deflateCodec(level);
            }
        } else {
            if (codecName.equals(DEFLATE_CODEC)) {
                factory = CodecFactory.deflateCodec(level);
            } else {
                factory = CodecFactory.fromString(codecName);
            }
        }
    }
    return factory;
}
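For context, a minimal driver-side sketch of how the properties consulted above could be set. The wrapper class name and the choice of snappy are illustrative assumptions, not part of the source; AvroJob.OUTPUT_CODEC and DataFileConstants are the standard Avro mapred API.

import org.apache.avro.file.DataFileConstants;
import org.apache.avro.mapred.AvroJob;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class AvroCodecConfigSketch {
    public static void configure(JobConf job) {
        // Sets mapred.output.compress=true; without it getCodecFactory(job) returns null
        FileOutputFormat.setCompressOutput(job, true);
        // avro.output.codec is consulted before mapred.output.compression.codec
        job.set(AvroJob.OUTPUT_CODEC, DataFileConstants.SNAPPY_CODEC);
    }
}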
From source file:org.apache.avro.mapred.tether.TetherOutputFormat.java
License:Apache License
@SuppressWarnings("unchecked")
public RecordWriter<TetherData, NullWritable> getRecordWriter(FileSystem ignore, JobConf job, String name,
        Progressable prog) throws IOException {
    Schema schema = AvroJob.getOutputSchema(job);
    final DataFileWriter writer = new DataFileWriter(new GenericDatumWriter());
    if (FileOutputFormat.getCompressOutput(job)) {
        int level = job.getInt(AvroOutputFormat.DEFLATE_LEVEL_KEY, AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
        writer.setCodec(CodecFactory.deflateCodec(level));
    }
    Path path = FileOutputFormat.getTaskOutputPath(job, name + AvroOutputFormat.EXT);
    writer.create(schema, path.getFileSystem(job).create(path));
    return new RecordWriter<TetherData, NullWritable>() {
        public void write(TetherData datum, NullWritable ignore) throws IOException {
            writer.appendEncoded(datum.buffer());
        }

        public void close(Reporter reporter) throws IOException {
            writer.close();
        }
    };
}
From source file:org.apache.pig.impl.util.avro.AvroRecordWriter.java
License:Apache License
static void configureDataFileWriter(DataFileWriter<GenericData.Record> writer, JobConf job)
        throws UnsupportedEncodingException {
    if (FileOutputFormat.getCompressOutput(job)) {
        int level = job.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = job.get(AvroJob.OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC) ? CodecFactory.deflateCodec(level)
                : CodecFactory.fromString(codecName);
        writer.setCodec(factory);
    }
    // Take the max because core-default.xml sets io.file.buffer.size to 4K
    writer.setSyncInterval(job.getInt(SYNC_INTERVAL_KEY,
            Math.max(job.getInt("io.file.buffer.size", DEFAULT_SYNC_INTERVAL), DEFAULT_SYNC_INTERVAL)));
    // Copy metadata from the job into the Avro file header
    for (Map.Entry<String, String> e : job) {
        if (e.getKey().startsWith(AvroJob.TEXT_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue());
        if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
                    URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1"));
    }
}