List of usage examples for org.apache.hadoop.io.compress.CompressionCodec.getDefaultExtension()
String getDefaultExtension();
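Every example below follows the same pattern: obtain a codec instance (directly or through ReflectionUtils), then append the value of getDefaultExtension() to an output file name. The returned extension includes the leading dot, e.g. ".gz" for GzipCodec. Here is a minimal, self-contained sketch of that pattern, assuming the standard Hadoop GzipCodec; the class name DefaultExtensionExample is illustrative only:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class DefaultExtensionExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Instantiate the codec the same way the output formats below do.
        CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, conf);
        // getDefaultExtension() returns the suffix with its leading dot, e.g. ".gz".
        String fileName = "part-r-00000" + codec.getDefaultExtension();
        System.out.println(fileName); // prints part-r-00000.gz
    }
}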
From source file: org.apache.sqoop.connector.hdfs.TestHdfsBase.java
License: Apache License
protected void createTextInput(String indir, Class<? extends CompressionCodec> clz, int numberOfFiles,
        int numberOfRows) throws IOException, InstantiationException, IllegalAccessException {
    Configuration conf = new Configuration();

    CompressionCodec codec = null;
    String extension = "";
    if (clz != null) {
        codec = clz.newInstance();
        if (codec instanceof Configurable) {
            ((Configurable) codec).setConf(conf);
        }
        extension = codec.getDefaultExtension();
    }

    int index = 1;
    for (int fi = 0; fi < numberOfFiles; fi++) {
        String fileName = indir + "/" + UUID.randomUUID() + extension;
        OutputStream filestream = FileUtils.create(fileName);
        BufferedWriter filewriter;
        if (codec != null) {
            filewriter = new BufferedWriter(new OutputStreamWriter(
                    codec.createOutputStream(filestream, codec.createCompressor()), "UTF-8"));
        } else {
            filewriter = new BufferedWriter(new OutputStreamWriter(filestream, "UTF-8"));
        }

        for (int ri = 0; ri < numberOfRows; ri++) {
            String row = index + "," + (double) index + ",'" + index + "'";
            filewriter.write(row + HdfsConstants.DEFAULT_RECORD_DELIMITER);
            index++;
        }
        filewriter.close();
    }
}
From source file: org.apache.tajo.storage.compress.CodecPool.java
License: Apache License
/**
 * Get a {@link Compressor} for the given {@link CompressionCodec} from the
 * pool or a new one.
 *
 * @param codec the <code>CompressionCodec</code> for which to get the
 *          <code>Compressor</code>
 * @param conf the <code>Configuration</code> object which contains confs for
 *          creating or reinitializing the compressor
 * @return <code>Compressor</code> for the given <code>CompressionCodec</code>
 *         from the pool or a new one
 */
public static Compressor getCompressor(CompressionCodec codec, Configuration conf) {
    Compressor compressor = borrow(COMPRESSOR_POOL, codec.getCompressorType());
    if (compressor == null) {
        compressor = codec.createCompressor();
        LOG.info("Got brand-new compressor [" + codec.getDefaultExtension() + "]");
    } else {
        compressor.reinit(conf);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Got recycled compressor");
        }
    }
    return compressor;
}
From source file: org.apache.tajo.storage.compress.CodecPool.java
License: Apache License
/**
 * Get a {@link Decompressor} for the given {@link CompressionCodec} from the
 * pool or a new one.
 *
 * @param codec the <code>CompressionCodec</code> for which to get the
 *          <code>Decompressor</code>
 * @return <code>Decompressor</code> for the given
 *         <code>CompressionCodec</code> from the pool or a new one
 */
public static Decompressor getDecompressor(CompressionCodec codec) {
    Decompressor decompressor = borrow(DECOMPRESSOR_POOL, codec.getDecompressorType());
    if (decompressor == null) {
        decompressor = codec.createDecompressor();
        LOG.info("Got brand-new decompressor [" + codec.getDefaultExtension() + "]");
    } else {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Got recycled decompressor");
        }
    }
    return decompressor;
}
From source file: org.apache.tinkerpop.gremlin.hadoop.structure.io.CommonFileOutputFormat.java
License: Apache License
protected DataOutputStream getDataOutputStream(final TaskAttemptContext job)
        throws IOException, InterruptedException {
    final Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        final Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, DefaultCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    final Path file = super.getDefaultWorkFile(job, extension);
    final FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        return new DataOutputStream(fs.create(file, false));
    } else {
        return new DataOutputStream(codec.createOutputStream(fs.create(file, false)));
    }
}
From source file: org.cloudgraph.mapreduce.GraphXmlOutputFormat.java
License: Apache License
@Override
public RecordWriter<LongWritable, GraphWritable> getRecordWriter(TaskAttemptContext job)
        throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new GraphXmlRecordWriter(fileOut, job);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new GraphXmlRecordWriter(new DataOutputStream(codec.createOutputStream(fileOut)), job);
    }
}
From source file: org.godhuli.rhipe.RXTextOutputFormat.java
License: Apache License
public RecordWriter<RHBytesWritable, RHBytesWritable> getRecordWriter(TaskAttemptContext job)
        throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    String keyValueSeparator = conf.get("mapred.textoutputformat.separator", "\t");
    String fieldSeparator = conf.get("mapred.field.separator", " ");
    boolean usekey = conf.get("mapred.textoutputformat.usekey").equals("TRUE");
    String newline = conf.get("rhipe.eol.sequence");
    String squote = conf.get("rhipe_string_quote");
    if (squote == null) {
        squote = "";
    }
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new RXTextRecordWriter(fileOut, keyValueSeparator, fieldSeparator, squote, usekey, newline);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new RXTextRecordWriter(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator, fieldSeparator, squote, usekey, newline);
    }
}
From source file: org.ojai.json.mapreduce.JSONFileOutputFormat.java
License: Apache License
@Override
public RecordWriter<LongWritable, Document> getRecordWriter(TaskAttemptContext job)
        throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path path = getDefaultWorkFile(job, extension);
    FileSystem fs = path.getFileSystem(conf);
    FSDataOutputStream out = fs.create(path, false);
    if (!isCompressed) {
        return new JSONFileOutputRecordWriter(out);
    } else {
        return new JSONFileOutputRecordWriter(new DataOutputStream(codec.createOutputStream(out)));
    }
}
From source file: org.saarus.service.hadoop.util.JsonOutputFormat.java
License: Apache License
@Override
public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress)
        throws IOException {
    boolean isCompressed = getCompressOutput(job);
    String keyValueSeparator = job.get("mapred.textoutputformat.separator", "\t");
    String[] headers = job.getStrings("column.headers");
    if (!isCompressed) {
        Path file = FileOutputFormat.getTaskOutputPath(job, name);
        FileSystem fs = file.getFileSystem(job);
        FSDataOutputStream fileOut = fs.create(file, progress);
        return new JsonRecordWriter<K, V>(fileOut, keyValueSeparator, headers);
    } else {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        // create the named codec
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job);
        // build the filename including the extension
        Path file = FileOutputFormat.getTaskOutputPath(job, name + codec.getDefaultExtension());
        FileSystem fs = file.getFileSystem(job);
        FSDataOutputStream fileOut = fs.create(file, progress);
        return new JsonRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator, headers);
    }
}
From source file: org.schedoscope.export.ftp.outputformat.FtpUploadOutputFormat.java
License: Apache License
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    boolean isCompressed = getCompressOutput(context);
    CompressionCodec codec = null;
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class);
        // only support gzip and bzip2 compression
        if (codecClass.equals(BZip2Codec.class) || codecClass.equals(GzipCodec.class)) {
            codec = ReflectionUtils.newInstance(codecClass, conf);
            extension = codec.getDefaultExtension();
        } else {
            LOG.warn("neither gzip nor bzip2 compression codec found - disabling compression");
            isCompressed = false;
            extension = "";
        }
    }
    char delimiter = conf.get(FTP_EXPORT_CVS_DELIMITER, "\t").charAt(0);
    String[] header = conf.getStrings(FTP_EXPORT_HEADER_COLUMNS);
    Path file = getDefaultWorkFile(context, extension);
    FileSystem fs = file.getFileSystem(conf);
    FSDataOutputStream fileOut = fs.create(file, false);
    RecordWriter<K, V> writer;
    if (conf.get(FTP_EXPORT_FILE_TYPE).equals(FileOutputType.csv.toString())) {
        if (!isCompressed) {
            writer = new CSVRecordWriter<K, V>(fileOut, header, delimiter);
        } else {
            writer = new CSVRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                    header, delimiter);
        }
    } else if (conf.get(FTP_EXPORT_FILE_TYPE).equals(FileOutputType.json.toString())) {
        if (!isCompressed) {
            writer = new JsonRecordWriter<K, V>(fileOut);
        } else {
            writer = new JsonRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)));
        }
    } else {
        throw new IllegalArgumentException("unknown file output type");
    }
    return writer;
}
From source file: org.seqdoop.hadoop_bam.FastqOutputFormat.java
License: Open Source License
public RecordWriter<Text, SequencedFragment> getRecordWriter(TaskAttemptContext task) throws IOException {
    Configuration conf = ContextUtil.getConfiguration(task);
    boolean isCompressed = getCompressOutput(task);

    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(task, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }

    Path file = getDefaultWorkFile(task, extension);
    FileSystem fs = file.getFileSystem(conf);

    OutputStream output;
    if (isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        output = new DataOutputStream(codec.createOutputStream(fileOut));
    } else {
        output = fs.create(file, false);
    }

    return new FastqRecordWriter(conf, output);
}