List of usage examples for org.apache.hadoop.io.compress.CompressionCodec#getDefaultExtension()
String getDefaultExtension();
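getDefaultExtension() returns the file-name suffix conventionally associated with a codec (for example ".gz" for GzipCodec), and the examples below append it to output paths when compression is enabled. A minimal standalone sketch, assuming a plain Configuration and the built-in GzipCodec and BZip2Codec classes; the class name DefaultExtensionDemo is illustrative only:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.BZip2Codec;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class DefaultExtensionDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Instantiate codecs the same way the output formats below do.
        CompressionCodec gzip = ReflectionUtils.newInstance(GzipCodec.class, conf);
        CompressionCodec bzip2 = ReflectionUtils.newInstance(BZip2Codec.class, conf);
        // Expected output: ".gz" and ".bz2" - the suffix to append to output file names.
        System.out.println(gzip.getDefaultExtension());
        System.out.println(bzip2.getDefaultExtension());
    }
}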
From source file:it.crs4.seal.tsv_sort.TextValueOutputFormat.java
License:Apache License
public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext task) throws IOException {
    Configuration conf = task.getConfiguration();
    boolean isCompressed = getCompressOutput(task);
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(task, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(task, extension);
    FileSystem fs = file.getFileSystem(conf);
    DataOutputStream output;
    if (isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        output = new DataOutputStream(codec.createOutputStream(fileOut));
    } else {
        output = fs.create(file, false);
    }
    return new ValueRecordWriter(output);
}
From source file:jp.ac.u.tokyo.m.pig.udf.store.FreeEncodingPigTextOutputFormat.java
License:Apache License
@SuppressWarnings("rawtypes")
@Override
public RecordWriter<WritableComparable, Tuple> getRecordWriter(TaskAttemptContext job)
        throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new FreeEncodingPigLineRecordWriter(fileOut, mFieldDelimiter, mEncoding);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new FreeEncodingPigLineRecordWriter(new DataOutputStream(codec.createOutputStream(fileOut)),
                mFieldDelimiter, mEncoding);
    }
}
From source file:matrixFormat.MatrixOutputFormat.java
License:Apache License
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    String keyValueSeparator = conf.get("mapred.textoutputformat.separator", "\t");
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new MatrixRecordWriter<K, V>(fileOut, keyValueSeparator);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new MatrixRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator);
    }
}
From source file:name.abhijitsarkar.hadoop.io.IOUtils.java
License:Open Source License
/**
 * @param uncompressedURI
 *            The file to be archived
 * @param codecName
 *            The codec to be used for archiving
 * @param conf
 *            Job configuration
 * @return The archive URI
 * @throws Exception
 *             If fails to create the archive
 */
public static URI compressFile(final URI uncompressedURI, final String codecName, final Configuration conf)
        throws Exception {
    /*
     * Hadoop 1.1.2 does not have a CompressionCodecFactory#getCodecByName method. Instantiating GzipCodec by
     * using new operator throws NPE (probably Hadoop bug).
     */
    final CompressionCodec codec = getCodecByClassName(GzipCodec.class.getName(), conf);
    final Path uncompressedPath = new Path(uncompressedURI);
    String archiveName = addExtension(uncompressedPath.getName(), codec.getDefaultExtension(), true);
    final Path archivePath = new Path(uncompressedPath.getParent(), archiveName);
    LOGGER.debug("uncompressedURI: {}.", uncompressedURI);
    LOGGER.debug("archiveURI: {}", archivePath.toString());
    OutputStream outputStream = null;
    InputStream inputStream = null;
    CompressionOutputStream out = null;
    try {
        outputStream = new FileOutputStream(archivePath.toUri().getPath());
        inputStream = new FileInputStream(uncompressedURI.getPath());
        out = codec.createOutputStream(outputStream);
        org.apache.hadoop.io.IOUtils.copyBytes(inputStream, out, conf, false);
        out.finish();
    } catch (IOException e) {
        throw e;
    } finally {
        closeStreams(inputStream, outputStream, out);
    }
    return archivePath.toUri();
}
From source file:org.apache.druid.indexer.Utils.java
License:Apache License
public static OutputStream makePathAndOutputStream(JobContext job, Path outputPath, boolean deleteExisting)
        throws IOException {
    OutputStream retVal;
    FileSystem fs = outputPath.getFileSystem(job.getConfiguration());
    Class<? extends CompressionCodec> codecClass;
    CompressionCodec codec = null;
    if (FileOutputFormat.getCompressOutput(job)) {
        codecClass = FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        outputPath = new Path(outputPath + codec.getDefaultExtension());
    }
    if (fs.exists(outputPath)) {
        if (deleteExisting) {
            fs.delete(outputPath, false);
        } else {
            throw new ISE("outputPath[%s] must not exist.", outputPath);
        }
    }
    if (FileOutputFormat.getCompressOutput(job)) {
        retVal = codec.createOutputStream(fs.create(outputPath, false));
    } else {
        retVal = fs.create(outputPath, false);
    }
    return retVal;
}
From source file:org.apache.druid.indexer.Utils.java
License:Apache License
public static boolean exists(JobContext job, FileSystem fs, Path inputPath) throws IOException {
    if (!FileOutputFormat.getCompressOutput(job)) {
        return fs.exists(inputPath);
    } else {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(job,
                GzipCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        return fs.exists(new Path(inputPath + codec.getDefaultExtension()));
    }
}
From source file:org.apache.druid.indexer.Utils.java
License:Apache License
public static InputStream openInputStream(JobContext job, Path inputPath, final FileSystem fileSystem)
        throws IOException {
    if (!FileOutputFormat.getCompressOutput(job)) {
        return fileSystem.open(inputPath);
    } else {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(job,
                GzipCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        inputPath = new Path(inputPath + codec.getDefaultExtension());
        return codec.createInputStream(fileSystem.open(inputPath));
    }
}
From source file:org.apache.giraph.io.formats.GiraphTextOutputFormat.java
License:Apache License
@Override
public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext job)
        throws IOException, InterruptedException {
    String extension = "";
    CompressionCodec codec = null;
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);

    /* adjust the path */
    FSDataOutputStream fileOut;
    FileSystem fs = file.getFileSystem(conf);
    String subdir = getSubdir();
    if (!subdir.isEmpty()) {
        Path subdirPath = new Path(subdir);
        Path subdirAbsPath = new Path(file.getParent(), subdirPath);
        Path vertexFile = new Path(subdirAbsPath, file.getName());
        fileOut = fs.create(vertexFile, false);
    } else {
        fileOut = fs.create(file, false);
    }
    String separator = "\t";
    if (!isCompressed) {
        return new LineRecordWriter<Text, Text>(fileOut, separator);
    } else {
        DataOutputStream out = new DataOutputStream(codec.createOutputStream(fileOut));
        return new LineRecordWriter<Text, Text>(out, separator);
    }
}
From source file:org.apache.hama.bsp.TextOutputFormat.java
License:Apache License
@Override
public RecordWriter<K, V> getRecordWriter(FileSystem ignored, BSPJob job, String name) throws IOException {
    boolean isCompressed = getCompressOutput(job);
    String keyValueSeparator = job.getConfiguration().get("bsp.textoutputformat.separator", "\t");
    if (!isCompressed) {
        Path file = FileOutputFormat.getTaskOutputPath(job, name);
        FileSystem fs = file.getFileSystem(job.getConfiguration());
        FSDataOutputStream fileOut = fs.create(file);
        return new LineRecordWriter<K, V>(fileOut, keyValueSeparator);
    } else {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        // create the named codec
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        // build the filename including the extension
        Path file = FileOutputFormat.getTaskOutputPath(job, name + codec.getDefaultExtension());
        FileSystem fs = file.getFileSystem(job.getConfiguration());
        FSDataOutputStream fileOut = fs.create(file);
        return new LineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator);
    }
}
From source file:org.apache.hawq.pxf.plugins.hdfs.LineBreakAccessor.java
License:Apache License
/**
 * Opens file for write.
 */
@Override
public boolean openForWrite() throws Exception {
    String fileName = inputData.getDataSource();
    String compressCodec = inputData.getUserProperty("COMPRESSION_CODEC");
    CompressionCodec codec = null;
    conf = new Configuration();
    fs = FileSystem.get(conf);

    // get compression codec
    if (compressCodec != null) {
        codec = HdfsUtilities.getCodec(conf, compressCodec);
        String extension = codec.getDefaultExtension();
        fileName += extension;
    }

    file = new Path(fileName);
    if (fs.exists(file)) {
        throw new IOException("file " + file.toString() + " already exists, can't write data");
    }

    org.apache.hadoop.fs.Path parent = file.getParent();
    if (!fs.exists(parent)) {
        fs.mkdirs(parent);
        LOG.debug("Created new dir " + parent.toString());
    }

    // create output stream - do not allow overwriting existing file
    createOutputStream(file, codec);
    return true;
}