Example usage for org.apache.hadoop.io.compress CompressionCodec getDefaultExtension

Introduction

On this page you can find example usage for org.apache.hadoop.io.compress.CompressionCodec#getDefaultExtension.

Prototype

String getDefaultExtension();

Document

Get the default filename extension for this kind of compression.
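
The returned extension includes the leading dot, which is why the examples below can append it directly to a file name. A minimal sketch of the call, assuming Hadoop is on the classpath (the class name DefaultExtensionDemo is made up for illustration):

import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.BZip2Codec;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class DefaultExtensionDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Codecs may be Configurable, so the examples below create them via
        // ReflectionUtils rather than with the new operator.
        for (Class<? extends CompressionCodec> c : Arrays.<Class<? extends CompressionCodec>>asList(
                GzipCodec.class, BZip2Codec.class, DefaultCodec.class)) {
            CompressionCodec codec = ReflectionUtils.newInstance(c, conf);
            // Prints ".gz", ".bz2" and ".deflate" respectively.
            System.out.println(c.getSimpleName() + " -> " + codec.getDefaultExtension());
        }
    }
}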

Usage

From source file: it.crs4.seal.tsv_sort.TextValueOutputFormat.java

License: Apache License

public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext task) throws IOException {
    Configuration conf = task.getConfiguration();
    boolean isCompressed = getCompressOutput(task);

    CompressionCodec codec = null;
    String extension = "";

    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(task, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }

    Path file = getDefaultWorkFile(task, extension);
    FileSystem fs = file.getFileSystem(conf);

    DataOutputStream output;

    if (isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        output = new DataOutputStream(codec.createOutputStream(fileOut));
    } else {
        output = fs.create(file, false);
    }

    return new ValueRecordWriter(output);
}
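
Because the writer appends the codec's default extension, a reader can recover the codec from the file name alone. A minimal sketch of that reverse lookup with CompressionCodecFactory, assuming a gzip-compressed part file (the path is hypothetical):

import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

public class ReadCompressedOutput {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Hypothetical output of a RecordWriter like the one above.
        Path file = new Path("/output/part-r-00000.gz");
        // getCodec() matches the file extension against the registered
        // codecs' default extensions; it returns null for plain files.
        CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(file);
        FileSystem fs = file.getFileSystem(conf);
        try (InputStream in = codec != null ? codec.createInputStream(fs.open(file)) : fs.open(file)) {
            System.out.println("first byte: " + in.read());
        }
    }
}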

From source file: jp.ac.u.tokyo.m.pig.udf.store.FreeEncodingPigTextOutputFormat.java

License: Apache License

@SuppressWarnings("rawtypes")
@Override
public RecordWriter<WritableComparable, Tuple> getRecordWriter(TaskAttemptContext job)
        throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new FreeEncodingPigLineRecordWriter(fileOut, mFieldDelimiter, mEncoding);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new FreeEncodingPigLineRecordWriter(new DataOutputStream(codec.createOutputStream(fileOut)),
                mFieldDelimiter, mEncoding);
    }
}

From source file: matrixFormat.MatrixOutputFormat.java

License: Apache License

public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    String keyValueSeparator = conf.get("mapred.textoutputformat.separator", "\t");
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new MatrixRecordWriter<K, V>(fileOut, keyValueSeparator);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new MatrixRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator);
    }
}

From source file: name.abhijitsarkar.hadoop.io.IOUtils.java

License: Open Source License

/**
 * Compresses the given file.
 * @param uncompressedURI
 *            The file to be archived
 * @param codecName
 *            The codec to be used for archiving
 * @param conf
 *            Job configuration
 * @return The archive URI
 * @throws Exception
 *             If fails to create the archive
 */
public static URI compressFile(final URI uncompressedURI, final String codecName, final Configuration conf)
        throws Exception {
    /*
     * Hadoop 1.1.2 does not have a CompressionCodecFactory#getCodecByName method. Instantiating GzipCodec by using
     * new operator throws NPE (probably Hadoop bug).
     */
    final CompressionCodec codec = getCodecByClassName(GzipCodec.class.getName(), conf);
    final Path uncompressedPath = new Path(uncompressedURI);

    String archiveName = addExtension(uncompressedPath.getName(), codec.getDefaultExtension(), true);

    final Path archivePath = new Path(uncompressedPath.getParent(), archiveName);

    LOGGER.debug("uncompressedURI: {}.", uncompressedURI);
    LOGGER.debug("archiveURI: {}", archivePath.toString());

    OutputStream outputStream = null;
    InputStream inputStream = null;
    CompressionOutputStream out = null;

    try {
        outputStream = new FileOutputStream(archivePath.toUri().getPath());
        inputStream = new FileInputStream(uncompressedURI.getPath());

        out = codec.createOutputStream(outputStream);
        org.apache.hadoop.io.IOUtils.copyBytes(inputStream, out, conf, false);
        out.finish();
    } catch (IOException e) {
        throw e;
    } finally {
        closeStreams(inputStream, outputStream, out);
    }

    return archivePath.toUri();
}
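
A matching decompression helper would strip the same extension on the way back. A minimal sketch (decompressFile is illustrative, not part of the original source, and assumes gzip like compressFile above):

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class DecompressExample {
    public static String decompressFile(final String archivePath, final Configuration conf) throws Exception {
        final CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, conf);
        // removeSuffix() undoes what getDefaultExtension() appended:
        // "data.txt.gz" becomes "data.txt".
        final String restoredPath = CompressionCodecFactory.removeSuffix(archivePath, codec.getDefaultExtension());
        try (InputStream in = codec.createInputStream(new FileInputStream(archivePath));
                OutputStream out = new FileOutputStream(restoredPath)) {
            org.apache.hadoop.io.IOUtils.copyBytes(in, out, conf, false);
        }
        return restoredPath;
    }
}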

From source file: org.apache.druid.indexer.Utils.java

License: Apache License

public static OutputStream makePathAndOutputStream(JobContext job, Path outputPath, boolean deleteExisting)
        throws IOException {
    OutputStream retVal;
    FileSystem fs = outputPath.getFileSystem(job.getConfiguration());
    Class<? extends CompressionCodec> codecClass;
    CompressionCodec codec = null;

    if (FileOutputFormat.getCompressOutput(job)) {
        codecClass = FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        outputPath = new Path(outputPath + codec.getDefaultExtension());
    }

    if (fs.exists(outputPath)) {
        if (deleteExisting) {
            fs.delete(outputPath, false);
        } else {
            throw new ISE("outputPath[%s] must not exist.", outputPath);
        }
    }

    if (FileOutputFormat.getCompressOutput(job)) {
        retVal = codec.createOutputStream(fs.create(outputPath, false));
    } else {
        retVal = fs.create(outputPath, false);
    }
    return retVal;
}

From source file: org.apache.druid.indexer.Utils.java

License: Apache License

public static boolean exists(JobContext job, FileSystem fs, Path inputPath) throws IOException {
    if (!FileOutputFormat.getCompressOutput(job)) {
        return fs.exists(inputPath);
    } else {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(job,
                GzipCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        return fs.exists(new Path(inputPath + codec.getDefaultExtension()));
    }
}

From source file: org.apache.druid.indexer.Utils.java

License: Apache License

public static InputStream openInputStream(JobContext job, Path inputPath, final FileSystem fileSystem)
        throws IOException {
    if (!FileOutputFormat.getCompressOutput(job)) {
        return fileSystem.open(inputPath);
    } else {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(job,
                GzipCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        inputPath = new Path(inputPath + codec.getDefaultExtension());

        return codec.createInputStream(fileSystem.open(inputPath));
    }
}
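
All three Utils methods derive the physical path the same way, so callers always pass the logical, un-suffixed path and never store the compressed name. A minimal sketch of that derivation (the path is hypothetical):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class DerivedPath {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, conf);
        Path logical = new Path("/druid/descriptor.json");
        // The writer, exists() and the reader all append the same suffix,
        // so the compressed path is recomputed everywhere rather than stored.
        Path physical = new Path(logical + codec.getDefaultExtension());
        System.out.println(physical); // prints /druid/descriptor.json.gz
    }
}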

From source file: org.apache.giraph.io.formats.GiraphTextOutputFormat.java

License: Apache License

@Override
public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext job)
        throws IOException, InterruptedException {
    String extension = "";
    CompressionCodec codec = null;
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);

    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);

    /* adjust the path */
    FSDataOutputStream fileOut;
    FileSystem fs = file.getFileSystem(conf);
    String subdir = getSubdir();
    if (!subdir.isEmpty()) {
        Path subdirPath = new Path(subdir);
        Path subdirAbsPath = new Path(file.getParent(), subdirPath);
        Path vertexFile = new Path(subdirAbsPath, file.getName());
        fileOut = fs.create(vertexFile, false);
    } else {
        fileOut = fs.create(file, false);
    }

    String separator = "\t";

    if (!isCompressed) {
        return new LineRecordWriter<Text, Text>(fileOut, separator);
    } else {
        DataOutputStream out = new DataOutputStream(codec.createOutputStream(fileOut));
        return new LineRecordWriter<Text, Text>(out, separator);
    }
}

From source file: org.apache.hama.bsp.TextOutputFormat.java

License: Apache License

@Override
public RecordWriter<K, V> getRecordWriter(FileSystem ignored, BSPJob job, String name) throws IOException {
    boolean isCompressed = getCompressOutput(job);
    String keyValueSeparator = job.getConfiguration().get("bsp.textoutputformat.separator", "\t");
    if (!isCompressed) {
        Path file = FileOutputFormat.getTaskOutputPath(job, name);
        FileSystem fs = file.getFileSystem(job.getConfiguration());
        FSDataOutputStream fileOut = fs.create(file);
        return new LineRecordWriter<K, V>(fileOut, keyValueSeparator);
    } else {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        // create the named codec
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        // build the filename including the extension
        Path file = FileOutputFormat.getTaskOutputPath(job, name + codec.getDefaultExtension());
        FileSystem fs = file.getFileSystem(job.getConfiguration());
        FSDataOutputStream fileOut = fs.create(file);
        return new LineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator);
    }
}

From source file: org.apache.hawq.pxf.plugins.hdfs.LineBreakAccessor.java

License: Apache License

/**
 * Opens file for write.
 */
@Override
public boolean openForWrite() throws Exception {

    String fileName = inputData.getDataSource();
    String compressCodec = inputData.getUserProperty("COMPRESSION_CODEC");
    CompressionCodec codec = null;

    conf = new Configuration();
    fs = FileSystem.get(conf);

    // get compression codec
    if (compressCodec != null) {
        codec = HdfsUtilities.getCodec(conf, compressCodec);
        String extension = codec.getDefaultExtension();
        fileName += extension;
    }

    file = new Path(fileName);

    if (fs.exists(file)) {
        throw new IOException("file " + file.toString() + " already exists, can't write data");
    }
    org.apache.hadoop.fs.Path parent = file.getParent();
    if (!fs.exists(parent)) {
        fs.mkdirs(parent);
        LOG.debug("Created new dir " + parent.toString());
    }

    // create output stream - do not allow overwriting existing file
    createOutputStream(file, codec);

    return true;
}
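
HdfsUtilities.getCodec() above resolves a codec from a user-supplied name. On Hadoop releases newer than the 1.1.2 mentioned in the IOUtils example, the stock CompressionCodecFactory#getCodecByName offers the same lookup; a minimal sketch:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

public class CodecByName {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        CompressionCodecFactory factory = new CompressionCodecFactory(conf);
        // Accepts a fully qualified class name or a short alias such as "gzip".
        CompressionCodec codec = factory.getCodecByName("gzip");
        System.out.println("extension: " + codec.getDefaultExtension()); // prints .gz
    }
}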