Example usage for org.apache.hadoop.io.compress CompressionCodec getDefaultExtension

Introduction

On this page you can find example usage for org.apache.hadoop.io.compress.CompressionCodec#getDefaultExtension.

Prototype

String getDefaultExtension();

Document

Get the default filename extension for this kind of compression.
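
The returned extension includes the leading dot, which is why the examples below can append it directly to a file name. A minimal sketch of the call, assuming Hadoop is on the classpath (the class name DefaultExtensionDemo is made up for illustration):

import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.BZip2Codec;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class DefaultExtensionDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Codecs may be Configurable, so the examples below create them via
        // ReflectionUtils rather than with the new operator.
        for (Class<? extends CompressionCodec> c : Arrays.<Class<? extends CompressionCodec>>asList(
                GzipCodec.class, BZip2Codec.class, DefaultCodec.class)) {
            CompressionCodec codec = ReflectionUtils.newInstance(c, conf);
            // Prints ".gz", ".bz2" and ".deflate" respectively.
            System.out.println(c.getSimpleName() + " -> " + codec.getDefaultExtension());
        }
    }
}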

Usage

From source file: it.crs4.seal.tsv_sort.TextValueOutputFormat.java

License: Apache License

public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext task) throws IOException {
    Configuration conf = task.getConfiguration();
    boolean isCompressed = getCompressOutput(task);

    CompressionCodec codec = null;
    String extension = "";

    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(task, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }

    Path file = getDefaultWorkFile(task, extension);
    FileSystem fs = file.getFileSystem(conf);

    DataOutputStream output;

    if (isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        output = new DataOutputStream(codec.createOutputStream(fileOut));
    } else {
        output = fs.create(file, false);
    }

    return new ValueRecordWriter(output);
}
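
Because the writer appends the codec's default extension, a reader can recover the codec from the file name alone. A minimal sketch of that reverse lookup with CompressionCodecFactory, assuming a gzip-compressed part file (the path is hypothetical):

import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

public class ReadCompressedOutput {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Hypothetical output of a RecordWriter like the one above.
        Path file = new Path("/output/part-r-00000.gz");
        // getCodec() matches the file extension against the registered
        // codecs' default extensions; it returns null for plain files.
        CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(file);
        FileSystem fs = file.getFileSystem(conf);
        try (InputStream in = codec != null ? codec.createInputStream(fs.open(file)) : fs.open(file)) {
            System.out.println("first byte: " + in.read());
        }
    }
}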

From source file: jp.ac.u.tokyo.m.pig.udf.store.FreeEncodingPigTextOutputFormat.java

License: Apache License

@SuppressWarnings("rawtypes")
@Override
public RecordWriter<WritableComparable, Tuple> getRecordWriter(TaskAttemptContext job)
        throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new FreeEncodingPigLineRecordWriter(fileOut, mFieldDelimiter, mEncoding);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new FreeEncodingPigLineRecordWriter(new DataOutputStream(codec.createOutputStream(fileOut)),
                mFieldDelimiter, mEncoding);
    }
}

From source file: matrixFormat.MatrixOutputFormat.java

License: Apache License

public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    String keyValueSeparator = conf.get("mapred.textoutputformat.separator", "\t");
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new MatrixRecordWriter<K, V>(fileOut, keyValueSeparator);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new MatrixRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator);
    }
}

From source file: name.abhijitsarkar.hadoop.io.IOUtils.java

License: Open Source License

/**
 * Compresses the given file.
 * @param uncompressedURI
 *            The file to be archived
 * @param codecName
 *            The codec to be used for archiving
 * @param conf
 *            Job configuration
 * @return The archive URI
 * @throws Exception
 *             If fails to create the archive
 */
public static URI compressFile(final URI uncompressedURI, final String codecName, final Configuration conf)
        throws Exception {
    /*
     * Hadoop 1.1.2 does not have a CompressionCodecFactory#getCodecByName method. Instantiating GzipCodec by using
     * new operator throws NPE (probably Hadoop bug).
     */
    final CompressionCodec codec = getCodecByClassName(GzipCodec.class.getName(), conf);
    final Path uncompressedPath = new Path(uncompressedURI);

    String archiveName = addExtension(uncompressedPath.getName(), codec.getDefaultExtension(), true);

    final Path archivePath = new Path(uncompressedPath.getParent(), archiveName);

    LOGGER.debug("uncompressedURI: {}.", uncompressedURI);
    LOGGER.debug("archiveURI: {}", archivePath.toString());

    OutputStream outputStream = null;
    InputStream inputStream = null;
    CompressionOutputStream out = null;

    try {
        outputStream = new FileOutputStream(archivePath.toUri().getPath());
        inputStream = new FileInputStream(uncompressedURI.getPath());

        out = codec.createOutputStream(outputStream);
        org.apache.hadoop.io.IOUtils.copyBytes(inputStream, out, conf, false);
        out.finish();
    } catch (IOException e) {
        throw e;
    } finally {
        closeStreams(inputStream, outputStream, out);
    }

    return archivePath.toUri();
}
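
A matching decompression helper would strip the same extension on the way back. A minimal sketch (decompressFile is illustrative, not part of the original source, and assumes gzip like compressFile above):

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class DecompressExample {
    public static String decompressFile(final String archivePath, final Configuration conf) throws Exception {
        final CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, conf);
        // removeSuffix() undoes what getDefaultExtension() appended:
        // "data.txt.gz" becomes "data.txt".
        final String restoredPath = CompressionCodecFactory.removeSuffix(archivePath, codec.getDefaultExtension());
        try (InputStream in = codec.createInputStream(new FileInputStream(archivePath));
                OutputStream out = new FileOutputStream(restoredPath)) {
            org.apache.hadoop.io.IOUtils.copyBytes(in, out, conf, false);
        }
        return restoredPath;
    }
}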

From source file: org.apache.druid.indexer.Utils.java

License: Apache License

public static OutputStream makePathAndOutputStream(JobContext job, Path outputPath, boolean deleteExisting)
        throws IOException {
    OutputStream retVal;
    FileSystem fs = outputPath.getFileSystem(job.getConfiguration());
    Class<? extends CompressionCodec> codecClass;
    CompressionCodec codec = null;

    if (FileOutputFormat.getCompressOutput(job)) {
        codecClass = FileOutputFormat.getOutputCompressorClass(job, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        outputPath = new Path(outputPath + codec.getDefaultExtension());
    }

    if (fs.exists(outputPath)) {
        if (deleteExisting) {
            fs.delete(outputPath, false);
        } else {
            throw new ISE("outputPath[%s] must not exist.", outputPath);
        }
    }

    if (FileOutputFormat.getCompressOutput(job)) {
        retVal = codec.createOutputStream(fs.create(outputPath, false));
    } else {
        retVal = fs.create(outputPath, false);
    }
    return retVal;
}

From source file: org.apache.druid.indexer.Utils.java

License: Apache License

public static boolean exists(JobContext job, FileSystem fs, Path inputPath) throws IOException {
    if (!FileOutputFormat.getCompressOutput(job)) {
        return fs.exists(inputPath);
    } else {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(job,
                GzipCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        return fs.exists(new Path(inputPath + codec.getDefaultExtension()));
    }
}

From source file: org.apache.druid.indexer.Utils.java

License: Apache License

public static InputStream openInputStream(JobContext job, Path inputPath, final FileSystem fileSystem)
        throws IOException {
    if (!FileOutputFormat.getCompressOutput(job)) {
        return fileSystem.open(inputPath);
    } else {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(job,
                GzipCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        inputPath = new Path(inputPath + codec.getDefaultExtension());

        return codec.createInputStream(fileSystem.open(inputPath));
    }
}
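
All three Utils methods derive the physical path the same way, so callers always pass the logical, un-suffixed path and never store the compressed name. A minimal sketch of that derivation (the path is hypothetical):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class DerivedPath {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, conf);
        Path logical = new Path("/druid/descriptor.json");
        // The writer, exists() and the reader all append the same suffix,
        // so the compressed path is recomputed everywhere rather than stored.
        Path physical = new Path(logical + codec.getDefaultExtension());
        System.out.println(physical); // prints /druid/descriptor.json.gz
    }
}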

From source file: org.apache.giraph.io.formats.GiraphTextOutputFormat.java

License: Apache License

@Override
public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext job)
        throws IOException, InterruptedException {
    String extension = "";
    CompressionCodec codec = null;
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);

    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);

    /* adjust the path */
    FSDataOutputStream fileOut;
    FileSystem fs = file.getFileSystem(conf);
    String subdir = getSubdir();
    if (!subdir.isEmpty()) {
        Path subdirPath = new Path(subdir);
        Path subdirAbsPath = new Path(file.getParent(), subdirPath);
        Path vertexFile = new Path(subdirAbsPath, file.getName());
        fileOut = fs.create(vertexFile, false);
    } else {
        fileOut = fs.create(file, false);
    }

    String separator = "\t";

    if (!isCompressed) {
        return new LineRecordWriter<Text, Text>(fileOut, separator);
    } else {
        DataOutputStream out = new DataOutputStream(codec.createOutputStream(fileOut));
        return new LineRecordWriter<Text, Text>(out, separator);
    }
}

From source file: org.apache.hama.bsp.TextOutputFormat.java

License: Apache License

@Override
public RecordWriter<K, V> getRecordWriter(FileSystem ignored, BSPJob job, String name) throws IOException {
    boolean isCompressed = getCompressOutput(job);
    String keyValueSeparator = job.getConfiguration().get("bsp.textoutputformat.separator", "\t");
    if (!isCompressed) {
        Path file = FileOutputFormat.getTaskOutputPath(job, name);
        FileSystem fs = file.getFileSystem(job.getConfiguration());
        FSDataOutputStream fileOut = fs.create(file);
        return new LineRecordWriter<K, V>(fileOut, keyValueSeparator);
    } else {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        // create the named codec
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job.getConfiguration());
        // build the filename including the extension
        Path file = FileOutputFormat.getTaskOutputPath(job, name + codec.getDefaultExtension());
        FileSystem fs = file.getFileSystem(job.getConfiguration());
        FSDataOutputStream fileOut = fs.create(file);
        return new LineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator);
    }
}

From source file: org.apache.hawq.pxf.plugins.hdfs.LineBreakAccessor.java

License: Apache License

/**
 * Opens file for write.
 */
@Override
public boolean openForWrite() throws Exception {

    String fileName = inputData.getDataSource();
    String compressCodec = inputData.getUserProperty("COMPRESSION_CODEC");
    CompressionCodec codec = null;

    conf = new Configuration();
    fs = FileSystem.get(conf);

    // get compression codec
    if (compressCodec != null) {
        codec = HdfsUtilities.getCodec(conf, compressCodec);
        String extension = codec.getDefaultExtension();
        fileName += extension;
    }

    file = new Path(fileName);

    if (fs.exists(file)) {
        throw new IOException("file " + file.toString() + " already exists, can't write data");
    }
    org.apache.hadoop.fs.Path parent = file.getParent();
    if (!fs.exists(parent)) {
        fs.mkdirs(parent);
        LOG.debug("Created new dir " + parent.toString());
    }

    // create output stream - do not allow overwriting existing file
    createOutputStream(file, codec);

    return true;
}
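
HdfsUtilities.getCodec() above resolves a codec from a user-supplied name. On Hadoop releases newer than the 1.1.2 mentioned in the IOUtils example, the stock CompressionCodecFactory#getCodecByName offers the same lookup; a minimal sketch:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

public class CodecByName {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        CompressionCodecFactory factory = new CompressionCodecFactory(conf);
        // Accepts a fully qualified class name or a short alias such as "gzip".
        CompressionCodec codec = factory.getCodecByName("gzip");
        System.out.println("extension: " + codec.getDefaultExtension()); // prints .gz
    }
}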