List of usage examples for org.apache.hadoop.io.compress.DefaultCodec
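Before the project-specific examples, here is a minimal, self-contained sketch of the core DefaultCodec API: configure the codec, wrap an output stream with createOutputStream to compress, read the data back through createInputStream, and query the default file extension. The class name DefaultCodecSketch and the sample payload are illustrative only, not taken from any project below.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.DefaultCodec;

public class DefaultCodecSketch {
    public static void main(String[] args) throws Exception {
        DefaultCodec codec = new DefaultCodec();
        codec.setConf(new Configuration()); // DefaultCodec is Configurable; give it a conf before use

        // Compress a small payload into an in-memory buffer
        ByteArrayOutputStream compressed = new ByteArrayOutputStream();
        CompressionOutputStream out = codec.createOutputStream(compressed);
        out.write("hello default codec".getBytes("UTF-8"));
        out.close();

        // Decompress it again
        CompressionInputStream in = codec.createInputStream(new ByteArrayInputStream(compressed.toByteArray()));
        ByteArrayOutputStream restored = new ByteArrayOutputStream();
        IOUtils.copyBytes(in, restored, 4096, true);

        System.out.println(restored.toString("UTF-8"));  // hello default codec
        System.out.println(codec.getDefaultExtension()); // .deflate
    }
}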
From source file:com.facebook.presto.hive.HivePageSink.java
License:Apache License
private String generateRandomFileName(String outputFormat)
{
    // text format files must have the correct extension when compressed
    String extension = "";
    if (HiveConf.getBoolVar(conf, COMPRESSRESULT)
            && HiveIgnoreKeyTextOutputFormat.class.getName().equals(outputFormat)) {
        extension = new DefaultCodec().getDefaultExtension();

        String compressionCodecClass = conf.get("mapred.output.compression.codec");
        if (compressionCodecClass != null) {
            try {
                Class<? extends CompressionCodec> codecClass = conf.getClassByName(compressionCodecClass)
                        .asSubclass(CompressionCodec.class);
                extension = ReflectionUtil.newInstance(codecClass, conf).getDefaultExtension();
            }
            catch (ClassNotFoundException e) {
                throw new PrestoException(HIVE_UNSUPPORTED_FORMAT,
                        "Compression codec not found: " + compressionCodecClass, e);
            }
            catch (RuntimeException e) {
                throw new PrestoException(HIVE_UNSUPPORTED_FORMAT,
                        "Failed to load compression codec: " + compressionCodecClass, e);
            }
        }
    }
    return filePrefix + "_" + randomUUID() + extension;
}
From source file:com.facebook.presto.hive.HiveWriterFactory.java
License:Apache License
public static String getFileExtension(JobConf conf, StorageFormat storageFormat)
{
    // text format files must have the correct extension when compressed
    if (!HiveConf.getBoolVar(conf, COMPRESSRESULT)
            || !HiveIgnoreKeyTextOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return "";
    }

    String compressionCodecClass = conf.get("mapred.output.compression.codec");
    if (compressionCodecClass == null) {
        return new DefaultCodec().getDefaultExtension();
    }

    try {
        Class<? extends CompressionCodec> codecClass = conf.getClassByName(compressionCodecClass)
                .asSubclass(CompressionCodec.class);
        return ReflectionUtil.newInstance(codecClass, conf).getDefaultExtension();
    }
    catch (ClassNotFoundException e) {
        throw new PrestoException(HIVE_UNSUPPORTED_FORMAT,
                "Compression codec not found: " + compressionCodecClass, e);
    }
    catch (RuntimeException e) {
        throw new PrestoException(HIVE_UNSUPPORTED_FORMAT,
                "Failed to load compression codec: " + compressionCodecClass, e);
    }
}
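For reference, DefaultCodec.getDefaultExtension() returns ".deflate", so in both Presto examples above a compressed text output file falls back to a ".deflate" suffix unless mapred.output.compression.codec names a different codec class, in which case that codec's extension is used instead.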
From source file:com.hdfs.concat.crush.CrushReducerParameterizedTest.java
License:Apache License
/**
 * Every file in this unit test is named "file" followed by a number. This method will create a sequence file with
 * as many lines as the number in the file name. The keys in the file will count from one to the number. The values
 * in the file will count from 100n + 1 to 100n + n. This way each file will have distinct contents so long as no
 * two files have the same name.
 */
private Text writeFile(File srcDir, String fileName, Format format) throws IOException {

    int fileNum = Integer.parseInt(fileName.substring(4));

    File file = new File(srcDir, fileName);

    if (Format.TEXT == format) {
        PrintWriter writer = new PrintWriter(new FileOutputStream(file));

        for (int k = 1, v = 100 * fileNum + 1; k <= fileNum; k++, v++) {
            writer.printf("%d\t%d\n", k, v);
        }

        writer.close();
    } else {
        CustomWritable key = new CustomWritable();
        CustomWritable value = new CustomWritable();

        DefaultCodec codec = new DefaultCodec();
        codec.setConf(job);

        Writer writer = SequenceFile.createWriter(fs, job, new Path(file.getAbsolutePath()),
                CustomWritable.class, CustomWritable.class, compressionType, codec);

        for (int k = 1, v = 100 * fileNum + 1; k <= fileNum; k++, v++) {
            key.set(k);
            value.set(v);

            writer.append(key, value);
        }

        writer.close();
    }

    return new Text(file.getAbsolutePath());
}
From source file:com.hdfs.concat.crush.integration.CrushMapReduceTest.java
License:Apache License
@Before
@Override
public void setUp() throws Exception {
    super.setUp();

    job = createJobConf();

    job.setBoolean("mapred.output.compress", true);
    job.set("mapred.output.compression.type", CompressionType.BLOCK.name());
    job.set("mapred.output.compression.codec", CustomCompressionCodec.class.getName());

    FileSystem fs = getFileSystem();

    Path homeDirPath = fs.makeQualified(new Path("."));

    homeDir = homeDirPath.toUri().getPath();

    fs.delete(homeDirPath, true);

    defaultCodec = new DefaultCodec();
    defaultCodec.setConf(job);

    customCodec = new CustomCompressionCodec();
    customCodec.setConf(job);
}
From source file:com.m6d.filecrush.crush.integration.CrushMapReduceTest.java
License:Apache License
@Before
@Override
public void setUp() throws Exception {
    super.setUp();

    job = createJobConf();

    job.setBoolean("mapreduce.output.fileoutputformat.compress", true);
    job.set("mapreduce.output.fileoutputformat.compress.type", CompressionType.BLOCK.name());
    job.set("mapreduce.output.fileoutputformat.compress.codec", CustomCompressionCodec.class.getName());

    FileSystem fs = getFileSystem();

    Path homeDirPath = fs.makeQualified(new Path("."));

    homeDir = homeDirPath.toUri().getPath();

    fs.delete(homeDirPath, true);

    defaultCodec = new DefaultCodec();
    defaultCodec.setConf(job);

    customCodec = new CustomCompressionCodec();
    customCodec.setConf(job);
}
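The two CrushMapReduceTest variants above differ only in the configuration keys they set: the com.hdfs.concat version uses the older mapred.output.compress* property names, while the com.m6d.filecrush version uses the mapreduce.output.fileoutputformat.compress* names that replaced them in Hadoop 2. Both drive the same DefaultCodec and CustomCompressionCodec setup.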
From source file:com.myhp.hive.rcfile.RCFileGenerator.java
License:Apache License
private static void genData(String format, int numRows, String output, String plainOutput) throws Exception {
    int numFields = 0;
    if (format.equals("student")) {
        rand = new Random(numRows);
        numFields = 3;
    } else if (format.equals("voter")) {
        rand = new Random(1000000000 + numRows);
        numFields = 4;
    } else if (format.equals("alltypes")) {
        rand = new Random(2000000000L + numRows);
        numFields = 10;
    }

    RCFileOutputFormat.setColumnNumber(conf, numFields);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, getFile(output), null, new DefaultCodec());

    PrintWriter pw = new PrintWriter(new FileWriter(plainOutput));

    for (int j = 0; j < numRows; j++) {
        BytesRefArrayWritable row = new BytesRefArrayWritable(numFields);
        byte[][] fields = null;

        if (format.equals("student")) {
            byte[][] f = { randomName().getBytes("UTF-8"),
                    Integer.valueOf(randomAge()).toString().getBytes("UTF-8"),
                    Double.valueOf(randomGpa()).toString().getBytes("UTF-8") };
            fields = f;
        } else if (format.equals("voter")) {
            byte[][] f = { randomName().getBytes("UTF-8"),
                    Integer.valueOf(randomAge()).toString().getBytes("UTF-8"),
                    randomRegistration().getBytes("UTF-8"),
                    Double.valueOf(randomContribution()).toString().getBytes("UTF-8") };
            fields = f;
        } else if (format.equals("alltypes")) {
            byte[][] f = { Integer.valueOf(rand.nextInt(Byte.MAX_VALUE)).toString().getBytes("UTF-8"),
                    Integer.valueOf(rand.nextInt(Short.MAX_VALUE)).toString().getBytes("UTF-8"),
                    Integer.valueOf(rand.nextInt()).toString().getBytes("UTF-8"),
                    Long.valueOf(rand.nextLong()).toString().getBytes("UTF-8"),
                    Float.valueOf(rand.nextFloat() * 1000).toString().getBytes("UTF-8"),
                    Double.valueOf(rand.nextDouble() * 1000000).toString().getBytes("UTF-8"),
                    randomName().getBytes("UTF-8"),
                    randomMap(),
                    randomArray() };
            fields = f;
        }

        for (int i = 0; i < fields.length; i++) {
            BytesRefWritable field = new BytesRefWritable(fields[i], 0, fields[i].length);
            row.set(i, field);
            pw.print(new String(fields[i]));
            if (i != fields.length - 1)
                pw.print("\t");
            else
                pw.println();
        }

        writer.append(row);
    }

    writer.close();
    pw.close();
}
From source file:com.streamsets.pipeline.stage.destination.hdfs.writer.TestActiveRecordWriters.java
License:Apache License
@Test
public void testWritersLifecycle() throws Exception {
    URI uri = new URI("file:///");
    Configuration conf = new HdfsConfiguration();
    String prefix = "prefix";
    String template = getTestDir().toString()
            + "/${YYYY()}/${MM()}/${DD()}/${hh()}/${mm()}/${ss()}/${record:value('/')}";
    TimeZone timeZone = TimeZone.getTimeZone("UTC");
    long cutOffSecs = 2;
    long cutOffSize = 10000;
    long cutOffRecords = 2;
    HdfsFileType fileType = HdfsFileType.SEQUENCE_FILE;
    DefaultCodec compressionCodec = new DefaultCodec();
    compressionCodec.setConf(conf);
    SequenceFile.CompressionType compressionType = SequenceFile.CompressionType.BLOCK;
    String keyEL = "uuid()";
    DataGeneratorFactory generatorFactory = new DummyDataGeneratorFactory(null);
    RecordWriterManager mgr = new RecordWriterManager(uri, conf, prefix, template, timeZone, cutOffSecs,
            cutOffSize, cutOffRecords, fileType, compressionCodec, compressionType, keyEL, generatorFactory,
            ContextInfoCreator.createTargetContext(HdfsDTarget.class, "testWritersLifecycle", false,
                    OnRecordError.TO_ERROR, null),
            "dirPathTemplate");
    Assert.assertTrue(mgr.validateDirTemplate("g", "dirPathTemplate", new ArrayList<Stage.ConfigIssue>()));

    ActiveRecordWriters writers = new ActiveRecordWriters(mgr);

    Date now = new Date();

    // record older than cut off
    Date recordDate = new Date(now.getTime() - 3 * 1000 - 1);
    Record record = RecordCreator.create();
    record.set(Field.create("a"));
    Assert.assertNull(writers.get(now, recordDate, record));

    recordDate = new Date(now.getTime());
    RecordWriter writer = writers.get(now, recordDate, record);
    Assert.assertNotNull(writer);
    Path tempPath = writer.getPath();
    writer.write(record);
    writers.release(writer);
    // writer should still be open
    Assert.assertFalse(writer.isClosed());

    writer = writers.get(now, recordDate, record);
    writer.write(record);
    writers.release(writer);
    // writer should be closed because it went over the record count threshold
    Assert.assertTrue(writer.isClosed());

    // we should be able to get a new writer as the cutoff didn't kick in yet
    writer = writers.get(now, recordDate, record);
    Assert.assertNotNull(writer);
    writers.purge();
    // purging should not close the writer as the cutoff didn't kick in yet
    Assert.assertFalse(writer.isClosed());

    Thread.sleep(3001);
    writers.purge();
    // purging should close the writer now that the cutoff kicked in
    Assert.assertTrue(writer.isClosed());

    // verifying closeAll() closes writers
    writer = writers.get(new Date(), new Date(), record);
    Assert.assertNotNull(writer);
    writers.closeAll();
    Assert.assertTrue(writer.isClosed());
}
From source file:com.streamsets.pipeline.stage.destination.hdfs.writer.TestRecordWriterManager.java
License:Apache License
@Test
public void testCompressionCodec() throws Exception {
    testPath(new DefaultCodec());
}
From source file:com.streamsets.pipeline.stage.destination.hdfs.writer.TestRecordWriterManager.java
License:Apache License
@Test
public void testTextFileCompression() throws Exception {
    DefaultCodec codec = new DefaultCodec();
    codec.setConf(new Configuration());
    testTextFile(codec);
}
From source file:com.streamsets.pipeline.stage.destination.hdfs.writer.TestRecordWriterManager.java
License:Apache License
@Test
public void testSeqFileCompression() throws Exception {
    DefaultCodec codec = new DefaultCodec();
    codec.setConf(new Configuration());
    testSeqFile(codec, SequenceFile.CompressionType.RECORD);
    testSeqFile(codec, SequenceFile.CompressionType.BLOCK);
}