Example usage for the DefaultCodec() constructor of org.apache.hadoop.io.compress.DefaultCodec

Introduction

On this page you can find example usages of the org.apache.hadoop.io.compress.DefaultCodec constructor, DefaultCodec().

Prototype

public DefaultCodec()
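
The no-argument constructor yields a codec that still needs a Configuration before it can build streams, because DefaultCodec is Configurable. The sketch below is a minimal in-memory compress/decompress round trip, assuming only the public Hadoop compression API (setConf, createOutputStream, createInputStream); it is illustrative and not taken from any of the projects listed under Usage.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.DefaultCodec;

public class DefaultCodecRoundTrip {
    public static void main(String[] args) throws Exception {
        DefaultCodec codec = new DefaultCodec();
        codec.setConf(new Configuration()); // required: the codec reads its settings from the conf

        // Compress a small payload into memory.
        ByteArrayOutputStream compressed = new ByteArrayOutputStream();
        CompressionOutputStream out = codec.createOutputStream(compressed);
        out.write("hello, codec".getBytes("UTF-8"));
        out.finish(); // flush the final compressed block
        out.close();

        // Decompress and verify the round trip.
        CompressionInputStream in =
                codec.createInputStream(new ByteArrayInputStream(compressed.toByteArray()));
        ByteArrayOutputStream restored = new ByteArrayOutputStream();
        IOUtils.copyBytes(in, restored, 4096, true); // copies, then closes both streams
        System.out.println(restored.toString("UTF-8")); // prints: hello, codec
    }
}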

Usage

From source file:Importer.java

License:Open Source License

public static void copyFile(File file) throws Exception {
    //    String TEST_PREFIX = "";
    File destFile = new File(outDir, file.getName() + ".seq");
    Path dest = new Path(destFile.getAbsolutePath());

    Configuration conf = new Configuration();
    FileSystem fileSys = org.apache.hadoop.fs.FileSystem.get(new java.net.URI(conf.get("fs.default.name")),
            conf);
    CompressionCodec codec = new DefaultCodec();
    fileSys.mkdirs(dest.getParent());
    FSDataOutputStream outputStr = fileSys.create(dest);
    seqFileWriter = SequenceFile.createWriter(conf, outputStr, Text.class, Text.class,
            SequenceFile.CompressionType.BLOCK, codec);
    String filename = file.getName();
    InputStream in = new BufferedInputStream(new FileInputStream(file));
    if (filename.endsWith(".bz2")) {
        in.read();
        in.read(); //snarf header
        in = new CBZip2InputStream(in);
    }
    BufferedReader br = new BufferedReader(new InputStreamReader(in, "US-ASCII"));

    System.out.println("working on file " + file);
    int records = 0;
    long bytes = 0, bytes_since_status = 0;
    long startTime = System.currentTimeMillis();
    String s = null;
    Text content = new Text();
    while ((s = br.readLine()) != null) {
        if (s.startsWith("---END.OF.DOCUMENT---")) {
            Text name = new Text(hash(content));
            seqFileWriter.append(name, content);
            records++;
            content = new Text();
        } else {
            byte[] line_as_bytes = (s + " ").getBytes();
            for (byte b : line_as_bytes) {
                assert b < 128 : "found an unexpected high-bit set";
            }

            content.append(line_as_bytes, 0, line_as_bytes.length);
            bytes += line_as_bytes.length;
            /*
            bytes_since_status += line_as_bytes.length;
            if(bytes_since_status > 10 * 1024 * 1024) { //every 10 MB
              System.err.print('.');
              bytes_since_status = 0;
            }*/
        }
    } //end while
    if (content.getLength() > 5) {
        Text name = new Text(hash(content));
        seqFileWriter.append(name, content);
        records++;
    }
    totalBytes += bytes;
    totalRecords += records;
    long time = (System.currentTimeMillis() - startTime) / 1000 + 1;
    long kbSec = bytes / 1024 / time;
    System.out.println(new java.util.Date());
    System.out.println("File " + file.getName() + " " + records + " records, " + bytes + " bytes in " + time
            + " seconds (" + kbSec + " KB/sec).");
    in.close();
    seqFileWriter.close();
    outputStr.close();
}

From source file:TestCodec.java

License:Open Source License

public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    DefaultCodec codec = new DefaultCodec();
    codec.setConf(conf);
    DataOutputBuffer chunksWriteBuffer = new DataOutputBuffer();
    CompressionOutputStream compressionOutputStream = codec.createOutputStream(chunksWriteBuffer);

    DataInputBuffer chunkReadBuffer = new DataInputBuffer();
    CompressionInputStream compressionInputStream = codec.createInputStream(chunkReadBuffer);
    String str = "laksjldfkjalskdjfl;aksjdflkajsldkfjalksjdflkajlsdkfjlaksjdflka";
    compressionOutputStream.write(str.getBytes());
    compressionOutputStream.finish();
    byte[] data = chunksWriteBuffer.getData();
    System.out.println(str.length());
    System.out.println(chunksWriteBuffer.getLength());

    chunkReadBuffer.reset(data, chunksWriteBuffer.getLength());

    DataOutputBuffer dob = new DataOutputBuffer();
    IOUtils.copyBytes(compressionInputStream, dob, conf);
    System.out.println(new String(dob.getData(), 0, dob.getLength(), "UTF-8")); // decode the bytes; printing the array itself would only show a reference

}

From source file:com.alexholmes.hadooputils.combine.avro.AvroFileGenerator.java

License:Apache License

public int run(final String[] args) throws Exception {

    if (args.length != 2) {
        System.err.println(
                String.format("Usage: %s: <file path> <number of records>", AvroFileGenerator.class.getName()));
        return 1;
    }

    Path file = new Path(args[0]);
    int numRecords = Integer.valueOf(args[1]);

    FileSystem fs = FileSystem.get(super.getConf());

    SequenceFile.Writer writer = SequenceFile.createWriter(fs, super.getConf(), file, Text.class, Text.class,
            SequenceFile.CompressionType.BLOCK, new DefaultCodec());
    try {
        for (int i = 0; i < numRecords; i++) {
            writer.append(new Text("k" + i), new Text("v" + i));
        }
    } finally {
        writer.close();
    }

    return 0;
}

From source file:com.alexholmes.hadooputils.combine.seqfile.mapred.CombineSequenceFileTest.java

License:Apache License

public void writeSequenceFile(Path path) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, path, Text.class, Text.class,
            SequenceFile.CompressionType.BLOCK, new DefaultCodec());
    try {
        writer.append(key, value);
    } finally {
        writer.close();
    }
}

From source file:com.alexholmes.hadooputils.combine.seqfile.SequenceFileGenerator.java

License:Apache License

public int run(final String[] args) throws Exception {

    if (args.length != 2) {
        System.err.println(String.format("Usage: %s: <file path> <number of records>",
                SequenceFileGenerator.class.getName()));
        return 1;
    }

    Path file = new Path(args[0]);
    int numRecords = Integer.valueOf(args[1]);

    FileSystem fs = FileSystem.get(super.getConf());

    SequenceFile.Writer writer = SequenceFile.createWriter(fs, super.getConf(), file, Text.class, Text.class,
            SequenceFile.CompressionType.BLOCK, new DefaultCodec());
    try {
        for (int i = 0; i < numRecords; i++) {
            writer.append(new Text("k" + i), new Text("v" + i));
        }
    } finally {
        writer.close();
    }

    return 0;
}

From source file:com.asakusafw.runtime.directio.hadoop.SequenceFileFormatTest.java

License:Apache License

/**
 * compressed output.
 * @throws Exception if failed
 */
@Test
public void output_compressed() throws Exception {
    LocalFileSystem fs = FileSystem.getLocal(conf);
    Path path = new Path(folder.newFile("testing").toURI());
    try (ModelOutput<StringOption> out = format.codec(new DefaultCodec()).createOutput(StringOption.class, fs,
            path, new Counter())) {
        out.write(new StringOption("Hello, world!"));
    }

    try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf)) {
        assertThat(reader.getCompressionCodec(), instanceOf(DefaultCodec.class));
    }
}

From source file:com.asakusafw.runtime.io.sequencefile.SequenceFileUtilTest.java

License:Apache License

/**
 * Creates a compressed sequence file.
 * @throws Exception if failed
 */
@Test
public void write_compressed() throws Exception {
    DefaultCodec codec = new DefaultCodec();
    codec.setConf(conf);

    Path path = new Path("testing");

    LongWritable key = new LongWritable();
    LongWritable value = new LongWritable();
    try (OutputStream out = new FileOutputStream(fs.pathToFile(path));
            SequenceFile.Writer writer = SequenceFileUtil.openWriter(new BufferedOutputStream(out), conf,
                    key.getClass(), value.getClass(), codec)) {
        for (long i = 0; i < 300000; i++) {
            key.set(i);
            value.set(i + 1);
            writer.append(key, value);
        }
    }

    try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf)) {
        for (long i = 0; i < 300000; i++) {
            assertThat(reader.next(key, value), is(true));
            assertThat(key.get(), is(i));
            assertThat(value.get(), is(i + 1));
        }
        assertThat(reader.next(key, value), is(false));
    }
}

From source file:com.cloudera.flume.handlers.hdfs.TestDFSWrite.java

License:Apache License

@Test
public void testWhyFail() throws IOException {

    // There was a failure case using:
    FlumeConfiguration conf = FlumeConfiguration.get();
    Path path = new Path("file:///tmp/testfile");
    FileSystem hdfs = path.getFileSystem(conf);

    // writing
    FSDataOutputStream dos = hdfs.create(path);
    hdfs.deleteOnExit(path);

    // this version's Writer has ownOutputStream=false.
    Writer writer = SequenceFile.createWriter(conf, dos, WriteableEventKey.class, WriteableEvent.class,
            SequenceFile.CompressionType.NONE, new DefaultCodec());

    Event e = new EventImpl("EVENT".getBytes());

    writer.append(new WriteableEventKey(e), new WriteableEvent(e));
    writer.sync();
    writer.close();

    dos.close(); // the writer was created with ownOutputStream=false (see above),
    // so the underlying FSDataOutputStream must be closed explicitly

    // verify the writer actually flushed data to the file; in the original
    // failure case nothing had been written at this point
    FileStatus stats = hdfs.getFileStatus(path);
    assertTrue(stats.getLen() > 0);
}

From source file:com.cloudera.flume.handlers.hdfs.TestEscapedCustomOutputDfs.java

License:Apache License

/**
 * Test that writes a few log lines, compresses them with the default codec,
 * writes the result to disk, reads the compressed file back, and verifies the
 * written lines.
 *
 * @throws InterruptedException
 */
@Test
public void testDefaultCodec() throws IOException, InterruptedException {
    DefaultCodec codec = new DefaultCodec();
    codec.setConf(FlumeConfiguration.get()); // default needs conf
    checkOutputFormat("syslog", new SyslogEntryFormat(), "DefaultCodec", codec);
}

From source file:com.cloudera.flume.handlers.seqfile.SequenceFileOutputFormat.java

License:Apache License

public SequenceFileOutputFormat() {
    this(SequenceFile.getCompressionType(FlumeConfiguration.get()), new DefaultCodec());
}