List of usage examples for the org.apache.hadoop.io.compress.DefaultCodec constructor DefaultCodec()
DefaultCodec
From source file:Importer.java
License:Open Source License
public static void copyFile(File file) throws Exception { // String TEST_PREFIX = ""; File destFile = new File(outDir, file.getName() + ".seq"); Path dest = new Path(destFile.getAbsolutePath()); Configuration conf = new Configuration(); FileSystem fileSys = org.apache.hadoop.fs.FileSystem.get(new java.net.URI(conf.get("fs.default.name")), conf);/* w ww .j av a2s . c o m*/ CompressionCodec codec = new DefaultCodec(); fileSys.mkdirs(dest.getParent()); FSDataOutputStream outputStr = fileSys.create(dest); seqFileWriter = SequenceFile.createWriter(conf, outputStr, Text.class, Text.class, SequenceFile.CompressionType.BLOCK, codec); String filename = file.getName(); InputStream in = new BufferedInputStream(new FileInputStream(file)); if (filename.endsWith(".bz2")) { in.read(); in.read(); //snarf header in = new CBZip2InputStream(in); } BufferedReader br = new BufferedReader(new InputStreamReader(in, "US-ASCII")); System.out.println("working on file " + file); int records = 0; long bytes = 0, bytes_since_status = 0; long startTime = System.currentTimeMillis(); String s = null; Text content = new Text(); while ((s = br.readLine()) != null) { if (s.startsWith("---END.OF.DOCUMENT---")) { Text name = new Text(hash(content)); seqFileWriter.append(name, content); records++; content = new Text(); } else { byte[] line_as_bytes = (s + " ").getBytes(); for (byte b : line_as_bytes) { assert b < 128 : "found an unexpected high-bit set"; } content.append(line_as_bytes, 0, line_as_bytes.length); bytes += line_as_bytes.length; /* bytes_since_status += line_as_bytes.length; if(bytes_since_status > 10 * 1024 * 1024) { //every 10 MB System.err.print('.'); bytes_since_status = 0; }*/ } } //end while if (content.getLength() > 5) { Text name = new Text(hash(content)); seqFileWriter.append(name, content); records++; } totalBytes += bytes; totalRecords += records; long time = (System.currentTimeMillis() - startTime) / 1000 + 1; long kbSec = bytes / 1024 / time; System.out.println(new 
java.util.Date()); System.out.println("File " + file.getName() + " " + records + " records, " + bytes + " bytes in " + time + " seconds (" + kbSec + " KB/sec)."); in.close(); seqFileWriter.close(); outputStr.close(); }
From source file:TestCodec.java
License:Open Source License
public static void main(String[] args) throws IOException { Configuration conf = new Configuration(); DefaultCodec codec = new DefaultCodec(); codec.setConf(conf);//from ww w .j a v a 2s . com DataOutputBuffer chunksWriteBuffer = new DataOutputBuffer(); CompressionOutputStream compressionOutputStream = codec.createOutputStream(chunksWriteBuffer); DataInputBuffer chunkReadBuffer = new DataInputBuffer(); CompressionInputStream compressionInputStream = codec.createInputStream(chunkReadBuffer); String str = "laksjldfkjalskdjfl;aksjdflkajsldkfjalksjdflkajlsdkfjlaksjdflka"; compressionOutputStream.write(str.getBytes()); compressionOutputStream.finish(); byte[] data = chunksWriteBuffer.getData(); System.out.println(str.length()); System.out.println(chunksWriteBuffer.getLength()); chunkReadBuffer.reset(data, chunksWriteBuffer.getLength()); DataOutputBuffer dob = new DataOutputBuffer(); IOUtils.copyBytes(compressionInputStream, dob, conf); System.out.println(dob.getData()); }
From source file:com.alexholmes.hadooputils.combine.avro.AvroFileGenerator.java
License:Apache License
/**
 * Writes a block-compressed SequenceFile of generated {@code Text} pairs
 * ("k0"/"v0", "k1"/"v1", ...) using {@link DefaultCodec}.
 *
 * @param args exactly two entries: the output file path and the number of records
 * @return 0 on success, 1 when the argument count is wrong
 * @throws Exception if the record count is not a number or writing fails
 */
public int run(final String[] args) throws Exception {
    final boolean argCountOk = args.length == 2;
    if (!argCountOk) {
        final String usage = String.format("Usage: %s: <file path> <number of records>",
                AvroFileGenerator.class.getName());
        System.err.println(usage);
        return 1;
    }

    final Path target = new Path(args[0]);
    final int total = Integer.parseInt(args[1]);

    final FileSystem fileSystem = FileSystem.get(super.getConf());
    final SequenceFile.Writer out = SequenceFile.createWriter(fileSystem, super.getConf(), target,
            Text.class, Text.class, SequenceFile.CompressionType.BLOCK, new DefaultCodec());
    try {
        int index = 0;
        while (index < total) {
            final Text recordKey = new Text("k" + index);
            final Text recordValue = new Text("v" + index);
            out.append(recordKey, recordValue);
            index++;
        }
    } finally {
        out.close();
    }
    return 0;
}
From source file:com.alexholmes.hadooputils.combine.seqfile.mapred.CombineSequenceFileTest.java
License:Apache License
/**
 * Writes a single record ({@code key}, {@code value}) to a block-compressed
 * SequenceFile at the given path, using {@link DefaultCodec}.
 *
 * @param path location of the SequenceFile to create
 * @throws IOException if the file system cannot be obtained or the write fails
 */
public void writeSequenceFile(Path path) throws IOException {
    final Configuration configuration = new Configuration();
    final FileSystem fileSystem = FileSystem.get(configuration);

    final SequenceFile.Writer out = SequenceFile.createWriter(
            fileSystem,
            configuration,
            path,
            Text.class,
            Text.class,
            SequenceFile.CompressionType.BLOCK,
            new DefaultCodec());
    try {
        out.append(key, value);
    } finally {
        out.close();
    }
}
From source file:com.alexholmes.hadooputils.combine.seqfile.SequenceFileGenerator.java
License:Apache License
/**
 * Generates a block-compressed SequenceFile whose records are the
 * {@code Text} pairs "k<i>" / "v<i>" for i from 0 up to the requested count.
 *
 * @param args the output file path followed by the number of records
 * @return 0 when the file was written, 1 when the arguments are not exactly two
 * @throws Exception if the record count cannot be parsed or writing fails
 */
public int run(final String[] args) throws Exception {
    if (args.length == 2) {
        final Path outputPath = new Path(args[0]);
        final int recordCount = Integer.parseInt(args[1]);

        final FileSystem targetFs = FileSystem.get(super.getConf());
        final SequenceFile.Writer seqWriter = SequenceFile.createWriter(
                targetFs, super.getConf(), outputPath, Text.class, Text.class,
                SequenceFile.CompressionType.BLOCK, new DefaultCodec());
        try {
            for (int n = 0; n != recordCount && n < recordCount; ++n) {
                seqWriter.append(new Text("k" + n), new Text("v" + n));
            }
        } finally {
            seqWriter.close();
        }
        return 0;
    }

    // Wrong number of arguments: explain the expected invocation.
    System.err.println(String.format("Usage: %s: <file path> <number of records>",
            SequenceFileGenerator.class.getName()));
    return 1;
}
From source file:com.asakusafw.runtime.directio.hadoop.SequenceFileFormatTest.java
License:Apache License
/**
 * Writes one record through the format with {@link DefaultCodec} configured,
 * then reopens the file and checks that the reader reports that codec.
 * @throws Exception if failed
 */
@Test
public void output_compressed() throws Exception {
    LocalFileSystem localFs = FileSystem.getLocal(conf);
    Path target = new Path(folder.newFile("testing").toURI());

    DefaultCodec codec = new DefaultCodec();
    try (ModelOutput<StringOption> sink =
            format.codec(codec).createOutput(StringOption.class, localFs, target, new Counter())) {
        sink.write(new StringOption("Hello, world!"));
    }

    try (SequenceFile.Reader in = new SequenceFile.Reader(localFs, target, conf)) {
        assertThat(in.getCompressionCodec(), instanceOf(DefaultCodec.class));
    }
}
From source file:com.asakusafw.runtime.io.sequencefile.SequenceFileUtilTest.java
License:Apache License
/**
 * Writes 300000 (i, i + 1) pairs to a compressed sequence file through
 * {@code SequenceFileUtil.openWriter} and reads them back in order.
 * @throws Exception if failed
 */
@Test
public void write_compressed() throws Exception {
    final long recordCount = 300000;

    DefaultCodec deflate = new DefaultCodec();
    deflate.setConf(conf);

    Path target = new Path("testing");
    LongWritable k = new LongWritable();
    LongWritable v = new LongWritable();

    try (OutputStream raw = new FileOutputStream(fs.pathToFile(target));
            SequenceFile.Writer out = SequenceFileUtil.openWriter(
                    new BufferedOutputStream(raw), conf, k.getClass(), v.getClass(), deflate)) {
        long next = 0;
        while (next < recordCount) {
            k.set(next);
            v.set(next + 1);
            out.append(k, v);
            next++;
        }
    }

    try (SequenceFile.Reader in = new SequenceFile.Reader(fs, target, conf)) {
        long expected = 0;
        while (expected < recordCount) {
            assertThat(in.next(k, v), is(true));
            assertThat(k.get(), is(expected));
            assertThat(v.get(), is(expected + 1));
            expected++;
        }
        // No records may remain after the expected ones.
        assertThat(in.next(k, v), is(false));
    }
}
From source file:com.cloudera.flume.handlers.hdfs.TestDFSWrite.java
License:Apache License
@Test public void testWhyFail() throws IOException { // There a was a failure case using : FlumeConfiguration conf = FlumeConfiguration.get(); Path path = new Path("file:///tmp/testfile"); FileSystem hdfs = path.getFileSystem(conf); // writing/*from w w w. ja va2s . c om*/ FSDataOutputStream dos = hdfs.create(path); hdfs.deleteOnExit(path); // this version's Writer has ownOutputStream=false. Writer writer = SequenceFile.createWriter(conf, dos, WriteableEventKey.class, WriteableEvent.class, SequenceFile.CompressionType.NONE, new DefaultCodec()); Event e = new EventImpl("EVENT".getBytes()); writer.append(new WriteableEventKey(e), new WriteableEvent(e)); writer.sync(); writer.close(); dos.close(); // It is strange that I have to close the underlying // FSDataOutputStream. // WTF: nothing written by this writer! FileStatus stats = hdfs.getFileStatus(path); assertTrue(stats.getLen() > 0); // it should have written something but it failed. }
From source file:com.cloudera.flume.handlers.hdfs.TestEscapedCustomOutputDfs.java
License:Apache License
/** * Test to write few log lines, compress using default, write to disk, read * back the compressed file and verify the written lines. * * @throws InterruptedException/* w ww .ja va 2s. c o m*/ */ @Test public void testDefaultCodec() throws IOException, InterruptedException { DefaultCodec codec = new DefaultCodec(); codec.setConf(FlumeConfiguration.get()); // default needs conf checkOutputFormat("syslog", new SyslogEntryFormat(), "DefaultCodec", codec); }
From source file:com.cloudera.flume.handlers.seqfile.SequenceFileOutputFormat.java
License:Apache License
/**
 * Creates an output format from the Flume configuration: the compression type
 * is whatever {@code SequenceFile.getCompressionType} returns for
 * {@code FlumeConfiguration.get()}, and the codec is a newly constructed
 * {@link DefaultCodec}. Both are forwarded to the two-argument constructor.
 */
public SequenceFileOutputFormat() { this(SequenceFile.getCompressionType(FlumeConfiguration.get()), new DefaultCodec()); }