List of usage examples for org.apache.hadoop.io.compress.CompressionCodec#getDefaultExtension()
String getDefaultExtension();
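getDefaultExtension() returns the file name suffix conventionally associated with a codec, including the leading dot (GzipCodec returns ".gz", BZip2Codec ".bz2"). A minimal sketch of the call in isolation, not taken from the examples below; the class name DefaultExtensionDemo is illustrative and assumes the Hadoop client libraries are on the classpath:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class DefaultExtensionDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Instantiate through ReflectionUtils so Configurable codecs receive a Configuration.
        CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, conf);
        System.out.println(codec.getDefaultExtension()); // prints ".gz"
    }
}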
From source file:be.ugent.intec.halvade.uploader.mapreduce.MyFastqOutputFormat.java
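Names the task's work file with the codec's default extension when compression is enabled, then wraps the file stream in the codec's output stream: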
public RecordWriter<PairedIdWritable, FastqRecord> getRecordWriter(TaskAttemptContext task) throws IOException {
    Configuration conf = task.getConfiguration();
    boolean isCompressed = getCompressOutput(task);
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(task, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(task, extension);
    FileSystem fs = file.getFileSystem(conf);
    OutputStream output;
    if (isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        output = new DataOutputStream(codec.createOutputStream(fileOut));
    } else {
        output = fs.create(file, false);
    }
    return new FastqRecordWriter(conf, output);
}
From source file:cn.lhfei.hadoop.ch04.FileDecompressor.java
License:Apache License
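Derives the decompressed output file name by stripping the codec's default extension from the input URI with CompressionCodecFactory.removeSuffix():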
/**
 * Usage: % hadoop FileDecompressor file.gz
 *
 * @param args the URI of the file to decompress
 */
public static void main(String[] args) {
    FileSystem fs = null;
    String uri = args[0];
    Path inputPath = null;
    Configuration conf = new Configuration();
    CompressionCodecFactory factory = null;
    InputStream in = null;
    OutputStream out = null;
    try {
        fs = FileSystem.get(URI.create(uri), conf);
        inputPath = new Path(uri);
        factory = new CompressionCodecFactory(conf);
        CompressionCodec codec = factory.getCodec(inputPath);
        if (codec == null) {
            System.err.println("No codec found for " + uri);
            System.exit(1);
        }
        String outputUri = CompressionCodecFactory.removeSuffix(uri, codec.getDefaultExtension());
        in = codec.createInputStream(fs.open(inputPath));
        out = fs.create(new Path(outputUri));
        IOUtils.copyBytes(in, out, conf);
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        IOUtils.closeStream(in);
        IOUtils.closeStream(out);
    }
}
From source file:co.nubetech.hiho.mapreduce.lib.output.FTPTextOutputFormat.java
License:Apache License
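Appends the codec's default extension to the work file name before streaming the output over FTP: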
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    String ip = conf.get(HIHOConf.FTP_ADDRESS);
    String portno = conf.get(HIHOConf.FTP_PORT);
    String usr = conf.get(HIHOConf.FTP_USER);
    String pwd = conf.get(HIHOConf.FTP_PASSWORD);
    String dir = getOutputPath(job).toString();
    System.out.println("\n\ninside ftpoutputformat" + ip + " " + portno + " " + usr + " " + pwd + " " + dir);
    String keyValueSeparator = conf.get("mapred.textoutputformat.separator", "\t");
    FTPClient f = new FTPClient();
    f.connect(ip, Integer.parseInt(portno));
    f.login(usr, pwd);
    f.changeWorkingDirectory(dir);
    f.setFileType(FTP.BINARY_FILE_TYPE);
    boolean isCompressed = getCompressOutput(job);
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    String filename = file.getName();
    if (!isCompressed) {
        // FSDataOutputStream fileOut = fs.create(file, false);
        OutputStream os = f.appendFileStream(filename);
        DataOutputStream fileOut = new DataOutputStream(os);
        return new FTPLineRecordWriter<K, V>(fileOut, keyValueSeparator, f);
    } else {
        // FSDataOutputStream fileOut = fs.create(file, false);
        OutputStream os = f.appendFileStream(filename);
        DataOutputStream fileOut = new DataOutputStream(os);
        return new FTPLineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator, f);
    }
}
From source file:co.nubetech.hiho.mapreduce.lib.output.NoKeyOnlyValueOutputFormat.java
License:Apache License
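Chooses the extension up front, then layers the codec's output stream over the raw file stream: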
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException {
    boolean isCompressed = getCompressOutput(context);
    Configuration conf = context.getConfiguration();
    String ext = "";
    CompressionCodec codec = null;
    if (isCompressed) {
        // create the named codec
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);
        ext = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(context, ext);
    FileSystem fs = file.getFileSystem(conf);
    FSDataOutputStream fileOut = fs.create(file, false);
    DataOutputStream ostream = fileOut;
    if (isCompressed) {
        ostream = new DataOutputStream(codec.createOutputStream(fileOut));
    }
    return new NoKeyRecordWriter<K, V>(ostream);
}
From source file:com.alexholmes.hadooputils.sort.DelimitedTextOutputFormat.java
License:Apache License
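Old-style mapred API: the default extension is appended directly to the task output path name: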
public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress)
        throws IOException {
    SortConfig sortConf = new SortConfig(job);
    boolean isCompressed = getCompressOutput(job);
    String lineSeparator = sortConf.getRowSeparator("\n");
    byte[] hexcode = SortConfig.getHexDelimiter(lineSeparator);
    lineSeparator = (hexcode != null) ? new String(hexcode, "UTF-8") : lineSeparator;
    if (!isCompressed) {
        Path file = FileOutputFormat.getTaskOutputPath(job, name);
        FileSystem fs = file.getFileSystem(job);
        FSDataOutputStream fileOut = fs.create(file, progress);
        return new DelimitedLineRecordWriter<K, V>(fileOut, lineSeparator);
    } else {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job);
        Path file = FileOutputFormat.getTaskOutputPath(job, name + codec.getDefaultExtension());
        FileSystem fs = file.getFileSystem(job);
        FSDataOutputStream fileOut = fs.create(file, progress);
        return new DelimitedLineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                lineSeparator);
    }
}
From source file:com.bonc.mr_roamRecognition_hjpt.comm.FileCountTextOutputFormat.java
License:Apache License
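The same name-then-wrap pattern for a line-oriented record writer: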
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    boolean isCompressed = getCompressOutput(job);
    String keyValueSeparator = conf.get(SEPERATOR, "\t");
    CompressionCodec codec = null;
    String extension = "";
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
        extension = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(job, extension);
    FileSystem fs = file.getFileSystem(conf);
    if (!isCompressed) {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new LineRecordWriter<K, V>(fileOut, keyValueSeparator);
    } else {
        FSDataOutputStream fileOut = fs.create(file, false);
        return new LineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator);
    }
}
From source file:com.cloudera.flume.handlers.hdfs.CustomDfsSink.java
License:Apache License
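A Flume sink that appends the codec's default extension to the destination HDFS path before opening the compressed writer: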
/**
 * Hadoop compression codecs that use native libraries require an instance of a
 * Configuration object, because they check whether the native libraries have
 * been loaded. GzipCodec, LzoCodec, and LzopCodec are all codecs that require
 * native libraries. GzipCodec is a slight exception: if the native libraries
 * are not accessible, it falls back to pure Java. This produces notices rather
 * than errors. BZip2Codec is an example of a codec that does not use native
 * libraries.
 */
@Override
public void open() throws IOException {
    FlumeConfiguration conf = FlumeConfiguration.get();
    FileSystem hdfs;
    String codecName = conf.getCollectorDfsCompressCodec();
    CompressionCodec codec = getCodec(conf, codecName);
    if (codec == null) {
        dstPath = new Path(path);
        hdfs = dstPath.getFileSystem(conf);
        pathManager = new PathManager(hdfs, dstPath.getParent(), dstPath.getName());
        writer = pathManager.open();
        LOG.info("Creating HDFS file: " + pathManager.getOpenPath());
        return;
    }
    Compressor cmp = codec.createCompressor();
    dstPath = new Path(path + codec.getDefaultExtension());
    hdfs = dstPath.getFileSystem(conf);
    pathManager = new PathManager(hdfs, dstPath.getParent(), dstPath.getName());
    writer = pathManager.open();
    try {
        writer = codec.createOutputStream(writer, cmp);
    } catch (NullPointerException npe) {
        // Hadoop tries to find the "native" version of the codec and, if that
        // fails, the Java version. If there is no Java version either,
        // createOutputStream exits via NPE. We capture this and convert it into
        // an IOException with a more useful error message.
        LOG.error("Unable to load compression codec " + codec);
        throw new IOException("Unable to load compression codec " + codec);
    }
    LOG.info("Creating " + codec + " compressed HDFS file: " + pathManager.getOpenPath());
}
From source file:com.cloudera.flume.handlers.hdfs.TestEscapedCustomOutputDfs.java
License:Apache License
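A test that reconstructs the expected output file name by appending the codec's default extension, then reads the file back through the codec: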
void checkOutputFormat(String format, OutputFormat of, String codecName, CompressionCodec codec)
        throws IOException, InterruptedException {
    // set the output format.
    FlumeConfiguration conf = FlumeConfiguration.get();
    conf.set(FlumeConfiguration.COLLECTOR_OUTPUT_FORMAT, format);
    conf.set(FlumeConfiguration.COLLECTOR_DFS_COMPRESS_CODEC, codecName);

    // build a sink that outputs to that format.
    File f = FileUtil.mktempdir();
    SinkBuilder builder = EscapedCustomDfsSink.builder();
    EventSink snk = builder.create(new Context(), "file:///" + f.getPath() + "/sub-%{service}");
    Event e = new EventImpl("this is a test message".getBytes());
    Attributes.setString(e, "service", "foo");
    snk.open();
    snk.append(e);
    snk.close();

    ByteArrayOutputStream exWriter = new ByteArrayOutputStream();
    of.format(exWriter, e);
    exWriter.close();
    String expected = new String(exWriter.toByteArray());

    // check the output to make sure it is what we expected.
    // handle compression codec / extensions when checking.
    String ext = ""; // file extension
    if (codec != null) {
        ext = codec.getDefaultExtension();
    }
    InputStream in = new FileInputStream(f.getPath() + "/sub-foo" + ext);
    if (codec != null) {
        in = codec.createInputStream(in);
    }
    byte[] buf = new byte[1];
    StringBuilder output = new StringBuilder();
    // read the file
    while ((in.read(buf)) > 0) {
        output.append(new String(buf));
    }
    in.close(); // Must close for windows to delete
    assertEquals(expected, output.toString());

    // This doesn't get deleted in windows but the core test succeeds
    assertTrue("temp folder successfully deleted", FileUtil.rmr(f));
}
From source file:com.cloudera.sqoop.mapreduce.RawKeyTextOutputFormat.java
License:Apache License
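Sqoop's raw-key text output format follows the same pattern: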
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException {
    boolean isCompressed = getCompressOutput(context);
    Configuration conf = context.getConfiguration();
    String ext = "";
    CompressionCodec codec = null;
    if (isCompressed) {
        // create the named codec
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class);
        codec = ReflectionUtils.newInstance(codecClass, conf);
        ext = codec.getDefaultExtension();
    }
    Path file = getDefaultWorkFile(context, ext);
    FileSystem fs = file.getFileSystem(conf);
    FSDataOutputStream fileOut = fs.create(file, false);
    DataOutputStream ostream = fileOut;
    if (isCompressed) {
        ostream = new DataOutputStream(codec.createOutputStream(fileOut));
    }
    return new RawKeyRecordWriter<K, V>(ostream);
}
From source file:com.cloudera.sqoop.TestCompression.java
License:Apache License
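A Sqoop test that locates the imported data file by appending the codec's default extension to the expected data file path: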
public void runTextCompressionTest(CompressionCodec codec, int expectedNum) throws IOException {
    String[] columns = HsqldbTestServer.getFieldNames();
    String[] argv = getArgv(true, columns, codec, "--as-textfile");
    runImport(argv);

    Configuration conf = new Configuration();
    if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
        conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
    }
    FileSystem fs = FileSystem.get(conf);

    if (codec == null) {
        codec = new GzipCodec();
    }
    ReflectionUtils.setConf(codec, getConf());
    Path p = new Path(getDataFilePath().toString() + codec.getDefaultExtension());
    InputStream is = codec.createInputStream(fs.open(p));
    BufferedReader r = new BufferedReader(new InputStreamReader(is));
    int numLines = 0;
    while (true) {
        String ln = r.readLine();
        if (ln == null) {
            break;
        }
        numLines++;
    }
    r.close();
    assertEquals(expectedNum, numLines);
}