List of usage examples for org.apache.hadoop.io.compress.DefaultCodec
From source file:gobblin.metastore.FsStateStore.java
License:Apache License
/**
 * See {@link StateStore#putAll(String, String, Collection)}.
 *
 * <p>
 * This implementation does not support putting the state objects into an existing store, as
 * append is not yet supported by the Hadoop SequenceFile (HADOOP-7139).
 * </p>
 */
@Override
public void putAll(String storeName, String tableName, Collection<T> states) throws IOException {
    String tmpTableName = this.useTmpFileForPut ? TMP_FILE_PREFIX + tableName : tableName;
    Path tmpTablePath = new Path(new Path(this.storeRootDir, storeName), tmpTableName);

    if (!this.fs.exists(tmpTablePath) && !create(storeName, tmpTableName)) {
        throw new IOException("Failed to create a state file for table " + tmpTableName);
    }

    Closer closer = Closer.create();
    try {
        @SuppressWarnings("deprecation")
        SequenceFile.Writer writer = closer.register(SequenceFile.createWriter(this.fs, this.conf, tmpTablePath,
                Text.class, this.stateClass, SequenceFile.CompressionType.BLOCK, new DefaultCodec()));
        for (T state : states) {
            writer.append(new Text(Strings.nullToEmpty(state.getId())), state);
        }
    } catch (Throwable t) {
        throw closer.rethrow(t);
    } finally {
        closer.close();
    }

    if (this.useTmpFileForPut) {
        Path tablePath = new Path(new Path(this.storeRootDir, storeName), tableName);
        HadoopUtils.renamePath(this.fs, tmpTablePath, tablePath);
    }
}
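All of the writer examples on this page record the codec in the SequenceFile header, so a reader does not need to name DefaultCodec explicitly. Below is a minimal sketch of reading such a table file back, assuming a Hadoop 2+ classpath; the StateFileReader class and the command-line path are hypothetical stand-ins.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;

public class StateFileReader {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Hypothetical path; substitute the table file written by putAll().
        Path tablePath = new Path(args[0]);
        SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(tablePath));
        try {
            Text key = new Text();
            // The value class, compression type and codec are all read from the file header.
            Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
            while (reader.next(key, value)) {
                System.out.println(key + " => " + value);
            }
        } finally {
            reader.close();
        }
    }
}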
From source file:hip.ch3.seqfile.writable.seqfile.SequenceFileStockWriter.java
/**
 * Write the sequence file.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
    Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
    int result = cli.runCmd();
    if (result != 0) {
        return result;
    }

    File inputFile = new File(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
    Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));
    Configuration conf = super.getConf();

    SequenceFile.Writer writer = //<co id="ch03_comment_seqfile_write1"/>
            SequenceFile.createWriter(conf,
                    SequenceFile.Writer.file(outputPath),
                    SequenceFile.Writer.keyClass(Text.class),
                    SequenceFile.Writer.valueClass(StockPriceWritable.class),
                    SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new DefaultCodec()));
    try {
        Text key = new Text();
        for (String line : FileUtils.readLines(inputFile)) {
            StockPriceWritable stock = StockPriceWritable.fromLine(line);
            System.out.println("Stock = " + stock);
            key.set(stock.getSymbol());
            writer.append(key, stock); //<co id="ch03_comment_seqfile_write4"/>
        }
    } finally {
        writer.close();
    }
    return 0;
}
From source file:io.transwarp.flume.sink.HDFSCompressedDataStream.java
License:Apache License
@Override
public void open(String filePath) throws IOException {
    DefaultCodec defCodec = new DefaultCodec();
    CompressionType cType = CompressionType.BLOCK;
    open(filePath, defCodec, cType);
}
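Outside of SequenceFile, DefaultCodec can also wrap a raw output stream directly. A minimal sketch under that assumption (the DeflateWrite class, output path, and payload are hypothetical); DefaultCodec is Configurable, so a Configuration must be set before creating streams:

import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.DefaultCodec;

public class DeflateWrite {
    public static void main(String[] args) throws Exception {
        DefaultCodec codec = new DefaultCodec();
        // DefaultCodec implements Configurable; give it a Configuration before use.
        codec.setConf(new Configuration());

        // Hypothetical output path; DefaultCodec's default extension is ".deflate".
        OutputStream fileOut = Files.newOutputStream(Paths.get("data" + codec.getDefaultExtension()));
        CompressionOutputStream out = codec.createOutputStream(fileOut);
        try {
            out.write("hello, deflate".getBytes("UTF-8"));
            out.finish(); // flush any remaining buffered compressed data
        } finally {
            out.close();
        }
    }
}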
From source file:io.warp10.continuum.Dump.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    String dumpurl = args[0];
    String seqfile = args[1];

    //
    // Open output SequenceFile
    //

    Configuration conf = getConf();

    //
    // Open output file
    //

    FSDataOutputStream out = null;

    if ("-".equals(args[args.length - 1])) {
        out = new FSDataOutputStream(System.out, null);
    }

    SequenceFile.Writer writer = SequenceFile.createWriter(conf,
            SequenceFile.Writer.compression(CompressionType.BLOCK, new DefaultCodec()),
            SequenceFile.Writer.keyClass(BytesWritable.class),
            SequenceFile.Writer.valueClass(BytesWritable.class),
            null == out ? SequenceFile.Writer.file(new Path(args[args.length - 1]))
                    : SequenceFile.Writer.stream(out));

    InputStream is = null;

    if (dumpurl.startsWith("http://") || dumpurl.startsWith("https://")) {
        URLConnection conn = new URL(dumpurl).openConnection();
        conn.setDoInput(true);
        conn.connect();
        is = conn.getInputStream();
    } else if ("-".equals(dumpurl)) {
        is = System.in;
    } else {
        is = new FileInputStream(dumpurl);
    }

    BufferedReader br = new BufferedReader(new InputStreamReader(is));

    TSerializer serializer = new TSerializer(new TCompactProtocol.Factory());

    while (true) {
        String line = br.readLine();
        if (null == line) {
            break;
        }

        //
        // Extract ts// class{labels}
        //

        String meta = line.substring(0, line.indexOf('}') + 1);

        //
        // Parse a dummy line 'ts// class{labels} T' to retrieve the Metadata
        //

        GTSEncoder encoder = GTSHelper.parse(null, meta + " T");
        Metadata metadata = encoder.getMetadata();

        // Retrieve potential dummy elevation which encodes the number of datapoints
        GTSDecoder decoder = encoder.getDecoder();
        decoder.next();
        long count = decoder.getElevation();

        //
        // Create a GTSWrapper
        //

        GTSWrapper wrapper = new GTSWrapper();
        wrapper.setMetadata(metadata);
        wrapper.setBase(encoder.getBaseTimestamp());

        if (GeoTimeSerie.NO_ELEVATION != count) {
            wrapper.setCount(count);
        } else {
            wrapper.setCount(0L);
        }

        //
        // Retrieve encoded datapoints
        //

        byte[] datapoints = OrderPreservingBase64
                .decode(line.substring(line.indexOf('}') + 2).getBytes(Charsets.UTF_8));

        writer.append(new BytesWritable(serializer.serialize(wrapper)), new BytesWritable(datapoints));
    }

    writer.close();
    br.close();
    is.close();

    return 0;
}
From source file:io.warp10.standalone.StandaloneChunkedMemoryStore.java
License:Apache License
public void dump(String path) throws IOException {
    long nano = System.nanoTime();
    int gts = 0;
    long bytes = 0L;

    Configuration conf = new Configuration();
    conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

    BytesWritable key = new BytesWritable();
    BytesWritable value = new BytesWritable();

    CompressionCodec codec = new DefaultCodec();

    SequenceFile.Writer writer = null;
    SequenceFile.Writer.Option optPath = SequenceFile.Writer.file(new Path(path));
    SequenceFile.Writer.Option optKey = SequenceFile.Writer.keyClass(key.getClass());
    SequenceFile.Writer.Option optVal = SequenceFile.Writer.valueClass(value.getClass());
    SequenceFile.Writer.Option optCom = SequenceFile.Writer.compression(CompressionType.RECORD, codec);

    writer = SequenceFile.createWriter(conf, optPath, optKey, optVal, optCom);

    TSerializer serializer = new TSerializer(new TCompactProtocol.Factory());

    try {
        for (Entry<BigInteger, InMemoryChunkSet> entry : this.series.entrySet()) {
            gts++;
            Metadata metadata = this.directoryClient.getMetadataById(entry.getKey());

            List<GTSDecoder> decoders = entry.getValue().getDecoders();

            for (GTSDecoder decoder : decoders) {
                GTSWrapper wrapper = new GTSWrapper(metadata);
                wrapper.setBase(decoder.getBaseTimestamp());
                wrapper.setCount(decoder.getCount());

                byte[] data = serializer.serialize(wrapper);
                key.set(data, 0, data.length);

                ByteBuffer bb = decoder.getBuffer();
                ByteBuffer rwbb = ByteBuffer.allocate(bb.remaining());
                rwbb.put(bb);
                rwbb.rewind();
                value.set(rwbb.array(), rwbb.arrayOffset(), rwbb.remaining());

                bytes += key.getLength() + value.getLength();

                writer.append(key, value);
            }
        }
    } catch (IOException ioe) {
        ioe.printStackTrace();
        throw ioe;
    } catch (Exception e) {
        e.printStackTrace();
        throw new IOException(e);
    }

    writer.close();

    nano = System.nanoTime() - nano;
    System.out.println("Dumped " + gts + " GTS (" + bytes + " bytes) in " + (nano / 1000000.0D) + " ms.");
}
From source file:io.warp10.standalone.StandaloneMemoryStore.java
License:Apache License
public void dump(String path) throws IOException {
    long nano = System.nanoTime();
    int gts = 0;
    long bytes = 0L;

    Configuration conf = new Configuration();
    conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

    BytesWritable key = new BytesWritable();
    BytesWritable value = new BytesWritable();

    CompressionCodec codec = new DefaultCodec();

    SequenceFile.Writer writer = null;
    SequenceFile.Writer.Option optPath = SequenceFile.Writer.file(new Path(path));
    SequenceFile.Writer.Option optKey = SequenceFile.Writer.keyClass(key.getClass());
    SequenceFile.Writer.Option optVal = SequenceFile.Writer.valueClass(value.getClass());
    SequenceFile.Writer.Option optCom = SequenceFile.Writer.compression(CompressionType.RECORD, codec);

    writer = SequenceFile.createWriter(conf, optPath, optKey, optVal, optCom);

    TSerializer serializer = new TSerializer(new TCompactProtocol.Factory());

    try {
        for (Entry<BigInteger, GTSEncoder> entry : this.series.entrySet()) {
            gts++;
            Metadata metadata = this.directoryClient.getMetadataById(entry.getKey());

            GTSWrapper wrapper = new GTSWrapper(metadata);

            GTSEncoder encoder = entry.getValue();

            wrapper.setBase(encoder.getBaseTimestamp());
            wrapper.setCount(encoder.getCount());

            byte[] data = serializer.serialize(wrapper);
            key.set(data, 0, data.length);

            data = encoder.getBytes();
            value.set(data, 0, data.length);

            bytes += key.getLength() + value.getLength();

            writer.append(key, value);
        }
    } catch (IOException ioe) {
        ioe.printStackTrace();
        throw ioe;
    } catch (Exception e) {
        e.printStackTrace();
        throw new IOException(e);
    }

    writer.close();

    nano = System.nanoTime() - nano;
    System.out.println("Dumped " + gts + " GTS (" + bytes + " bytes) in " + (nano / 1000000.0D) + " ms.");
}
From source file:ldbc.snb.datagen.serializer.UpdateEventSerializer.java
License:Open Source License
public UpdateEventSerializer(Configuration conf, String fileNamePrefix, int reducerId, int numPartitions)
        throws IOException {
    conf_ = conf;
    reducerId_ = reducerId;
    stringBuffer_ = new StringBuffer(512);
    data_ = new ArrayList<String>();
    list_ = new ArrayList<String>();
    currentEvent_ = new UpdateEvent(-1, -1, UpdateEvent.UpdateEventType.NO_EVENT, "");
    numPartitions_ = numPartitions;
    stats_ = new UpdateStreamStats();
    fileNamePrefix_ = fileNamePrefix;
    try {
        streamWriter_ = new SequenceFile.Writer[numPartitions_];
        FileContext fc = FileContext.getFileContext(conf);
        for (int i = 0; i < numPartitions_; ++i) {
            Path outFile = new Path(fileNamePrefix_ + "_" + i);
            // Note: with CompressionType.NONE the DefaultCodec instance is effectively unused.
            streamWriter_[i] = SequenceFile.createWriter(fc, conf, outFile, UpdateEventKey.class, Text.class,
                    CompressionType.NONE, new DefaultCodec(), new SequenceFile.Metadata(),
                    EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE),
                    Options.CreateOpts.checksumParam(Options.ChecksumOpt.createDisabled()));
            FileSystem fs = FileSystem.get(conf);
            Path propertiesFile = new Path(fileNamePrefix_ + ".properties");
            if (fs.exists(propertiesFile)) {
                FSDataInputStream file = fs.open(propertiesFile);
                Properties properties = new Properties();
                properties.load(file);
                stats_.minDate_ = Long
                        .parseLong(properties.getProperty("ldbc.snb.interactive.min_write_event_start_time"));
                stats_.maxDate_ = Long
                        .parseLong(properties.getProperty("ldbc.snb.interactive.max_write_event_start_time"));
                stats_.count_ = Long.parseLong(properties.getProperty("ldbc.snb.interactive.num_events"));
                file.close();
                fs.delete(propertiesFile, true);
            }
        }
    } catch (IOException e) {
        throw e;
    }
}
From source file:nl.surfsara.newsreader.loader.WriteNewsreaderDocs.java
License:Apache License
private void initWriter(Configuration conf, String path) throws IOException {
    CompressionCodec codec = new DefaultCodec();
    writer = null;

    Option optPath = SequenceFile.Writer.file(new Path(path));
    Option optKey = SequenceFile.Writer.keyClass(Text.class);
    Option optVal = SequenceFile.Writer.valueClass(Text.class);
    Option optCom = SequenceFile.Writer.compression(CompressionType.BLOCK, codec);

    writer = SequenceFile.createWriter(conf, optPath, optKey, optVal, optCom);
}
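For plain (non-SequenceFile) compressed files, the codec is usually resolved from the file extension instead of being hard-coded. A hedged sketch using CompressionCodecFactory, which maps DefaultCodec's ".deflate" extension back to the codec; the DeflateRead class and input path are hypothetical:

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

public class DeflateRead {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Hypothetical input path, e.g. one ending in ".deflate".
        Path input = new Path(args[0]);

        CompressionCodecFactory factory = new CompressionCodecFactory(conf);
        CompressionCodec codec = factory.getCodec(input); // null if the extension is unknown
        FileSystem fs = input.getFileSystem(conf);

        // Fall back to the raw stream when the file is not compressed.
        InputStream in = (codec != null) ? codec.createInputStream(fs.open(input)) : fs.open(input);
        BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        } finally {
            reader.close();
        }
    }
}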
From source file:org.apache.accumulo.server.logger.LogWriter.java
License:Apache License
@Override
public LogCopyInfo startCopy(TInfo info, AuthInfo credentials, final String localLog,
        final String fullyQualifiedFileName, final boolean sort) {
    log.info("Copying " + localLog + " to " + fullyQualifiedFileName);

    final long t1 = System.currentTimeMillis();
    try {
        Long id = file2id.get(localLog);
        if (id != null)
            close(info, id);
    } catch (NoSuchLogIDException e) {
        log.error("Unexpected error thrown", e);
        throw new RuntimeException(e);
    }

    File file;
    try {
        file = new File(findLocalFilename(localLog));
        log.info(file.getAbsoluteFile().toString());
    } catch (FileNotFoundException ex) {
        throw new RuntimeException(ex);
    }
    long result = file.length();

    copyThreadPool.execute(new Runnable() {
        @Override
        public void run() {
            Thread.currentThread().setName("Copying " + localLog + " to shared file system");
            for (int i = 0; i < 3; i++) {
                try {
                    if (sort) {
                        copySortLog(localLog, fullyQualifiedFileName);
                    } else {
                        copyLog(localLog, fullyQualifiedFileName);
                    }
                    return;
                } catch (IOException e) {
                    log.error("error during copy", e);
                }
                UtilWaitThread.sleep(1000);
            }
            log.error("Unable to copy file to DFS, too many retries " + localLog);
            try {
                fs.create(new Path(fullyQualifiedFileName + ".failed")).close();
            } catch (IOException ex) {
                log.error("Unable to create failure flag file", ex);
            }
            long t2 = System.currentTimeMillis();
            if (metrics.isEnabled())
                metrics.add(LogWriterMetrics.copy, (t2 - t1));
        }

        private void copySortLog(String localLog, String fullyQualifiedFileName) throws IOException {
            final long SORT_BUFFER_SIZE = acuConf.getMemoryInBytes(Property.LOGGER_SORT_BUFFER_SIZE);

            FileSystem local = TraceFileSystem.wrap(FileSystem.getLocal(fs.getConf()).getRaw());
            Path dest = new Path(fullyQualifiedFileName + ".recovered");
            log.debug("Sorting log file to DFS " + dest);
            fs.mkdirs(dest);
            int part = 0;

            Reader reader = new SequenceFile.Reader(local, new Path(findLocalFilename(localLog)), fs.getConf());
            try {
                final ArrayList<Pair<LogFileKey, LogFileValue>> kv = new ArrayList<Pair<LogFileKey, LogFileValue>>();
                long memorySize = 0;
                while (true) {
                    final long position = reader.getPosition();
                    final LogFileKey key = new LogFileKey();
                    final LogFileValue value = new LogFileValue();
                    try {
                        if (!reader.next(key, value))
                            break;
                    } catch (EOFException e) {
                        log.warn("Unexpected end of file reading write ahead log " + localLog);
                        break;
                    }
                    kv.add(new Pair<LogFileKey, LogFileValue>(key, value));
                    memorySize += reader.getPosition() - position;
                    if (memorySize > SORT_BUFFER_SIZE) {
                        writeSortedEntries(dest, part++, kv);
                        kv.clear();
                        memorySize = 0;
                    }
                }
                if (!kv.isEmpty())
                    writeSortedEntries(dest, part++, kv);
                fs.create(new Path(dest, "finished")).close();
            } finally {
                reader.close();
            }
        }

        private void writeSortedEntries(Path dest, int part, final List<Pair<LogFileKey, LogFileValue>> kv)
                throws IOException {
            String path = dest + String.format("/part-r-%05d", part);
            log.debug("Writing partial log file to DFS " + path);
            log.debug("Sorting");
            Span span = Trace.start("Logger sort");
            span.data("logfile", dest.getName());
            Collections.sort(kv, new Comparator<Pair<LogFileKey, LogFileValue>>() {
                @Override
                public int compare(Pair<LogFileKey, LogFileValue> o1, Pair<LogFileKey, LogFileValue> o2) {
                    return o1.getFirst().compareTo(o2.getFirst());
                }
            });
            span.stop();
            span = Trace.start("Logger write");
            span.data("logfile", dest.getName());
            MapFile.Writer writer = new MapFile.Writer(fs.getConf(), fs, path, LogFileKey.class,
                    LogFileValue.class);
            short replication = (short) acuConf.getCount(Property.LOGGER_RECOVERY_FILE_REPLICATION);
            fs.setReplication(new Path(path + "/" + MapFile.DATA_FILE_NAME), replication);
            fs.setReplication(new Path(path + "/" + MapFile.INDEX_FILE_NAME), replication);
            try {
                for (Pair<LogFileKey, LogFileValue> entry : kv)
                    writer.append(entry.getFirst(), entry.getSecond());
            } finally {
                writer.close();
                span.stop();
            }
        }

        private void copyLog(final String localLog, final String fullyQualifiedFileName) throws IOException {
            Path dest = new Path(fullyQualifiedFileName + ".copy");
            log.debug("Copying log file to DFS " + dest);
            fs.delete(dest, true);
            LogFileKey key = new LogFileKey();
            LogFileValue value = new LogFileValue();
            Writer writer = null;
            Reader reader = null;
            try {
                short replication = (short) acuConf.getCount(Property.LOGGER_RECOVERY_FILE_REPLICATION);
                writer = SequenceFile.createWriter(fs, fs.getConf(), dest, LogFileKey.class, LogFileValue.class,
                        fs.getConf().getInt("io.file.buffer.size", 4096), replication, fs.getDefaultBlockSize(),
                        SequenceFile.CompressionType.BLOCK, new DefaultCodec(), null, new Metadata());
                FileSystem local = TraceFileSystem.wrap(FileSystem.getLocal(fs.getConf()).getRaw());
                reader = new SequenceFile.Reader(local, new Path(findLocalFilename(localLog)), fs.getConf());
                while (reader.next(key, value)) {
                    writer.append(key, value);
                }
            } catch (IOException ex) {
                log.warn("May have a partial copy of a recovery file: " + localLog, ex);
            } finally {
                if (reader != null)
                    reader.close();
                if (writer != null)
                    writer.close();
            }
            // Make the file appear in the shared file system under the target name only after it is completely copied
            fs.rename(dest, new Path(fullyQualifiedFileName));
            log.info("Copying " + localLog + " complete");
        }
    });

    return new LogCopyInfo(result, null);
}
From source file:org.apache.gobblin.metastore.FsStateStore.java
License:Apache License
/**
 * See {@link StateStore#put(String, String, T)}.
 *
 * <p>
 * This implementation does not support putting the state object into an existing store, as
 * append is not yet supported by the Hadoop SequenceFile (HADOOP-7139).
 * </p>
 */
@Override
public void put(String storeName, String tableName, T state) throws IOException {
    String tmpTableName = this.useTmpFileForPut ? TMP_FILE_PREFIX + tableName : tableName;
    Path tmpTablePath = new Path(new Path(this.storeRootDir, storeName), tmpTableName);

    if (!this.fs.exists(tmpTablePath) && !create(storeName, tmpTableName)) {
        throw new IOException("Failed to create a state file for table " + tmpTableName);
    }

    Closer closer = Closer.create();
    try {
        @SuppressWarnings("deprecation")
        SequenceFile.Writer writer = closer.register(SequenceFile.createWriter(this.fs, this.conf, tmpTablePath,
                Text.class, this.stateClass, SequenceFile.CompressionType.BLOCK, new DefaultCodec()));
        writer.append(new Text(Strings.nullToEmpty(state.getId())), state);
    } catch (Throwable t) {
        throw closer.rethrow(t);
    } finally {
        closer.close();
    }

    if (this.useTmpFileForPut) {
        Path tablePath = new Path(new Path(this.storeRootDir, storeName), tableName);
        renamePath(tmpTablePath, tablePath);
    }
}
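Both Gobblin variants call the deprecated createWriter(FileSystem, Configuration, ...) overload. A sketch of what the equivalent Option-based call (available since Hadoop 2, which resolves the FileSystem from the path and Configuration) might look like; the OptionBasedWriter class, the path argument, and the use of Text as a stand-in for the state class are assumptions:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.DefaultCodec;

public class OptionBasedWriter {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Hypothetical stand-in for tmpTablePath above.
        Path tmpTablePath = new Path(args[0]);
        SequenceFile.Writer writer = SequenceFile.createWriter(conf,
                SequenceFile.Writer.file(tmpTablePath),
                SequenceFile.Writer.keyClass(Text.class),
                SequenceFile.Writer.valueClass(Text.class), // substitute the actual state class
                SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new DefaultCodec()));
        try {
            writer.append(new Text("id"), new Text("state"));
        } finally {
            writer.close();
        }
    }
}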