Example usage for org.apache.hadoop.io.compress.DefaultCodec: the DefaultCodec() constructor

Introduction

On this page you can find usage examples for the org.apache.hadoop.io.compress.DefaultCodec constructor, DefaultCodec().

Prototype

public DefaultCodec()
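
Before the project examples under Usage, here is a minimal, self-contained sketch of the constructor on its own: it builds a DefaultCodec, hands it a Configuration (the codec is Configurable), and wraps a raw output stream in a compressed one. The class name DefaultCodecExample and the /tmp output path are illustrative only, not taken from any of the projects below.

import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.DefaultCodec;

public class DefaultCodecExample {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // DefaultCodec is Configurable; give it the configuration so it can pick up
        // codec-related settings such as the zlib compression level.
        DefaultCodec codec = new DefaultCodec();
        codec.setConf(conf);

        FileSystem fs = FileSystem.getLocal(conf);
        // getDefaultExtension() returns ".deflate" for DefaultCodec; the path is hypothetical.
        Path out = new Path("/tmp/example" + codec.getDefaultExtension());

        try (OutputStream raw = fs.create(out);
                CompressionOutputStream compressed = codec.createOutputStream(raw)) {
            compressed.write("hello, compressed world".getBytes(StandardCharsets.UTF_8));
        }
    }
}

Most of the examples below do not use the codec directly; they pass a fresh instance to a SequenceFile writer, typically via SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new DefaultCodec()).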

Usage

From source file:gobblin.metastore.FsStateStore.java

License:Apache License

/**
 * See {@link StateStore#putAll(String, String, Collection)}.
 *
 * <p>
 *   This implementation does not support putting the state objects into an existing store, as
 *   append is not yet supported by the Hadoop SequenceFile (HADOOP-7139).
 * </p>
 */
@Override
public void putAll(String storeName, String tableName, Collection<T> states) throws IOException {
    String tmpTableName = this.useTmpFileForPut ? TMP_FILE_PREFIX + tableName : tableName;
    Path tmpTablePath = new Path(new Path(this.storeRootDir, storeName), tmpTableName);

    if (!this.fs.exists(tmpTablePath) && !create(storeName, tmpTableName)) {
        throw new IOException("Failed to create a state file for table " + tmpTableName);
    }

    Closer closer = Closer.create();
    try {
        @SuppressWarnings("deprecation")
        SequenceFile.Writer writer = closer.register(SequenceFile.createWriter(this.fs, this.conf, tmpTablePath,
                Text.class, this.stateClass, SequenceFile.CompressionType.BLOCK, new DefaultCodec()));
        for (T state : states) {
            writer.append(new Text(Strings.nullToEmpty(state.getId())), state);
        }
    } catch (Throwable t) {
        throw closer.rethrow(t);
    } finally {
        closer.close();
    }

    if (this.useTmpFileForPut) {
        Path tablePath = new Path(new Path(this.storeRootDir, storeName), tableName);
        HadoopUtils.renamePath(this.fs, tmpTablePath, tablePath);
    }
}

From source file:hip.ch3.seqfile.writable.seqfile.SequenceFileStockWriter.java

/**
 * Write the sequence file.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
    Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
    int result = cli.runCmd();

    if (result != 0) {
        return result;
    }

    File inputFile = new File(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
    Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));
    Configuration conf = super.getConf();
    SequenceFile.Writer writer = //<co id="ch03_comment_seqfile_write1"/>
            SequenceFile.createWriter(conf, SequenceFile.Writer.file(outputPath),
                    SequenceFile.Writer.keyClass(Text.class),
                    SequenceFile.Writer.valueClass(StockPriceWritable.class),
                    SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new DefaultCodec()));
    try {
        Text key = new Text();
        for (String line : FileUtils.readLines(inputFile)) {

            StockPriceWritable stock = StockPriceWritable.fromLine(line);
            System.out.println("Stock = " + stock);

            key.set(stock.getSymbol());

            writer.append(key, stock); //<co id="ch03_comment_seqfile_write4"/>

        }
    } finally {
        writer.close();
    }
    return 0;
}

From source file:io.transwarp.flume.sink.HDFSCompressedDataStream.java

License:Apache License

@Override
public void open(String filePath) throws IOException {
    DefaultCodec defCodec = new DefaultCodec();
    CompressionType cType = CompressionType.BLOCK;
    open(filePath, defCodec, cType);
}

From source file:io.warp10.continuum.Dump.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    String dumpurl = args[0];
    String seqfile = args[1];

    //
    // Open output SequenceFile
    //

    Configuration conf = getConf();

    //
    // Open output file
    //

    FSDataOutputStream out = null;

    if ("-".equals(args[args.length - 1])) {
        out = new FSDataOutputStream(System.out, null);
    }

    SequenceFile.Writer writer = SequenceFile.createWriter(conf,
            SequenceFile.Writer.compression(CompressionType.BLOCK, new DefaultCodec()),
            SequenceFile.Writer.keyClass(BytesWritable.class),
            SequenceFile.Writer.valueClass(BytesWritable.class),
            null == out ? SequenceFile.Writer.file(new Path(args[args.length - 1]))
                    : SequenceFile.Writer.stream(out));

    InputStream is = null;

    if (dumpurl.startsWith("http://") || dumpurl.startsWith("https://")) {
        URLConnection conn = new URL(dumpurl).openConnection();
        conn.setDoInput(true);
        conn.connect();
        is = conn.getInputStream();
    } else if ("-".equals(dumpurl)) {
        is = System.in;
    } else {
        is = new FileInputStream(dumpurl);
    }

    BufferedReader br = new BufferedReader(new InputStreamReader(is));

    TSerializer serializer = new TSerializer(new TCompactProtocol.Factory());

    while (true) {
        String line = br.readLine();

        if (null == line) {
            break;
        }

        //
        // Extract ts// class{labels}
        //

        String meta = line.substring(0, line.indexOf('}') + 1);

        //
        // Parse a dummy line 'ts// class{labels} T' to retrieve the Metadata
        //

        GTSEncoder encoder = GTSHelper.parse(null, meta + " T");

        Metadata metadata = encoder.getMetadata();

        // Retrieve potential dummy elevation which will encode the number of datapoints encoded

        GTSDecoder decoder = encoder.getDecoder();
        decoder.next();

        long count = decoder.getElevation();

        //
        // Create a GTSWrapper
        //

        GTSWrapper wrapper = new GTSWrapper();
        wrapper.setMetadata(metadata);
        wrapper.setBase(encoder.getBaseTimestamp());

        if (GeoTimeSerie.NO_ELEVATION != count) {
            wrapper.setCount(count);
        } else {
            wrapper.setCount(0L);
        }

        //
        // Retrieve encoded datapoints
        //

        byte[] datapoints = OrderPreservingBase64
                .decode(line.substring(line.indexOf('}') + 2).getBytes(Charsets.UTF_8));

        writer.append(new BytesWritable(serializer.serialize(wrapper)), new BytesWritable(datapoints));
    }

    writer.close();
    br.close();
    is.close();

    return 0;
}

From source file:io.warp10.standalone.StandaloneChunkedMemoryStore.java

License:Apache License

public void dump(String path) throws IOException {

    long nano = System.nanoTime();
    int gts = 0;
    long bytes = 0L;

    Configuration conf = new Configuration();

    conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

    BytesWritable key = new BytesWritable();
    BytesWritable value = new BytesWritable();

    CompressionCodec Codec = new DefaultCodec();
    SequenceFile.Writer writer = null;
    SequenceFile.Writer.Option optPath = SequenceFile.Writer.file(new Path(path));
    SequenceFile.Writer.Option optKey = SequenceFile.Writer.keyClass(key.getClass());
    SequenceFile.Writer.Option optVal = SequenceFile.Writer.valueClass(value.getClass());
    SequenceFile.Writer.Option optCom = SequenceFile.Writer.compression(CompressionType.RECORD, Codec);

    writer = SequenceFile.createWriter(conf, optPath, optKey, optVal, optCom);

    TSerializer serializer = new TSerializer(new TCompactProtocol.Factory());

    try {
        for (Entry<BigInteger, InMemoryChunkSet> entry : this.series.entrySet()) {
            gts++;
            Metadata metadata = this.directoryClient.getMetadataById(entry.getKey());

            List<GTSDecoder> decoders = entry.getValue().getDecoders();

            //GTSEncoder encoder = entry.getValue().fetchEncoder(now, this.chunkcount * this.chunkspan);

            for (GTSDecoder decoder : decoders) {
                GTSWrapper wrapper = new GTSWrapper(metadata);

                wrapper.setBase(decoder.getBaseTimestamp());
                wrapper.setCount(decoder.getCount());

                byte[] data = serializer.serialize(wrapper);
                key.set(data, 0, data.length);

                ByteBuffer bb = decoder.getBuffer();

                ByteBuffer rwbb = ByteBuffer.allocate(bb.remaining());
                rwbb.put(bb);
                rwbb.rewind();
                value.set(rwbb.array(), rwbb.arrayOffset(), rwbb.remaining());

                bytes += key.getLength() + value.getLength();

                writer.append(key, value);
            }
        }
    } catch (IOException ioe) {
        ioe.printStackTrace();
        throw ioe;
    } catch (Exception e) {
        e.printStackTrace();
        throw new IOException(e);
    }

    writer.close();

    nano = System.nanoTime() - nano;

    System.out.println("Dumped " + gts + " GTS (" + bytes + " bytes) in " + (nano / 1000000.0D) + " ms.");
}

From source file:io.warp10.standalone.StandaloneMemoryStore.java

License:Apache License

public void dump(String path) throws IOException {

    long nano = System.nanoTime();
    int gts = 0;
    long bytes = 0L;

    Configuration conf = new Configuration();

    conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

    BytesWritable key = new BytesWritable();
    BytesWritable value = new BytesWritable();

    CompressionCodec Codec = new DefaultCodec();
    SequenceFile.Writer writer = null;
    SequenceFile.Writer.Option optPath = SequenceFile.Writer.file(new Path(path));
    SequenceFile.Writer.Option optKey = SequenceFile.Writer.keyClass(key.getClass());
    SequenceFile.Writer.Option optVal = SequenceFile.Writer.valueClass(value.getClass());
    SequenceFile.Writer.Option optCom = SequenceFile.Writer.compression(CompressionType.RECORD, Codec);

    writer = SequenceFile.createWriter(conf, optPath, optKey, optVal, optCom);

    TSerializer serializer = new TSerializer(new TCompactProtocol.Factory());

    try {
        for (Entry<BigInteger, GTSEncoder> entry : this.series.entrySet()) {
            gts++;
            Metadata metadata = this.directoryClient.getMetadataById(entry.getKey());

            GTSWrapper wrapper = new GTSWrapper(metadata);

            GTSEncoder encoder = entry.getValue();

            wrapper.setBase(encoder.getBaseTimestamp());
            wrapper.setCount(encoder.getCount());

            byte[] data = serializer.serialize(wrapper);
            key.set(data, 0, data.length);

            data = encoder.getBytes();
            value.set(data, 0, data.length);

            bytes += key.getLength() + value.getLength();

            writer.append(key, value);
        }
        /*      
              for (Entry<BigInteger,Metadata> entry: this.metadatas.entrySet()) {
                gts++;
                byte[] data = serializer.serialize(entry.getValue());
                key.set(data, 0, data.length);
                        
                GTSEncoder encoder = this.series.get(entry.getKey());
                data = encoder.getBytes();
                value.set(data, 0, data.length);
                
                bytes += key.getLength() + value.getLength();
                        
                writer.append(key, value);
              }
        */
    } catch (IOException ioe) {
        ioe.printStackTrace();
        throw ioe;
    } catch (Exception e) {
        e.printStackTrace();
        throw new IOException(e);
    }

    writer.close();

    nano = System.nanoTime() - nano;

    System.out.println("Dumped " + gts + " GTS (" + bytes + " bytes) in " + (nano / 1000000.0D) + " ms.");
}

From source file:ldbc.snb.datagen.serializer.UpdateEventSerializer.java

License:Open Source License

public UpdateEventSerializer(Configuration conf, String fileNamePrefix, int reducerId, int numPartitions)
        throws IOException {
    conf_ = conf;
    reducerId_ = reducerId;
    stringBuffer_ = new StringBuffer(512);
    data_ = new ArrayList<String>();
    list_ = new ArrayList<String>();
    currentEvent_ = new UpdateEvent(-1, -1, UpdateEvent.UpdateEventType.NO_EVENT, new String(""));
    numPartitions_ = numPartitions;
    stats_ = new UpdateStreamStats();
    fileNamePrefix_ = fileNamePrefix;
    try {
        streamWriter_ = new SequenceFile.Writer[numPartitions_];
        FileContext fc = FileContext.getFileContext(conf);
        for (int i = 0; i < numPartitions_; ++i) {
            Path outFile = new Path(fileNamePrefix_ + "_" + i);
            streamWriter_[i] = SequenceFile.createWriter(fc, conf, outFile, UpdateEventKey.class, Text.class,
                    CompressionType.NONE, new DefaultCodec(), new SequenceFile.Metadata(),
                    EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE),
                    Options.CreateOpts.checksumParam(Options.ChecksumOpt.createDisabled()));
            FileSystem fs = FileSystem.get(conf);
            Path propertiesFile = new Path(fileNamePrefix_ + ".properties");
            if (fs.exists(propertiesFile)) {
                FSDataInputStream file = fs.open(propertiesFile);
                Properties properties = new Properties();
                properties.load(file);
                stats_.minDate_ = Long
                        .parseLong(properties.getProperty("ldbc.snb.interactive.min_write_event_start_time"));
                stats_.maxDate_ = Long
                        .parseLong(properties.getProperty("ldbc.snb.interactive.max_write_event_start_time"));
                stats_.count_ = Long.parseLong(properties.getProperty("ldbc.snb.interactive.num_events"));
                file.close();
                fs.delete(propertiesFile, true);
            }
        }
    } catch (IOException e) {
        throw e;
    }
}

From source file:nl.surfsara.newsreader.loader.WriteNewsreaderDocs.java

License:Apache License

private void initWriter(Configuration conf, String path) throws IOException {
    CompressionCodec Codec = new DefaultCodec();
    writer = null;
    Option optPath = SequenceFile.Writer.file(new Path(path));
    Option optKey = SequenceFile.Writer.keyClass(Text.class);
    Option optVal = SequenceFile.Writer.valueClass(Text.class);
    Option optCom = SequenceFile.Writer.compression(CompressionType.BLOCK, Codec);
    writer = SequenceFile.createWriter(conf, optPath, optKey, optVal, optCom);
}

From source file:org.apache.accumulo.server.logger.LogWriter.java

License:Apache License

@Override
public LogCopyInfo startCopy(TInfo info, AuthInfo credentials, final String localLog,
        final String fullyQualifiedFileName, final boolean sort) {
    log.info("Copying " + localLog + " to " + fullyQualifiedFileName);
    final long t1 = System.currentTimeMillis();
    try {
        Long id = file2id.get(localLog);
        if (id != null)
            close(info, id);
    } catch (NoSuchLogIDException e) {
        log.error("Unexpected error thrown", e);
        throw new RuntimeException(e);
    }
    File file;
    try {
        file = new File(findLocalFilename(localLog));
        log.info(file.getAbsoluteFile().toString());
    } catch (FileNotFoundException ex) {
        throw new RuntimeException(ex);
    }
    long result = file.length();

    copyThreadPool.execute(new Runnable() {
        @Override
        public void run() {
            Thread.currentThread().setName("Copying " + localLog + " to shared file system");
            for (int i = 0; i < 3; i++) {
                try {
                    if (sort) {
                        copySortLog(localLog, fullyQualifiedFileName);
                    } else {
                        copyLog(localLog, fullyQualifiedFileName);
                    }
                    return;
                } catch (IOException e) {
                    log.error("error during copy", e);
                }
                UtilWaitThread.sleep(1000);
            }
            log.error("Unable to copy file to DFS, too many retries " + localLog);
            try {
                fs.create(new Path(fullyQualifiedFileName + ".failed")).close();
            } catch (IOException ex) {
                log.error("Unable to create failure flag file", ex);
            }
            long t2 = System.currentTimeMillis();
            if (metrics.isEnabled())
                metrics.add(LogWriterMetrics.copy, (t2 - t1));
        }

        private void copySortLog(String localLog, String fullyQualifiedFileName) throws IOException {
            final long SORT_BUFFER_SIZE = acuConf.getMemoryInBytes(Property.LOGGER_SORT_BUFFER_SIZE);

            FileSystem local = TraceFileSystem.wrap(FileSystem.getLocal(fs.getConf()).getRaw());
            Path dest = new Path(fullyQualifiedFileName + ".recovered");
            log.debug("Sorting log file to DSF " + dest);
            fs.mkdirs(dest);
            int part = 0;

            Reader reader = new SequenceFile.Reader(local, new Path(findLocalFilename(localLog)), fs.getConf());
            try {
                final ArrayList<Pair<LogFileKey, LogFileValue>> kv = new ArrayList<Pair<LogFileKey, LogFileValue>>();
                long memorySize = 0;
                while (true) {
                    final long position = reader.getPosition();
                    final LogFileKey key = new LogFileKey();
                    final LogFileValue value = new LogFileValue();
                    try {
                        if (!reader.next(key, value))
                            break;
                    } catch (EOFException e) {
                        log.warn("Unexpected end of file reading write ahead log " + localLog);
                        break;
                    }
                    kv.add(new Pair<LogFileKey, LogFileValue>(key, value));
                    memorySize += reader.getPosition() - position;
                    if (memorySize > SORT_BUFFER_SIZE) {
                        writeSortedEntries(dest, part++, kv);
                        kv.clear();
                        memorySize = 0;
                    }
                }

                if (!kv.isEmpty())
                    writeSortedEntries(dest, part++, kv);
                fs.create(new Path(dest, "finished")).close();
            } finally {
                reader.close();
            }
        }

        private void writeSortedEntries(Path dest, int part, final List<Pair<LogFileKey, LogFileValue>> kv)
                throws IOException {
            String path = dest + String.format("/part-r-%05d", part);
            log.debug("Writing partial log file to DSF " + path);
            log.debug("Sorting");
            Span span = Trace.start("Logger sort");
            span.data("logfile", dest.getName());
            Collections.sort(kv, new Comparator<Pair<LogFileKey, LogFileValue>>() {
                @Override
                public int compare(Pair<LogFileKey, LogFileValue> o1, Pair<LogFileKey, LogFileValue> o2) {
                    return o1.getFirst().compareTo(o2.getFirst());
                }
            });
            span.stop();
            span = Trace.start("Logger write");
            span.data("logfile", dest.getName());
            MapFile.Writer writer = new MapFile.Writer(fs.getConf(), fs, path, LogFileKey.class,
                    LogFileValue.class);
            short replication = (short) acuConf.getCount(Property.LOGGER_RECOVERY_FILE_REPLICATION);
            fs.setReplication(new Path(path + "/" + MapFile.DATA_FILE_NAME), replication);
            fs.setReplication(new Path(path + "/" + MapFile.INDEX_FILE_NAME), replication);
            try {
                for (Pair<LogFileKey, LogFileValue> entry : kv)
                    writer.append(entry.getFirst(), entry.getSecond());
            } finally {
                writer.close();
                span.stop();
            }
        }

        private void copyLog(final String localLog, final String fullyQualifiedFileName) throws IOException {
            Path dest = new Path(fullyQualifiedFileName + ".copy");
            log.debug("Copying log file to DSF " + dest);
            fs.delete(dest, true);
            LogFileKey key = new LogFileKey();
            LogFileValue value = new LogFileValue();
            Writer writer = null;
            Reader reader = null;
            try {
                short replication = (short) acuConf.getCount(Property.LOGGER_RECOVERY_FILE_REPLICATION);
                writer = SequenceFile.createWriter(fs, fs.getConf(), dest, LogFileKey.class, LogFileValue.class,
                        fs.getConf().getInt("io.file.buffer.size", 4096), replication, fs.getDefaultBlockSize(),
                        SequenceFile.CompressionType.BLOCK, new DefaultCodec(), null, new Metadata());
                FileSystem local = TraceFileSystem.wrap(FileSystem.getLocal(fs.getConf()).getRaw());
                reader = new SequenceFile.Reader(local, new Path(findLocalFilename(localLog)), fs.getConf());
                while (reader.next(key, value)) {
                    writer.append(key, value);
                }
            } catch (IOException ex) {
                log.warn("May have a partial copy of a recovery file: " + localLog, ex);
            } finally {
                if (reader != null)
                    reader.close();
                if (writer != null)
                    writer.close();
            }
            // Make file appear in the shared file system as the target name only after it is completely copied
            fs.rename(dest, new Path(fullyQualifiedFileName));
            log.info("Copying " + localLog + " complete");
        }
    });
    return new LogCopyInfo(result, null);
}

From source file:org.apache.gobblin.metastore.FsStateStore.java

License:Apache License

/**
 * See {@link StateStore#put(String, String, T)}.
 *
 * <p>
 *   This implementation does not support putting the state object into an existing store, as
 *   append is not yet supported by the Hadoop SequenceFile (HADOOP-7139).
 * </p>
 */
@Override
public void put(String storeName, String tableName, T state) throws IOException {
    String tmpTableName = this.useTmpFileForPut ? TMP_FILE_PREFIX + tableName : tableName;
    Path tmpTablePath = new Path(new Path(this.storeRootDir, storeName), tmpTableName);

    if (!this.fs.exists(tmpTablePath) && !create(storeName, tmpTableName)) {
        throw new IOException("Failed to create a state file for table " + tmpTableName);
    }

    Closer closer = Closer.create();
    try {
        @SuppressWarnings("deprecation")
        SequenceFile.Writer writer = closer.register(SequenceFile.createWriter(this.fs, this.conf, tmpTablePath,
                Text.class, this.stateClass, SequenceFile.CompressionType.BLOCK, new DefaultCodec()));
        writer.append(new Text(Strings.nullToEmpty(state.getId())), state);
    } catch (Throwable t) {
        throw closer.rethrow(t);
    } finally {
        closer.close();
    }

    if (this.useTmpFileForPut) {
        Path tablePath = new Path(new Path(this.storeRootDir, storeName), tableName);
        renamePath(tmpTablePath, tablePath);
    }
}