Example usage for org.apache.hadoop.io.BytesWritable.set

List of usage examples for org.apache.hadoop.io.BytesWritable.set

Introduction

On this page you can find usage examples for org.apache.hadoop.io.BytesWritable.set.

Prototype

public void set(byte[] newData, int offset, int length) 

Document

Set the value to a copy of the given byte range.
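
The method copies the given byte range into the writable's own buffer, so a single BytesWritable instance can be reused for many records, as most of the examples below do. The following minimal sketch (not taken from the examples below; class and variable names are illustrative) shows that reuse pattern:

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.io.BytesWritable;

public class BytesWritableSetSketch {
    public static void main(String[] args) {
        // One writable, reused for every value.
        BytesWritable value = new BytesWritable();

        byte[] first = "hello world".getBytes(StandardCharsets.UTF_8);
        value.set(first, 0, first.length);      // copies all 11 bytes into the writable
        System.out.println(value.getLength());  // 11

        byte[] second = "bye".getBytes(StandardCharsets.UTF_8);
        value.set(second, 0, second.length);    // overwrites the previous contents
        System.out.println(value.getLength());  // 3

        // getBytes() returns the backing array, which may be longer than the
        // valid data, so always pair it with getLength() when reading back.
        byte[] backing = value.getBytes();
        System.out.println(new String(backing, 0, value.getLength(), StandardCharsets.UTF_8)); // bye
    }
}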

Usage

From source file: io.covert.binary.analysis.BuildTarBzSequenceFile.java

License: Apache License

@Override
public int run(String[] args) throws Exception {

    File inDir = new File(args[0]);
    Path name = new Path(args[1]);

    Text key = new Text();
    BytesWritable val = new BytesWritable();

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    if (!fs.exists(name)) {
        fs.mkdirs(name);
    }
    for (File file : inDir.listFiles()) {
        Path sequenceName = new Path(name, file.getName() + ".seq");
        System.out.println("Writing to " + sequenceName);
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, sequenceName, Text.class,
                BytesWritable.class, CompressionType.RECORD);
        if (!file.isFile()) {
            System.out.println("Skipping " + file + " (not a file) ...");
            continue;
        }

        final InputStream is = new FileInputStream(file);
        final TarArchiveInputStream debInputStream = (TarArchiveInputStream) new ArchiveStreamFactory()
                .createArchiveInputStream("tar", is);
        TarArchiveEntry entry = null;
        while ((entry = (TarArchiveEntry) debInputStream.getNextEntry()) != null) {
            if (!entry.isDirectory()) {

                final ByteArrayOutputStream outputFileStream = new ByteArrayOutputStream();
                IOUtils.copy(debInputStream, outputFileStream);
                outputFileStream.close();
                byte[] outputFile = outputFileStream.toByteArray();
                val.set(outputFile, 0, outputFile.length);

                MessageDigest md = MessageDigest.getInstance("MD5");
                md.update(outputFile);
                byte[] digest = md.digest();
                String hexdigest = "";
                for (int i = 0; i < digest.length; i++) {
                    hexdigest += Integer.toString((digest[i] & 0xff) + 0x100, 16).substring(1);
                }
                key.set(hexdigest);
                writer.append(key, val);
            }
        }
        debInputStream.close();
        writer.close();
    }

    return 0;
}

From source file: io.github.thammegowda.Local2SeqFile.java

License: Apache License

private void writeOutput(RemoteIterator<? extends FileStatus> input) throws IOException {
    Path outPath = new Path(output);
    if (distribFs.exists(outPath)) {
        throw new IllegalArgumentException("Output file already exists, Not overwriting it:" + output);
    }

    Writer writer = SequenceFile.createWriter(distribFs.getConf(), Writer.file(outPath),
            Writer.keyClass(Text.class), Writer.valueClass(BytesWritable.class),
            Writer.compression(SequenceFile.CompressionType.RECORD));
    Text key = new Text();
    BytesWritable value = new BytesWritable();
    long skipped = 0;
    long copied = 0;
    while (input.hasNext()) {
        FileStatus next = input.next();
        if (filter(next)) {
            key.set(next.getPath().toString());
            FSDataInputStream stream = localFs.open(next.getPath());
            // CAUTION: this could cause a memory overflow, since the whole file is buffered in memory
            byte[] bytes = IOUtils.toByteArray(stream);
            stream.close();
            value.set(bytes, 0, bytes.length);
            writer.append(key, value);
            copied++;
        } else {
            skipped++;
        }
    }
    writer.close();
    System.out.println("Files copied ::" + copied);
    System.out.println("Files skipped ::" + skipped);
}

From source file: io.prestosql.plugin.hive.util.TestSerDeUtils.java

License: Apache License

@Test
public void testReuse() {
    BytesWritable value = new BytesWritable();

    byte[] first = "hello world".getBytes(UTF_8);
    value.set(first, 0, first.length);

    byte[] second = "bye".getBytes(UTF_8);
    value.set(second, 0, second.length);

    Type type = new TypeToken<Map<BytesWritable, Long>>() {
    }.getType();
    ObjectInspector inspector = getInspector(type);

    Block actual = getBlockObject(mapType(createUnboundedVarcharType(), BIGINT), ImmutableMap.of(value, 0L),
            inspector);
    Block expected = mapBlockOf(createUnboundedVarcharType(), BIGINT, "bye", 0L);

    assertBlockEquals(actual, expected);
}

From source file: io.warp10.standalone.StandaloneChunkedMemoryStore.java

License: Apache License

public void dump(String path) throws IOException {

    long nano = System.nanoTime();
    int gts = 0;
    long bytes = 0L;

    Configuration conf = new Configuration();

    conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

    BytesWritable key = new BytesWritable();
    BytesWritable value = new BytesWritable();

    CompressionCodec Codec = new DefaultCodec();
    SequenceFile.Writer writer = null;
    SequenceFile.Writer.Option optPath = SequenceFile.Writer.file(new Path(path));
    SequenceFile.Writer.Option optKey = SequenceFile.Writer.keyClass(key.getClass());
    SequenceFile.Writer.Option optVal = SequenceFile.Writer.valueClass(value.getClass());
    SequenceFile.Writer.Option optCom = SequenceFile.Writer.compression(CompressionType.RECORD, Codec);

    writer = SequenceFile.createWriter(conf, optPath, optKey, optVal, optCom);

    TSerializer serializer = new TSerializer(new TCompactProtocol.Factory());

    try {
        for (Entry<BigInteger, InMemoryChunkSet> entry : this.series.entrySet()) {
            gts++;
            Metadata metadata = this.directoryClient.getMetadataById(entry.getKey());

            List<GTSDecoder> decoders = entry.getValue().getDecoders();

            //GTSEncoder encoder = entry.getValue().fetchEncoder(now, this.chunkcount * this.chunkspan);

            for (GTSDecoder decoder : decoders) {
                GTSWrapper wrapper = new GTSWrapper(metadata);

                wrapper.setBase(decoder.getBaseTimestamp());
                wrapper.setCount(decoder.getCount());

                byte[] data = serializer.serialize(wrapper);
                key.set(data, 0, data.length);

                ByteBuffer bb = decoder.getBuffer();

                ByteBuffer rwbb = ByteBuffer.allocate(bb.remaining());
                rwbb.put(bb);
                rwbb.rewind();
                value.set(rwbb.array(), rwbb.arrayOffset(), rwbb.remaining());

                bytes += key.getLength() + value.getLength();

                writer.append(key, value);
            }
        }
    } catch (IOException ioe) {
        ioe.printStackTrace();
        throw ioe;
    } catch (Exception e) {
        e.printStackTrace();
        throw new IOException(e);
    }

    writer.close();

    nano = System.nanoTime() - nano;

    System.out.println("Dumped " + gts + " GTS (" + bytes + " bytes) in " + (nano / 1000000.0D) + " ms.");
}

From source file: io.warp10.standalone.StandaloneMemoryStore.java

License: Apache License

public void dump(String path) throws IOException {

    long nano = System.nanoTime();
    int gts = 0;
    long bytes = 0L;

    Configuration conf = new Configuration();

    conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

    BytesWritable key = new BytesWritable();
    BytesWritable value = new BytesWritable();

    CompressionCodec Codec = new DefaultCodec();
    SequenceFile.Writer writer = null;
    SequenceFile.Writer.Option optPath = SequenceFile.Writer.file(new Path(path));
    SequenceFile.Writer.Option optKey = SequenceFile.Writer.keyClass(key.getClass());
    SequenceFile.Writer.Option optVal = SequenceFile.Writer.valueClass(value.getClass());
    SequenceFile.Writer.Option optCom = SequenceFile.Writer.compression(CompressionType.RECORD, Codec);

    writer = SequenceFile.createWriter(conf, optPath, optKey, optVal, optCom);

    TSerializer serializer = new TSerializer(new TCompactProtocol.Factory());

    try {
        for (Entry<BigInteger, GTSEncoder> entry : this.series.entrySet()) {
            gts++;
            Metadata metadata = this.directoryClient.getMetadataById(entry.getKey());

            GTSWrapper wrapper = new GTSWrapper(metadata);

            GTSEncoder encoder = entry.getValue();

            wrapper.setBase(encoder.getBaseTimestamp());
            wrapper.setCount(encoder.getCount());

            byte[] data = serializer.serialize(wrapper);
            key.set(data, 0, data.length);

            data = encoder.getBytes();
            value.set(data, 0, data.length);

            bytes += key.getLength() + value.getLength();

            writer.append(key, value);
        }
        /*      
              for (Entry<BigInteger,Metadata> entry: this.metadatas.entrySet()) {
                gts++;
                byte[] data = serializer.serialize(entry.getValue());
                key.set(data, 0, data.length);
                        
                GTSEncoder encoder = this.series.get(entry.getKey());
                data = encoder.getBytes();
                value.set(data, 0, data.length);
                
                bytes += key.getLength() + value.getLength();
                        
                writer.append(key, value);
              }
        */
    } catch (IOException ioe) {
        ioe.printStackTrace();
        throw ioe;
    } catch (Exception e) {
        e.printStackTrace();
        throw new IOException(e);
    }

    writer.close();

    nano = System.nanoTime() - nano;

    System.out.println("Dumped " + gts + " GTS (" + bytes + " bytes) in " + (nano / 1000000.0D) + " ms.");
}

From source file: newprotobuf.mapred.ProtobufRecordReader.java

License: Open Source License

public synchronized boolean next(LongWritable key, BytesWritable value) throws IOException {

    size = 0;
    boolean readend = readLittleEndianInt(in);
    if (readend) {
        LOG.info("read the pb file completely");
        return false;
    }

    if (size < 0) {
        LOG.info("Parse the pbfile error:" + file.toUri().toString());
        LOG.info("get size " + size);
        if (skipbad) {
            LOG.info("Skip the bad file");
            reporter.incrCounter(Counter.BADFORMAT_FILE_COUNT, 1);
            return false;
        } else {
            throw (new IOException("Bad format pbfile"));
        }
    }

    pos += 2;
    if (size == 0) {
        value.set(buffer, 0, 0);
        return true;
    }

    pos += size;
    key.set(pos);

    int readlen = 0;
    if (size < buffer.length) {
        int already_read = 0;
        while (already_read < size) {
            readlen = in.read(buffer, already_read, size - already_read);
            if (readlen == -1) {
                if (already_read < size) {
                    LOG.info("Parse the pbfile error:" + file.toUri().toString());
                    LOG.info("current read size" + readlen + " but expected size:" + size);
                    if (skipbad) {
                        LOG.info("Skip the bad file");
                        reporter.incrCounter(Counter.BADFORMAT_FILE_COUNT, 1);
                        return false;
                    } else {
                        throw (new IOException("Bad format pbfile"));
                    }
                } else
                    break;
            }

            already_read += readlen;
        }
        value.set(buffer, 0, size);
    } else {
        byte[] tmp = new byte[size];
        int already_read = 0;
        while (already_read < size) {
            readlen = in.read(tmp, already_read, size - already_read);
            if (readlen == -1) {
                if (already_read < size) {
                    LOG.info("Parse the pbfile error:" + file.toUri().toString());
                    LOG.info("current read size" + readlen + " but expected size:" + size);
                    if (skipbad) {
                        LOG.info("Skip the bad file");
                        reporter.incrCounter(Counter.BADFORMAT_FILE_COUNT, 1);
                        return false;
                    } else {
                        throw (new IOException("Bad format pbfile"));
                    }
                } else
                    break;
            }

            already_read += readlen;
        }
        value.set(tmp, 0, size);
    }

    return true;
}

From source file: org.acacia.partitioner.java.WholeFileRecordReader.java

License: Apache License

@Override
public boolean next(NullWritable key, BytesWritable value) throws IOException {
    if (!processed) {
        byte[] contents = new byte[(int) fileSplit.getLength()];
        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(conf);
        FSDataInputStream in = null;
        try {
            in = fs.open(file);
            IOUtils.readFully(in, contents, 0, contents.length);
            value.set(contents, 0, contents.length);
        } finally {
            IOUtils.closeStream(in);
        }
        processed = true;
        return true;
    }
    return false;
}

From source file: org.apache.avro.mapred.PipesCompatibleAvroRecordReader.java

License: Apache License

/**
 * If another key and value exist, updates the given key and value
 * BytesWritables and returns true; otherwise, returns false.
 *
 * @param key           the BytesWritable key to update with the next key,
 *                      if it exists
 * @param value         the BytesWritable value to update with the next
 *                      value, if it exists
 * @return              true if the next key/value pair exists, otherwise
 *                      false
 * @throws IOException  if the AvroRecordReader or the KeyValueGetter
 *                      return an IOException
 */
public boolean next(BytesWritable key, BytesWritable value) throws IOException {
    if (avroRecordReader.next(avroWrapper, nullWritable)) {
        keyValueGetter.give(avroWrapper.datum());

        keyBytes = keyValueGetter.getKey();
        valueBytes = keyValueGetter.getValue();

        key.set(keyBytes, 0, keyBytes.length);
        value.set(valueBytes, 0, valueBytes.length);

        return true;
    }

    return false;
}

From source file: org.apache.camel.component.hdfs.HdfsConsumerTest.java

License: Apache License

@Test
public void testReadBytes() throws Exception {
    if (!canTest()) {
        return;
    }

    final Path file = new Path(new File("target/test/test-camel-bytes").getAbsolutePath());
    Configuration conf = new Configuration();
    FileSystem fs1 = FileSystem.get(file.toUri(), conf);
    SequenceFile.Writer writer = createWriter(fs1, conf, file, NullWritable.class, BytesWritable.class);
    NullWritable keyWritable = NullWritable.get();
    BytesWritable valueWritable = new BytesWritable();
    String value = "CIAO!";
    valueWritable.set(value.getBytes(), 0, value.getBytes().length);
    writer.append(keyWritable, valueWritable);
    writer.sync();
    writer.close();

    MockEndpoint resultEndpoint = context.getEndpoint("mock:result", MockEndpoint.class);
    resultEndpoint.expectedMessageCount(1);

    context.addRoutes(new RouteBuilder() {
        public void configure() {
            from("hdfs:///" + file.toUri() + "?fileSystemType=LOCAL&fileType=SEQUENCE_FILE&initialDelay=0")
                    .to("mock:result");
        }
    });
    context.start();

    resultEndpoint.assertIsSatisfied();
}

From source file: org.apache.camel.component.hdfs2.HdfsConsumerTest.java

License: Apache License

@Test
public void testReadBytes() throws Exception {
    if (!canTest()) {
        return;
    }

    final Path file = new Path(new File("target/test/test-camel-bytes").getAbsolutePath());
    Configuration conf = new Configuration();
    SequenceFile.Writer writer = createWriter(conf, file, NullWritable.class, BytesWritable.class);
    NullWritable keyWritable = NullWritable.get();
    BytesWritable valueWritable = new BytesWritable();
    String value = "CIAO!";
    valueWritable.set(value.getBytes(), 0, value.getBytes().length);
    writer.append(keyWritable, valueWritable);
    writer.sync();
    writer.close();

    MockEndpoint resultEndpoint = context.getEndpoint("mock:result", MockEndpoint.class);
    resultEndpoint.expectedMessageCount(1);

    context.addRoutes(new RouteBuilder() {
        public void configure() {
            from("hdfs2:///" + file.toUri() + "?fileSystemType=LOCAL&fileType=SEQUENCE_FILE&initialDelay=0")
                    .to("mock:result");
        }
    });
    context.start();

    resultEndpoint.assertIsSatisfied();
}