Example usage for org.apache.hadoop.io BytesWritable copyBytes

List of usage examples for org.apache.hadoop.io BytesWritable copyBytes

Introduction

On this page you can find example usages of org.apache.hadoop.io.BytesWritable.copyBytes().

Prototype

public byte[] copyBytes() 

Source Link

Document

Get a copy of the bytes that is exactly the length of the data.

Usage

From source file:LookupPostingsCompressed.java

License:Apache License

/**
 * Runs this tool./*from w ww  .  j av  a 2s  .com*/
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INDEX));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(COLLECTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(INDEX) || !cmdline.hasOption(COLLECTION)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(LookupPostingsCompressed.class.getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.exit(-1);
    }

    String indexPath = cmdline.getOptionValue(INDEX);
    String collectionPath = cmdline.getOptionValue(COLLECTION);

    if (collectionPath.endsWith(".gz")) {
        System.out.println("gzipped collection is not seekable: use compressed version!");
        System.exit(-1);
    }

    Configuration config = new Configuration();
    FileSystem fs = FileSystem.get(config);
    MapFile.Reader reader = new MapFile.Reader(new Path(indexPath + "/part-r-00000"), config);

    FSDataInputStream collection = fs.open(new Path(collectionPath));
    BufferedReader d = new BufferedReader(new InputStreamReader(collection));

    Text key = new Text();
    PairOfWritables<VIntWritable, BytesWritable> value = new PairOfWritables<VIntWritable, BytesWritable>();

    System.out.println("Looking up postings for the term \"starcross'd\"");
    key.set("starcross'd");

    reader.get(key, value);

    BytesWritable postings = value.getRightElement();
    ByteArrayInputStream buffer = new ByteArrayInputStream(postings.copyBytes());
    DataInputStream in = new DataInputStream(buffer);
    int OFFSET = 0;
    int count;
    while (in.available() != 0) {
        OFFSET = OFFSET + WritableUtils.readVInt(in);
        count = WritableUtils.readVInt(in);
        System.out.print("(" + OFFSET + ", " + count + ")");
        collection.seek(OFFSET);
        System.out.println(d.readLine());
    }

    OFFSET = 0;
    key.set("gold");
    reader.get(key, value);
    postings = value.getRightElement();
    buffer = new ByteArrayInputStream(postings.copyBytes());
    in = new DataInputStream(buffer);
    System.out.println("Complete postings list for 'gold': (" + value.getLeftElement() + ", [");
    while (in.available() != 0) {
        OFFSET = OFFSET + WritableUtils.readVInt(in);
        count = WritableUtils.readVInt(in);
        System.out.print("(" + OFFSET + ", " + count + ")");
        //collection.seek(OFFSET);
        //System.out.println(d.readLine());
        System.out.print(", ");
    }
    System.out.print("])\n");

    Int2IntFrequencyDistribution goldHist = new Int2IntFrequencyDistributionEntry();
    buffer.reset();

    OFFSET = 0;
    while (in.available() != 0) {
        OFFSET = OFFSET + WritableUtils.readVInt(in);
        count = WritableUtils.readVInt(in);
        goldHist.increment(count);
    }

    System.out.println("histogram of tf values for gold");
    for (PairOfInts pair : goldHist) {
        System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement());
    }

    buffer.close();
    //Silver

    key.set("silver");
    reader.get(key, value);
    postings = value.getRightElement();
    buffer = new ByteArrayInputStream(postings.copyBytes());
    in = new DataInputStream(buffer);
    System.out.println("Complete postings list for 'silver': (" + value.getLeftElement() + ", [");
    while (in.available() != 0) {
        OFFSET = OFFSET + WritableUtils.readVInt(in);
        count = WritableUtils.readVInt(in);
        System.out.print("(" + OFFSET + ", " + count + ")");
        //collection.seek(OFFSET);
        //System.out.println(d.readLine());
        System.out.print(", ");
    }
    System.out.print("])\n");

    Int2IntFrequencyDistribution silverHist = new Int2IntFrequencyDistributionEntry();
    buffer.reset();

    OFFSET = 0;
    while (in.available() != 0) {
        OFFSET = OFFSET + WritableUtils.readVInt(in);
        count = WritableUtils.readVInt(in);
        silverHist.increment(count);
    }

    System.out.println("histogram of tf values for silver");
    for (PairOfInts pair : goldHist) {
        System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement());
    }

    buffer.close();

    key.set("bronze");
    Writable w = reader.get(key, value);

    if (w == null) {
        System.out.println("the term bronze does not appear in the collection");
    }

    collection.close();
    reader.close();

    return 0;
}

From source file:bigsatgps.BigDataHandler.java

License:Open Source License

/**
 * Reads the first record of a sequence file, decodes its value as an image and writes that
 * image to the local file system in TIFF format.
 *
 * <p>The output path is {@code inpath} truncated at its first {@code '.'} (the whole path
 * when it contains none) with {@code "copy.tif"} appended.
 *
 * @param inpath path of the sequence file to read
 * @throws IOException if the file cannot be read, contains no record, its first value is
 *         not a decodable image, or no TIFF writer is available
 */
public void SequenceToImage(String inpath) throws IOException {
    FileSystem fs = FileSystem.get(confHadoop);
    Path inputFilePath = new Path(inpath);

    byte[] imagebytearray;
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, inputFilePath, confHadoop);
    try {
        Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), confHadoop);
        BytesWritable value = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), confHadoop);
        if (!reader.next(key, value)) {
            throw new IOException("sequence file contains no records: " + inpath);
        }
        imagebytearray = value.copyBytes();
    } finally {
        // Release the underlying stream even when reading fails.
        reader.close();
    }

    // ImageIO.read signals "no registered reader understands this data" by returning null.
    BufferedImage bufferedseq = ImageIO.read(new ByteArrayInputStream(imagebytearray));
    if (bufferedseq == null) {
        throw new IOException("first record of " + inpath + " is not a decodable image");
    }

    int dot = inpath.indexOf(".");
    String outpath = (dot >= 0 ? inpath.substring(0, dot) : inpath) + "copy.tif";

    // ImageIO.write returns false when no writer for the requested format is installed.
    if (!ImageIO.write(bufferedseq, "tiff", new File(outpath))) {
        throw new IOException("no image writer available for format tiff; nothing written to " + outpath);
    }
    System.out.println("image was successfully retrieved and written to " + outpath);
}

From source file:cienciaCelularMR.McellMapper.java

/**
 * Runs the external {@code mcell.exe} program on the bytes carried by {@code value} and
 * publishes what it produced.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>write the value's bytes to the local file {@code entradaMap.mdl};</li>
 *   <li>start {@code mcell.exe -errfile errorMcell.txt entradaMap.mdl} and read its standard
 *       output line by line, reporting progress to the framework after each line;</li>
 *   <li>append the contents of {@code errorMcell.txt} (if present) and emit the combined text
 *       on the {@code "controloutput"} named output;</li>
 *   <li>if {@code joined_1.dat} exists, copy it to a file created through the job's
 *       {@code FileSystem} and emit that file's name for {@code key}; otherwise emit the
 *       contents of {@code errorMcell.txt} on the {@code "errormcell"} named output.</li>
 * </ol>
 * On failure a marker file named {@code errorMapper-<id>.<subId>.txt} is created, the error
 * is logged, and an {@link InterruptedException} carrying the original cause is thrown.
 *
 * @param key    identifies the user and sub-run; used to name the output files
 * @param value  raw input handed to MCell
 * @param output task context used for progress reporting and for emitting results
 * @throws IOException if the error marker file cannot be written
 * @throws InterruptedException if processing fails for any reason
 */
@Override
public void map(KeyMcell key, BytesWritable value, Context output) throws IOException, InterruptedException {
    try {
        System.out.println("Entro al Map");
        System.out.println("Key del mcell mapper: " + key);

        byte[] arrayByte = value.copyBytes();
        File archivo = new File("entradaMap.mdl");
        try (FileOutputStream fos = new FileOutputStream(archivo)) {
            fos.write(arrayByte);
            fos.flush();
        }

        Process process = new ProcessBuilder("mcell.exe", "-errfile", "errorMcell.txt", "entradaMap.mdl")
                .start();

        System.out.println("Mcell is running");
        StringBuilder res = new StringBuilder();
        // NOTE(review): the process is never waited on; output files are read as soon as its
        // stdout reaches EOF. Confirm whether a process.waitFor() is wanted here.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(process.getInputStream()))) {
            String line;
            while ((line = br.readLine()) != null) {
                res.append(line).append("\n");
                output.progress();
                try {
                    Matcher matcher = pattern.matcher(line);
                    if (matcher.find()) {
                        System.out.println("Progreso: " + Integer.parseInt(matcher.group(1)) + " de "
                                + Integer.parseInt(matcher.group(2)));
                    }
                } catch (RuntimeException ignored) {
                    // Progress echo is best-effort: a line that matches but does not carry two
                    // integer groups must not abort the run.
                }
            }
        }

        File errorFile = new File("errorMcell.txt");
        if (errorFile.exists()) {
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(errorFile)))) {
                String l;
                while ((l = reader.readLine()) != null) {
                    res.append(l).append("\n");
                }
            }
        }

        mos.write("controloutput", key, new Text(res.toString()));

        // Drop the (possibly large) captured output before copying the result file.
        res = new StringBuilder();
        System.out.println("Leyendo salida de MCell...");

        String salidaName = "salidaMCell-" + key.getIdUsuario() + "." + key.getSubIdUsuario() + ".dat";
        File salidaFile = new File("joined_1.dat");
        boolean salidaExists = salidaFile.exists();
        long totalbytes = 0;

        // The destination file is created whether or not MCell produced output (as before);
        // try-with-resources makes sure it is closed on every path.
        try (FSDataOutputStream fs = FileSystem.get(output.getConfiguration()).create(new Path(salidaName))) {
            if (salidaExists) {
                try (FileInputStream ios = new FileInputStream(salidaFile)) {
                    byte[] buf = new byte[1024];
                    int bytesRead;
                    while ((bytesRead = ios.read(buf)) > 0) {
                        totalbytes += bytesRead;
                        fs.write(buf, 0, bytesRead);
                        fs.flush();
                        output.progress();
                    }
                }
            }
        }

        if (salidaExists) {
            System.out.println("***Mcell termino de leer y guardar archivo .dat, tamao: " + totalbytes);
            System.out.println("Nombre que se le pasa a Fernet: " + salidaName);
            output.write(key, new Text(salidaName));
        } else {
            errorFile = new File("errorMcell.txt");
            if (errorFile.exists()) {
                try (BufferedReader reader = new BufferedReader(
                        new InputStreamReader(new FileInputStream(errorFile)))) {
                    String l;
                    while ((l = reader.readLine()) != null) {
                        res.append(l).append("\n");
                    }
                }
                if (res.length() > 0) {
                    mos.write("errormcell", key, new Text(res.toString()));
                }
            }
        }
    } catch (IOException | IllegalArgumentException | InterruptedException ex) {
        String salidaName = "errorMapper-" + key.getIdUsuario() + "." + key.getSubIdUsuario() + ".txt";
        try (FSDataOutputStream fs = FileSystem.get(output.getConfiguration()).create(new Path(salidaName))) {
            // Write the marker text as bytes. Byte(String) parses a number and would throw
            // NumberFormatException here, hiding the real failure.
            fs.write("Error en Mapper MCell:\n".getBytes(java.nio.charset.StandardCharsets.UTF_8));
            fs.flush();
        }

        Logger.getLogger(McellMapper.class.getName()).log(Level.SEVERE, null, ex);
        InterruptedException failure = new InterruptedException(ex.getMessage());
        failure.initCause(ex);
        throw failure;
    }
}

From source file:com.cloudera.impala.TestUdf.java

License:Apache License

/**
 * Returns a new {@code BytesWritable} built from a copy of the bytes of {@code a}.
 *
 * @param a the value to duplicate; may be {@code null}
 * @return the duplicate, or {@code null} when {@code a} is {@code null}
 */
public BytesWritable evaluate(BytesWritable a) {
    return (a == null) ? null : new BytesWritable(a.copyBytes());
}

From source file:com.xiaoxiaomo.mr.MapperTest.java

License:Apache License

/**
 * Feeds the mapper a zero-length payload and checks that the first output record carries a
 * non-null value whose copied bytes are empty.
 */
@Test
public void testCanMapEmptyPayload() throws IOException {
    final KafkaInputSplit kafkaSplit = new KafkaInputSplit(1, "host-01", "topic1", 3, 1234567890L);
    final MsgMetadataWritable metadata = new MsgMetadataWritable(kafkaSplit, kafkaSplit.getStartOffset());
    mapDriver.withInput(metadata, new BytesWritable(new byte[0]));

    final List<Pair<MsgMetadataWritable, BytesWritable>> outputs = mapDriver.run();
    final BytesWritable mapped = outputs.get(0).getSecond();

    assertNotNull(mapped);
    assertEquals(0, mapped.copyBytes().length);
}

From source file:eagle.query.aggregate.raw.RawGroupbyBucket.java

License:Apache License

/**
 * Builds a freshly allocated view of the current aggregation state. This is an expensive
 * operation: new lists and strings are created for every group on each call.
 *
 * <p>For each group, the key is the list of strings decoded from the group-by key's byte
 * values and the value is the list of results of that group's functions, in iteration order.
 *
 * @return a new map from decoded group-by fields to function results
 */
public Map<List<String>, List<Double>> result() {
    final Map<List<String>, List<Double>> formatted = new HashMap<List<String>, List<Double>>();
    for (Map.Entry<GroupbyKey, List<Function>> group : this.group2FunctionMap.entrySet()) {
        // Collect the function results first, then decode the key fields.
        final List<Double> aggregates = new ArrayList<Double>();
        for (Function function : group.getValue()) {
            aggregates.add(function.result());
        }

        final List<String> fieldNames = new ArrayList<String>();
        for (BytesWritable rawField : group.getKey().getValue()) {
            final byte[] exactBytes = rawField.copyBytes();
            fieldNames.add(new String(exactBytes));
        }

        formatted.put(fieldNames, aggregates);
    }
    return formatted;
}

From source file:eagle.storage.hbase.aggregate.coprocessor.TestGroupAggregateClient.java

License:Apache License

/**
 * Logs each group-by key/value pair at debug level as {@code KEY: [...], VALUE: [...]}.
 *
 * <p>Returns immediately when debug logging is disabled, so no strings or lists are built
 * for output that would be discarded.
 *
 * @param keyValues the pairs to log
 */
private void logGroupbyKeyValue(List<GroupbyKeyValue> keyValues) {
    if (!LOG.isDebugEnabled()) {
        return;
    }
    for (GroupbyKeyValue keyValue : keyValues) {
        GroupbyKey key = keyValue.getKey();
        List<String> keys = new ArrayList<String>();
        for (BytesWritable bytes : key.getValue()) {
            keys.add(new String(bytes.copyBytes()));
        }
        List<Double> vals = new ArrayList<Double>();
        GroupbyValue val = keyValue.getValue();
        for (DoubleWritable dw : val.getValue()) {
            vals.add(dw.get());
        }
        LOG.debug("KEY: " + keys + ", VALUE: " + vals);
    }
}

From source file:in.dream_lab.goffish.hama.FullInfoNonSplitReader.java

License:Apache License

/**
 * Loads this peer's input into the partition and exchanges subgraph ownership with all peers.
 *
 * <p>Every input record's value is passed to {@code createVertex}. The ids of the
 * partition's subgraphs are then broadcast in one control message. After a sync, the ids
 * received from each peer are recorded in {@code subgraphPartitionMap} against the sender's
 * partition id. Timing for both phases is logged at info level.
 *
 * @return the subgraphs of this peer's partition
 */
@Override
public List<ISubgraph<S, V, E, LongWritable, LongWritable, LongWritable>> getSubgraphs()
        throws IOException, SyncException, InterruptedException {

    long phaseStart = System.currentTimeMillis();

    // Phase 1: turn every input record into a vertex.
    for (KeyValuePair<Writable, Writable> record = peer.readNext(); record != null; record = peer
            .readNext()) {
        createVertex(record.getValue().toString());
    }

    LOG.debug("Finished Graph Loading in partition" + peer.getPeerIndex());

    // Announce to every peer which subgraphs live in this partition.
    Message<K, M> ownershipBroadcast = new Message<>();
    ownershipBroadcast.setMessageType(Message.MessageType.CUSTOM_MESSAGE);
    ControlMessage broadcastInfo = new ControlMessage();
    broadcastInfo.setTransmissionType(IControlMessage.TransmissionType.BROADCAST);
    broadcastInfo.setPartitionID(peer.getPeerIndex());
    ownershipBroadcast.setControlInfo(broadcastInfo);
    for (ISubgraph<S, V, E, LongWritable, LongWritable, LongWritable> owned : partition.getSubgraphs()) {
        broadcastInfo.addextraInfo(Longs.toByteArray(owned.getSubgraphId().get()));
    }

    sendToAllPartitions(ownershipBroadcast);

    LOG.debug("Subgraph partition Broadcast sent");
    long phaseEnd = System.currentTimeMillis();
    LOG.info("GOFFISH3.PERF.GRAPH_LOAD," + peer.getPeerIndex() + "," + peer.getSuperstepCount() + ","
            + phaseStart + "," + phaseEnd + "," + (phaseEnd - phaseStart));

    peer.sync();

    phaseStart = System.currentTimeMillis();

    // Phase 2: record which partition owns each announced subgraph id.
    for (Message<K, M> incoming = peer.getCurrentMessage(); incoming != null; incoming = peer
            .getCurrentMessage()) {
        ControlMessage announcement = (ControlMessage) incoming.getControlInfo();
        Integer ownerPartition = announcement.getPartitionID();
        for (BytesWritable encodedId : announcement.getExtraInfo()) {
            long decodedId = Longs.fromByteArray(encodedId.copyBytes());
            subgraphPartitionMap.put((K) new LongWritable(decodedId), ownerPartition);
        }
    }
    LOG.debug("Reader Completed");
    phaseEnd = System.currentTimeMillis();
    LOG.info("GOFFISH3.PERF.GRAPH_LOAD," + peer.getPeerIndex() + "," + peer.getSuperstepCount() + ","
            + phaseStart + "," + phaseEnd + "," + (phaseEnd - phaseStart));

    return partition.getSubgraphs();
}

From source file:in.dream_lab.goffish.hama.FullInfoSplitReader.java

License:Apache License

/**
 * Loads this peer's input, routes vertices to the partition named in each record, and
 * exchanges subgraph ownership with all peers.
 *
 * <p>Each input line starts with a partition id followed by a tab. Lines for this peer are
 * passed to {@code createVertex}; all others are sent to the owning peer as the extra info
 * of a control message. After a sync, received lines are turned into vertices, the ids of
 * this partition's subgraphs are broadcast, and after a second sync the received ids are
 * recorded in {@code subgraphPartitionMap} against the sender's partition id.
 *
 * <p>Vertex lines are encoded and decoded as UTF-8 so that peers whose JVMs have different
 * default charsets still exchange identical text.
 *
 * @return the subgraphs of this peer's partition
 */
@Override
public List<ISubgraph<S, V, E, LongWritable, LongWritable, LongWritable>> getSubgraphs()
        throws IOException, SyncException, InterruptedException {

    KeyValuePair<Writable, Writable> pair;
    while ((pair = peer.readNext()) != null) {
        String stringInput = pair.getValue().toString();
        // pid is the first column and its range is 0 to max pid
        int partitionID = Integer.parseInt(stringInput.substring(0, stringInput.indexOf('\t')));
        LOG.debug("partitionID = " + partitionID);

        if (partitionID != peer.getPeerIndex()) {
            // send vertex to its correct partition
            Message<K, M> msg = new Message<>();
            msg.setMessageType(Message.MessageType.VERTEX);
            ControlMessage ctrl = new ControlMessage();
            ctrl.setTransmissionType(IControlMessage.TransmissionType.VERTEX);
            // Explicit charset: must match the decoding done by the receiving peer below.
            ctrl.addextraInfo(stringInput.getBytes(java.nio.charset.StandardCharsets.UTF_8));
            msg.setControlInfo(ctrl);
            peer.send(peer.getPeerName(partitionID), msg);

        } else {

            // belongs to this partition
            createVertex(stringInput);
        }
    }

    peer.sync();

    Message<K, M> msg;
    // receive all incoming vertices
    while ((msg = peer.getCurrentMessage()) != null) {
        ControlMessage receivedCtrl = (ControlMessage) msg.getControlInfo();
        // Each vertex message carries exactly one extra-info entry: the original input line.
        byte[] vertexBytes = receivedCtrl.getExtraInfo().iterator().next().copyBytes();
        createVertex(new String(vertexBytes, java.nio.charset.StandardCharsets.UTF_8));
    }

    // broadcast all subgraphs belonging to this partition
    Message<K, M> subgraphMapppingMessage = new Message<>();
    subgraphMapppingMessage.setMessageType(Message.MessageType.CUSTOM_MESSAGE);
    ControlMessage controlInfo = new ControlMessage();
    controlInfo.setTransmissionType(IControlMessage.TransmissionType.BROADCAST);
    controlInfo.setPartitionID(peer.getPeerIndex());
    subgraphMapppingMessage.setControlInfo(controlInfo);
    for (ISubgraph<S, V, E, LongWritable, LongWritable, LongWritable> subgraph : partition.getSubgraphs()) {

        byte subgraphIDbytes[] = Longs.toByteArray(subgraph.getSubgraphId().get());
        controlInfo.addextraInfo(subgraphIDbytes);
    }

    sendToAllPartitions(subgraphMapppingMessage);

    peer.sync();
    Message<K, M> subgraphMappingInfoMessage;
    while ((subgraphMappingInfoMessage = peer.getCurrentMessage()) != null) {
        ControlMessage receivedCtrl = (ControlMessage) subgraphMappingInfoMessage.getControlInfo();
        Integer partitionID = receivedCtrl.getPartitionID();
        for (BytesWritable rawSubgraphID : receivedCtrl.getExtraInfo()) {
            LongWritable subgraphID = new LongWritable(Longs.fromByteArray(rawSubgraphID.copyBytes()));
            subgraphPartitionMap.put((K) subgraphID, partitionID);
        }
    }

    return partition.getSubgraphs();
}

From source file:in.dream_lab.goffish.hama.FullInfoSplitReaderInt.java

License:Apache License

/**
 * Loads this peer's input, routes vertices to the partition named in each record, and
 * exchanges subgraph ownership with all peers.
 *
 * <p>Each input line starts with a partition id followed by a tab. Lines for this peer are
 * passed to {@code createVertex}; all others are sent to the owning peer as the extra info
 * of a control message. After a sync, received lines are turned into vertices, the ids of
 * this partition's subgraphs are broadcast, and after a second sync the received ids are
 * recorded in {@code subgraphPartitionMap} against the sender's partition id.
 *
 * <p>Vertex lines are encoded and decoded as UTF-8 so that peers whose JVMs have different
 * default charsets still exchange identical text.
 *
 * @return the subgraphs of this peer's partition
 */
@Override
public List<ISubgraph<S, V, E, LongWritable, IntWritable, LongWritable>> getSubgraphs()
        throws IOException, SyncException, InterruptedException {

    KeyValuePair<Writable, Writable> pair;
    while ((pair = peer.readNext()) != null) {
        String stringInput = pair.getValue().toString();
        // pid is the first column and its range is 0 to max pid
        int partitionID = Integer.parseInt(stringInput.substring(0, stringInput.indexOf('\t')));
        LOG.debug("partitionID = " + partitionID);

        if (partitionID != peer.getPeerIndex()) {
            // send vertex to its correct partition
            Message<K, M> msg = new Message<>();
            msg.setMessageType(Message.MessageType.VERTEX);
            ControlMessage ctrl = new ControlMessage();
            ctrl.setTransmissionType(IControlMessage.TransmissionType.VERTEX);
            // Explicit charset: must match the decoding done by the receiving peer below.
            ctrl.addextraInfo(stringInput.getBytes(java.nio.charset.StandardCharsets.UTF_8));
            msg.setControlInfo(ctrl);
            peer.send(peer.getPeerName(partitionID), msg);

        } else {

            // belongs to this partition
            createVertex(stringInput);
        }
    }

    peer.sync();

    Message<K, M> msg;
    // receive all incoming vertices
    while ((msg = peer.getCurrentMessage()) != null) {
        ControlMessage receivedCtrl = (ControlMessage) msg.getControlInfo();
        // Each vertex message carries exactly one extra-info entry: the original input line.
        byte[] vertexBytes = receivedCtrl.getExtraInfo().iterator().next().copyBytes();
        createVertex(new String(vertexBytes, java.nio.charset.StandardCharsets.UTF_8));
    }

    // broadcast all subgraphs belonging to this partition
    Message<K, M> subgraphMapppingMessage = new Message<>();
    subgraphMapppingMessage.setMessageType(Message.MessageType.CUSTOM_MESSAGE);
    ControlMessage controlInfo = new ControlMessage();
    controlInfo.setTransmissionType(IControlMessage.TransmissionType.BROADCAST);
    controlInfo.setPartitionID(peer.getPeerIndex());
    subgraphMapppingMessage.setControlInfo(controlInfo);
    for (ISubgraph<S, V, E, LongWritable, IntWritable, LongWritable> subgraph : partition.getSubgraphs()) {

        byte subgraphIDbytes[] = Longs.toByteArray(subgraph.getSubgraphId().get());
        controlInfo.addextraInfo(subgraphIDbytes);
    }

    sendToAllPartitions(subgraphMapppingMessage);

    peer.sync();
    Message<K, M> subgraphMappingInfoMessage;
    while ((subgraphMappingInfoMessage = peer.getCurrentMessage()) != null) {
        ControlMessage receivedCtrl = (ControlMessage) subgraphMappingInfoMessage.getControlInfo();
        Integer partitionID = receivedCtrl.getPartitionID();
        for (BytesWritable rawSubgraphID : receivedCtrl.getExtraInfo()) {
            LongWritable subgraphID = new LongWritable(Longs.fromByteArray(rawSubgraphID.copyBytes()));
            subgraphPartitionMap.put((K) subgraphID, partitionID);
        }
    }

    return partition.getSubgraphs();
}