List of usage examples for the org.apache.hadoop.io.VIntWritable constructor
public VIntWritable(int value)
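Before the project examples, a minimal round-trip sketch (illustrative only, assuming just hadoop-common on the classpath) showing how this constructor pairs with write() and readFields():

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.io.VIntWritable;

public class VIntWritableRoundTrip {
    public static void main(String[] args) throws IOException {
        // Serialize: small values occupy a single byte on the wire.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        new VIntWritable(42).write(new DataOutputStream(bytes));

        // Deserialize into a fresh instance and read the value back.
        VIntWritable copy = new VIntWritable();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(copy.get()); // prints 42
    }
}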
From source file:org.apache.hive.hcatalog.data.ReaderWriter.java
License:Apache License
public static void writeDatum(DataOutput out, Object val) throws IOException {
    // write the data type
    byte type = DataType.findType(val);
    out.write(type);
    switch (type) {
    case DataType.LIST:
        List<?> list = (List<?>) val;
        int sz = list.size();
        out.writeInt(sz);
        for (int i = 0; i < sz; i++) {
            writeDatum(out, list.get(i));
        }
        return;

    case DataType.MAP:
        Map<?, ?> m = (Map<?, ?>) val;
        out.writeInt(m.size());
        Iterator<?> i = m.entrySet().iterator();
        while (i.hasNext()) {
            Entry<?, ?> entry = (Entry<?, ?>) i.next();
            writeDatum(out, entry.getKey());
            writeDatum(out, entry.getValue());
        }
        return;

    case DataType.INTEGER:
        new VIntWritable((Integer) val).write(out);
        return;

    case DataType.LONG:
        new VLongWritable((Long) val).write(out);
        return;

    case DataType.FLOAT:
        out.writeFloat((Float) val);
        return;

    case DataType.DOUBLE:
        out.writeDouble((Double) val);
        return;

    case DataType.BOOLEAN:
        out.writeBoolean((Boolean) val);
        return;

    case DataType.BYTE:
        out.writeByte((Byte) val);
        return;

    case DataType.SHORT:
        out.writeShort((Short) val);
        return;

    case DataType.STRING:
        String s = (String) val;
        byte[] utfBytes = s.getBytes(ReaderWriter.UTF8);
        out.writeInt(utfBytes.length);
        out.write(utfBytes);
        return;

    case DataType.BINARY:
        byte[] ba = (byte[]) val;
        out.writeInt(ba.length);
        out.write(ba);
        return;

    case DataType.NULL:
        // for NULL we just write out the type
        return;

    case DataType.CHAR:
        new HiveCharWritable((HiveChar) val).write(out);
        return;

    case DataType.VARCHAR:
        new HiveVarcharWritable((HiveVarchar) val).write(out);
        return;

    case DataType.DECIMAL:
        new HiveDecimalWritable((HiveDecimal) val).write(out);
        return;

    case DataType.DATE:
        new DateWritable((Date) val).write(out);
        return;

    case DataType.TIMESTAMP:
        new TimestampWritable((java.sql.Timestamp) val).write(out);
        return;

    default:
        throw new IOException("Unexpected data type " + type + " found in stream.");
    }
}
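The INTEGER branch above delegates to VIntWritable, so small magnitudes cost as little as one byte on the wire instead of a fixed four. A minimal sketch of the matching decode side (a hypothetical helper, not HCatalog's actual readDatum, which also dispatches on the leading type byte); org.apache.hadoop.io.WritableUtils.readVInt(in) is an equivalent shortcut:

import java.io.DataInput;
import java.io.IOException;
import org.apache.hadoop.io.VIntWritable;

final class VIntDecode {
    // Reads back an int written via new VIntWritable(value).write(out).
    static int readVInt(DataInput in) throws IOException {
        VIntWritable v = new VIntWritable();
        v.readFields(in); // consumes 1 to 5 bytes depending on magnitude
        return v.get();
    }
}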
From source file:org.commoncrawl.service.pagerank.PageRankValueReWriter.java
License:Open Source License
public static void main(String[] args) {
    int nodeIndex = Integer.parseInt(args[0]);
    LOG.info("Node Index:" + args[0]);
    int nodeCount = Integer.parseInt(args[1]);
    LOG.info("Node Count:" + args[1]);
    String idsDirectory = args[2];
    LOG.info("ID Directory is:" + args[2]);
    String valuesDirectory = args[3];
    LOG.info("Values Directory is:" + args[3]);
    int iterationNumber = Integer.parseInt(args[4]);
    LOG.info("Iteration Number is:" + args[4]);
    int runDate = Integer.parseInt(args[5]);
    LOG.info("runDate is:" + args[5]);

    Configuration conf = new Configuration();
    conf.addResource("nutch-default.xml");
    conf.addResource("nutch-site.xml");
    conf.addResource("hadoop-default.xml");
    conf.addResource("hadoop-site.xml");
    conf.addResource("commoncrawl-default.xml");
    conf.addResource("commoncrawl-site.xml");

    CrawlEnvironment.setHadoopConfig(conf);
    CrawlEnvironment.setDefaultHadoopFSURI("hdfs://ccn01:9000/");

    try {
        FileSystem fileSystem = CrawlEnvironment.getDefaultFileSystem();
        Path outputPath = new Path("crawl/pageRank/out", Integer.toString(runDate));
        LOG.info("Output Directory is:" + outputPath);
        fileSystem.mkdirs(outputPath);

        // iterate values based on node id
        for (int i = nodeIndex; i < nodeIndex + 1; ++i) {
            LOG.info("Processing output for Node:" + i);
            Path valuePath = new Path(valuesDirectory,
                    "value_" + NUMBER_FORMAT.format(iterationNumber) + "-" + NUMBER_FORMAT.format(i));
            LOG.info("Value File Path is:" + valuePath);
            Path idsPath = new Path(idsDirectory, "ids_" + NUMBER_FORMAT.format(i));
            LOG.info("IDs File Path is:" + idsPath);
            Path outputFile = new Path(outputPath, "part-" + NUMBER_FORMAT.format(i));
            LOG.info("Output File Path is:" + outputFile);

            byte[] valueData = null;
            {
                FileStatus valueFileStatus = fileSystem.getFileStatus(valuePath);
                FSDataInputStream valueInputStream = fileSystem.open(valuePath);
                LOG.info("Allocating Value Array of Size:" + valueFileStatus.getLen());
                valueData = new byte[(int) valueFileStatus.getLen()];
                LOG.info("Reading Value Data Size:" + valueFileStatus.getLen());
                for (int offset = 0, totalRead = 0; offset < valueFileStatus.getLen();) {
                    int bytesToRead = Math.min(16384, (int) valueFileStatus.getLen() - totalRead);
                    // read() may return fewer bytes than requested, so advance by the
                    // actual count rather than assuming the buffer was filled
                    int bytesRead = valueInputStream.read(valueData, offset, bytesToRead);
                    if (bytesRead < 0) {
                        break;
                    }
                    offset += bytesRead;
                    totalRead += bytesRead;
                }
                valueInputStream.close();
                LOG.info("Finished Reading Value Data Size:" + valueFileStatus.getLen());
            }

            byte[] idData = null;
            {
                FileStatus idFileStatus = fileSystem.getFileStatus(idsPath);
                FSDataInputStream idInputStream = fileSystem.open(idsPath);
                LOG.info("Allocating ID Array of Size:" + idFileStatus.getLen());
                idData = new byte[(int) idFileStatus.getLen()];
                LOG.info("Reading ID Array Data Size:" + idFileStatus.getLen());
                for (int offset = 0, totalRead = 0; offset < idFileStatus.getLen();) {
                    int bytesToRead = Math.min(16384, (int) idFileStatus.getLen() - totalRead);
                    int bytesRead = idInputStream.read(idData, offset, bytesToRead);
                    if (bytesRead < 0) {
                        break;
                    }
                    offset += bytesRead;
                    totalRead += bytesRead;
                }
                idInputStream.close();
                LOG.info("Finished Reading ID Array Data Size:" + idFileStatus.getLen());
            }

            DataInputStream idInputStream = new DataInputStream(new ByteArrayInputStream(idData));
            DataInputStream valueInputStream = new DataInputStream(new ByteArrayInputStream(valueData));

            SequenceFile.Writer output = SequenceFile.createWriter(fileSystem, conf, outputFile, URLFP.class,
                    VIntWritable.class);
            LOG.info("Opened Output Stream");

            URLFP currentFP = new URLFP();
            boolean eof = false;
            int itemCount = 0;
            while (!eof) {
                try {
                    long timeStart = System.currentTimeMillis();
                    currentFP.readFields(idInputStream);
                    long timeEnd = System.currentTimeMillis();
                    // LOG.info("ReadFields Took:" + (timeEnd - timeStart));
                    ++itemCount;
                } catch (EOFException e) {
                    LOG.info("EOF reached. Total Item Count:" + itemCount);
                    eof = true;
                }
                if (!eof) {
                    long valueFingerprint = WritableUtils.readVLong(valueInputStream);
                    int prValue = valueInputStream.readInt();
                    if (valueFingerprint != currentFP.getUrlHash()) {
                        throw new IOException("Fingerprint Mismatch! Expected:" + currentFP.getUrlHash()
                                + " Found:" + valueFingerprint + " ItemCount:" + itemCount);
                    }
                    output.append(currentFP, new VIntWritable(prValue));
                    if (itemCount % 10000 == 0) {
                        LOG.info("Processed " + itemCount + " Values. Last Sampled FP:" + valueFingerprint
                                + " With PR:" + prValue);
                    }
                    currentFP = new URLFP();
                }
            }
            LOG.info("Done outputting pagerank for Node:" + i + " ItemCount:" + itemCount);
            valueInputStream.close();
            idInputStream.close();
            output.close();
        }
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
    }
}
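The job above appends (URLFP, VIntWritable) pairs to a SequenceFile. A hedged sketch of scanning such a file back, using the same pre-2.x SequenceFile API as the example; the key class is read from the file header via reflection, so this sketch does not depend on URLFP directly:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.VIntWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;

// Sketch: scan the (key, VIntWritable) pairs the job above wrote.
public class ScanPageRankOutput {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(args[0]), conf);
        try {
            // Instantiate the key type recorded in the SequenceFile header.
            Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            VIntWritable value = new VIntWritable();
            while (reader.next(key, value)) {
                System.out.println(key + "\t" + value.get());
            }
        } finally {
            reader.close();
        }
    }
}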
From source file:org.elasticsearch.hadoop.serialization.WritableTypeToJsonTest.java
License:Apache License
@Test
public void testVInteger() {
    writableTypeToJson(new VIntWritable(Integer.MAX_VALUE));
}
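Integer.MAX_VALUE is a useful boundary case because it forces the longest variable-length encoding. A standalone sketch (plain Java, outside the elasticsearch-hadoop test harness) that prints encoded sizes across magnitudes:

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.io.VIntWritable;

public class VIntSizes {
    static int encodedSize(int v) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        new VIntWritable(v).write(new DataOutputStream(bytes));
        return bytes.size();
    }

    public static void main(String[] args) throws IOException {
        // Small magnitudes fit in one byte; Integer.MAX_VALUE needs five
        // (a length marker byte plus four value bytes).
        for (int v : new int[] { 0, 127, 128, Integer.MAX_VALUE }) {
            System.out.println(v + " -> " + encodedSize(v) + " byte(s)");
        }
    }
}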
From source file:org.elasticsearch.hadoop.util.WritableUtils.java
License:Apache License
@SuppressWarnings({ "unchecked", "rawtypes" })
public static Writable toWritable(Object object) {
    if (object instanceof Writable) {
        return (Writable) object;
    }
    if (object == null) {
        return NullWritable.get();
    }
    if (object instanceof String) {
        return new Text((String) object);
    }
    if (object instanceof Long) {
        return new VLongWritable((Long) object);
    }
    if (object instanceof Integer) {
        return new VIntWritable((Integer) object);
    }
    if (object instanceof Byte) {
        return new ByteWritable((Byte) object);
    }
    if (object instanceof Short) {
        return WritableCompatUtil.availableShortWritable((Short) object);
    }
    if (object instanceof Double) {
        return new DoubleWritable((Double) object);
    }
    if (object instanceof Float) {
        return new FloatWritable((Float) object);
    }
    if (object instanceof Boolean) {
        return new BooleanWritable((Boolean) object);
    }
    if (object instanceof byte[]) {
        return new BytesWritable((byte[]) object);
    }
    if (object instanceof List) {
        List<Object> list = (List<Object>) object;
        if (!list.isEmpty()) {
            Object first = list.get(0);
            Writable[] content = new Writable[list.size()];
            for (int i = 0; i < content.length; i++) {
                content[i] = toWritable(list.get(i));
            }
            return new ArrayWritable(toWritable(first).getClass(), content);
        }
        return new ArrayWritable(NullWritable.class, new Writable[0]);
    }
    if (object instanceof SortedSet) {
        SortedMapWritable smap = new SortedMapWritable();
        SortedSet<Object> set = (SortedSet) object;
        for (Object obj : set) {
            smap.put((WritableComparable) toWritable(obj), NullWritable.get());
        }
        return smap;
    }
    if (object instanceof Set) {
        MapWritable map = new MapWritable();
        Set<Object> set = (Set) object;
        for (Object obj : set) {
            map.put(toWritable(obj), NullWritable.get());
        }
        return map;
    }
    if (object instanceof SortedMap) {
        SortedMapWritable smap = new SortedMapWritable();
        Map<Object, Object> map = (Map) object;
        for (Map.Entry<?, ?> entry : map.entrySet()) {
            smap.put((WritableComparable) toWritable(entry.getKey()), toWritable(entry.getValue()));
        }
        return smap;
    }
    if (object instanceof Map) {
        MapWritable result = new MapWritable();
        Map<Object, Object> map = (Map) object;
        for (Map.Entry<?, ?> entry : map.entrySet()) {
            result.put(toWritable(entry.getKey()), toWritable(entry.getValue()));
        }
        return result;
    }
    // fall-back to byte array
    return new BytesWritable(object.toString().getBytes(StringUtils.UTF_8));
}
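A short usage sketch of the converter above (assumes elasticsearch-hadoop on the classpath; the main class is illustrative only), showing that boxed Integer values come back as the variable-length VIntWritable rather than IntWritable:

import org.apache.hadoop.io.VIntWritable;
import org.apache.hadoop.io.Writable;
import org.elasticsearch.hadoop.util.WritableUtils;

public class ToWritableDemo {
    public static void main(String[] args) {
        // Integer values map to the variable-length VIntWritable branch above.
        Writable w = WritableUtils.toWritable(Integer.valueOf(42));
        System.out.println(w.getClass().getSimpleName()); // VIntWritable
        System.out.println(((VIntWritable) w).get());     // 42
    }
}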
From source file:org.terrier.structures.indexing.singlepass.hadoop.Inv2DirectMultiReduce.java
License:Mozilla Public License
/** Take an iterator of postings. Each posting is inverted, and a new posting is generated. */
public void map(IntWritable termId, Wrapper<IterablePosting> postingWrapper,
        OutputCollector<VIntWritable, Posting> collector, Reporter reporter) throws IOException {
    final IterablePosting postingIterator = postingWrapper.getObject();
    reporter.setStatus("Mapping for id " + termId);
    while (postingIterator.next() != IterablePosting.EOL) {
        WritablePosting wp = postingIterator.asWritablePosting();
        int docid = postingIterator.getId();
        // invert: the emitted key is the docid, and the posting carries the term id
        wp.setId(termId.get());
        reporter.progress();
        collector.collect(new VIntWritable(docid), wp);
    }
}
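Since VIntWritable is the map output key, all postings for one document arrive together at a single reduce call. A hypothetical reducer skeleton in the same old org.apache.hadoop.mapred API (Terrier's actual reducer inside Inv2DirectMultiReduce differs; this only illustrates the pairing of key and value types):

import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.VIntWritable;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.terrier.structures.postings.Posting;

// Hypothetical skeleton: receives, per VIntWritable docid, every posting
// emitted for that document by the map() above.
public class DocidPostingsReducer {
    public void reduce(VIntWritable docid, Iterator<Posting> postings,
            OutputCollector<VIntWritable, Posting> collector, Reporter reporter) throws IOException {
        while (postings.hasNext()) {
            Posting p = postings.next();
            // ... assemble the direct-index entry for this document from p ...
            reporter.progress();
        }
    }
}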