Example usage for the org.apache.hadoop.io.VIntWritable constructor VIntWritable(int)

Introduction

This page collects example usages of the org.apache.hadoop.io.VIntWritable constructor VIntWritable(int value), drawn from open-source projects.

Prototype

public VIntWritable(int value) 
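
A minimal round-trip sketch of the constructor (the stream setup here is illustrative, not taken from the sources below):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

import org.apache.hadoop.io.VIntWritable;

public class VIntWritableDemo {
    public static void main(String[] args) throws Exception {
        // Construct with an int value; get() returns it back.
        VIntWritable v = new VIntWritable(300);

        // write() emits a variable-length encoding, so small magnitudes
        // take fewer bytes than a fixed 4-byte int.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        v.write(new DataOutputStream(bytes));

        // Deserialize into a fresh instance created with the no-arg constructor.
        VIntWritable copy = new VIntWritable();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(copy.get()); // prints 300
    }
}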

Usage

From source file: org.apache.hive.hcatalog.data.ReaderWriter.java

License: Apache License

public static void writeDatum(DataOutput out, Object val) throws IOException {
    // write the data type
    byte type = DataType.findType(val);
    out.write(type);
    switch (type) {
    case DataType.LIST:
        List<?> list = (List<?>) val;
        int sz = list.size();
        out.writeInt(sz);
        for (int i = 0; i < sz; i++) {
            writeDatum(out, list.get(i));
        }
        return;

    case DataType.MAP:
        Map<?, ?> m = (Map<?, ?>) val;
        out.writeInt(m.size());
        Iterator<?> i = m.entrySet().iterator();
        while (i.hasNext()) {
            Entry<?, ?> entry = (Entry<?, ?>) i.next();
            writeDatum(out, entry.getKey());
            writeDatum(out, entry.getValue());
        }
        return;

    case DataType.INTEGER:
        new VIntWritable((Integer) val).write(out);
        return;

    case DataType.LONG:
        new VLongWritable((Long) val).write(out);
        return;

    case DataType.FLOAT:
        out.writeFloat((Float) val);
        return;

    case DataType.DOUBLE:
        out.writeDouble((Double) val);
        return;

    case DataType.BOOLEAN:
        out.writeBoolean((Boolean) val);
        return;

    case DataType.BYTE:
        out.writeByte((Byte) val);
        return;

    case DataType.SHORT:
        out.writeShort((Short) val);
        return;

    case DataType.STRING:
        String s = (String) val;
        byte[] utfBytes = s.getBytes(ReaderWriter.UTF8);
        out.writeInt(utfBytes.length);
        out.write(utfBytes);
        return;

    case DataType.BINARY:
        byte[] ba = (byte[]) val;
        out.writeInt(ba.length);
        out.write(ba);
        return;

    case DataType.NULL:
        //for NULL we just write out the type
        return;
    case DataType.CHAR:
        new HiveCharWritable((HiveChar) val).write(out);
        return;
    case DataType.VARCHAR:
        new HiveVarcharWritable((HiveVarchar) val).write(out);
        return;
    case DataType.DECIMAL:
        new HiveDecimalWritable((HiveDecimal) val).write(out);
        return;
    case DataType.DATE:
        new DateWritable((Date) val).write(out);
        return;
    case DataType.TIMESTAMP:
        new TimestampWritable((java.sql.Timestamp) val).write(out);
        return;
    default:
        throw new IOException("Unexpected data type " + type + " found in stream.");
    }
}
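
The INTEGER branch above pairs naturally with a read side that consumes the type byte and then the varint payload. A hedged sketch (the method name and DataType wiring are illustrative, not quoted from Hive):

public static Object readIntegerDatum(DataInput in) throws IOException {
    byte type = in.readByte(); // the type marker written first by writeDatum
    if (type != DataType.INTEGER) {
        throw new IOException("Expected INTEGER, found " + type);
    }
    VIntWritable v = new VIntWritable(); // no-arg constructor, then readFields
    v.readFields(in); // decodes the variable-length int written above
    return Integer.valueOf(v.get());
}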

From source file: org.commoncrawl.service.pagerank.PageRankValueReWriter.java

License: Open Source License

public static void main(String[] args) {

    int nodeIndex = Integer.parseInt(args[0]);
    LOG.info("Node Index:" + args[0]);
    int nodeCount = Integer.parseInt(args[1]);
    LOG.info("Node Count:" + args[1]);
    String idsDirectory = args[2];
    LOG.info("ID Directory is:" + args[2]);
    String valuesDirectory = args[3];
    LOG.info("Values Directory is:" + args[3]);
    int iterationNumber = Integer.parseInt(args[4]);
    LOG.info("Iteration Number is:" + args[4]);
    int runDate = Integer.parseInt(args[5]);
    LOG.info("runDate is:" + args[5]);

    Configuration conf = new Configuration();

    conf.addResource("nutch-default.xml");
    conf.addResource("nutch-site.xml");
    conf.addResource("hadoop-default.xml");
    conf.addResource("hadoop-site.xml");
    conf.addResource("commoncrawl-default.xml");
    conf.addResource("commoncrawl-site.xml");

    CrawlEnvironment.setHadoopConfig(conf);
    CrawlEnvironment.setDefaultHadoopFSURI("hdfs://ccn01:9000/");

    try {
        FileSystem fileSystem = CrawlEnvironment.getDefaultFileSystem();

        Path outputPath = new Path("crawl/pageRank/out", Integer.toString(runDate));
        LOG.info("Output Directory is:" + outputPath);

        fileSystem.mkdirs(outputPath);

        //iterate values based on node id 
        for (int i = nodeIndex; i < nodeIndex + 1; ++i) {

            LOG.info("Processing output for Node:" + i);
            Path valuePath = new Path(valuesDirectory,
                    "value_" + NUMBER_FORMAT.format(iterationNumber) + "-" + NUMBER_FORMAT.format(i));
            LOG.info("Value File Path is:" + valuePath);
            Path idsPath = new Path(idsDirectory, "ids_" + NUMBER_FORMAT.format(i));
            LOG.info("IDs File Path is:" + idsPath);
            Path outputFile = new Path(outputPath, "part-" + NUMBER_FORMAT.format(i));
            LOG.info("Output File Path is:" + outputFile);
            byte[] valueData = null;
            {
                FileStatus valueFileStatus = fileSystem.getFileStatus(valuePath);
                FSDataInputStream valueInputStream = fileSystem.open(valuePath);
                LOG.info("Allocating Value Array of Size:" + valueFileStatus.getLen());
                valueData = new byte[(int) valueFileStatus.getLen()];
                LOG.info("Reading Value Data Size:" + valueFileStatus.getLen());
                for (int offset = 0; offset < valueFileStatus.getLen();) {
                    int bytesRead = valueInputStream.read(valueData, offset,
                            Math.min(16384, (int) valueFileStatus.getLen() - offset));
                    if (bytesRead < 0) {
                        throw new EOFException("Unexpected EOF reading " + valuePath);
                    }
                    // advance by the bytes actually read; read() may return fewer than requested
                    offset += bytesRead;
                }
                valueInputStream.close();
                LOG.info("Finished Reading Value Data Size:" + valueFileStatus.getLen());
            }

            byte[] idData = null;
            {
                FileStatus idFileStatus = fileSystem.getFileStatus(idsPath);
                FSDataInputStream idInputStream = fileSystem.open(idsPath);
                LOG.info("Allocating ID Array of Size:" + idFileStatus.getLen());
                idData = new byte[(int) idFileStatus.getLen()];
                LOG.info("Reading ID Array  Data Size:" + idFileStatus.getLen());
                for (int offset = 0, totalRead = 0; offset < idFileStatus.getLen();) {
                    int bytesToRead = Math.min(16384, (int) idFileStatus.getLen() - totalRead);
                    idInputStream.read(idData, offset, bytesToRead);
                    offset += bytesToRead;
                    totalRead += bytesToRead;
                }
                idInputStream.close();
                LOG.info("Finished Reading ID Array Data Size:" + idFileStatus.getLen());

            }

            DataInputStream idInputStream = new DataInputStream(new ByteArrayInputStream(idData));
            DataInputStream valueInputStream = new DataInputStream(new ByteArrayInputStream(valueData));

            SequenceFile.Writer output = SequenceFile.createWriter(fileSystem, conf, outputFile, URLFP.class,
                    VIntWritable.class);
            LOG.info("Opened Output Stream");

            URLFP currentFP = new URLFP();
            boolean eof = false;
            int itemCount = 0;
            while (!eof) {

                try {
                    long timeStart = System.currentTimeMillis();
                    currentFP.readFields(idInputStream);
                    long timeEnd = System.currentTimeMillis();
                    // LOG.info("ReadFields Took:" + (timeEnd - timeStart));
                    ++itemCount;
                } catch (EOFException e) {
                    LOG.info("EOF reached. Total Item Count:" + itemCount);
                    eof = true;
                }

                if (!eof) {
                    long valueFingerprint = WritableUtils.readVLong(valueInputStream);

                    int prValue = valueInputStream.readInt();

                    if (valueFingerprint != currentFP.getUrlHash()) {
                        throw new IOException("Fingerprint Mismatch! Expected:" + currentFP.getUrlHash()
                                + " Found:" + valueFingerprint + " ItemCount:" + itemCount);
                    }

                    output.append(currentFP, new VIntWritable(prValue));

                    if (itemCount % 10000 == 0) {
                        LOG.info("Processed " + itemCount + " Values. Last Sampled FP:" + valueFingerprint
                                + " With PR:" + prValue);
                    }
                    currentFP = new URLFP();
                }
            }
            LOG.info("Done outputing pagerank for Node:" + i + " ItemCount:" + itemCount);

            valueInputStream.close();
            idInputStream.close();
            output.close();
        }
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
    }
}
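
To sanity-check the output, the SequenceFile written above can be read back with the matching key/value types. A hedged sketch (the loop is illustrative; outputFile, fileSystem, and conf are as in main above):

SequenceFile.Reader reader = new SequenceFile.Reader(fileSystem, outputFile, conf);
try {
    URLFP key = new URLFP();
    VIntWritable value = new VIntWritable();
    // next() fills both writables and returns false at end of file
    while (reader.next(key, value)) {
        System.out.println(key.getUrlHash() + " -> " + value.get());
    }
} finally {
    reader.close();
}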

From source file: org.elasticsearch.hadoop.serialization.WritableTypeToJsonTest.java

License: Apache License

@Test
public void testVInteger() {
    writableTypeToJson(new VIntWritable(Integer.MAX_VALUE));
}
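
Integer.MAX_VALUE is the worst case for the varint encoding: it costs five bytes on the wire, while small values fit in one. A minimal sketch measuring encoded sizes (the harness is assumed, not part of the test suite):

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;

import org.apache.hadoop.io.VIntWritable;

public class VIntSizeDemo {
    static int encodedSize(int value) throws Exception {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        new VIntWritable(value).write(new DataOutputStream(bytes));
        return bytes.size();
    }

    public static void main(String[] args) throws Exception {
        System.out.println(encodedSize(1));                 // 1 byte
        System.out.println(encodedSize(Integer.MAX_VALUE)); // 5 bytes
    }
}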

From source file: org.elasticsearch.hadoop.util.WritableUtils.java

License: Apache License

@SuppressWarnings({ "unchecked", "rawtypes" })
public static Writable toWritable(Object object) {
    if (object instanceof Writable) {
        return (Writable) object;
    }
    if (object == null) {
        return NullWritable.get();
    }
    if (object instanceof String) {
        return new Text((String) object);
    }
    if (object instanceof Long) {
        return new VLongWritable((Long) object);
    }
    if (object instanceof Integer) {
        return new VIntWritable((Integer) object);
    }
    if (object instanceof Byte) {
        return new ByteWritable((Byte) object);
    }
    if (object instanceof Short) {
        return WritableCompatUtil.availableShortWritable((Short) object);
    }
    if (object instanceof Double) {
        return new DoubleWritable((Double) object);
    }
    if (object instanceof Float) {
        return new FloatWritable((Float) object);
    }
    if (object instanceof Boolean) {
        return new BooleanWritable((Boolean) object);
    }
    if (object instanceof byte[]) {
        return new BytesWritable((byte[]) object);
    }
    if (object instanceof List) {
        List<Object> list = (List<Object>) object;
        if (!list.isEmpty()) {
            Object first = list.get(0);
            Writable[] content = new Writable[list.size()];
            for (int i = 0; i < content.length; i++) {
                content[i] = toWritable(list.get(i));
            }
            return new ArrayWritable(toWritable(first).getClass(), content);
        }
        return new ArrayWritable(NullWritable.class, new Writable[0]);
    }
    if (object instanceof SortedSet) {
        SortedMapWritable smap = new SortedMapWritable();
        SortedSet<Object> set = (SortedSet) object;
        for (Object obj : set) {
            smap.put((WritableComparable) toWritable(obj), NullWritable.get());
        }
        return smap;
    }
    if (object instanceof Set) {
        MapWritable map = new MapWritable();
        Set<Object> set = (Set) object;
        for (Object obj : set) {
            map.put(toWritable(obj), NullWritable.get());
        }
        return map;
    }
    if (object instanceof SortedMap) {
        SortedMapWritable smap = new SortedMapWritable();
        Map<Object, Object> map = (Map) object;
        for (Map.Entry<?, ?> entry : map.entrySet()) {
            smap.put((WritableComparable) toWritable(entry.getKey()), toWritable(entry.getValue()));
        }
        return smap;
    }
    if (object instanceof Map) {
        MapWritable result = new MapWritable();
        Map<Object, Object> map = (Map) object;
        for (Map.Entry<?, ?> entry : map.entrySet()) {
            result.put(toWritable(entry.getKey()), toWritable(entry.getValue()));
        }
        return result;
    }
    // fall-back to bytearray
    return new BytesWritable(object.toString().getBytes(StringUtils.UTF_8));
}
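
The dispatch order above matters: a Writable passes through untouched, boxed Integer and Long get the variable-length writables, and anything unrecognized falls back to its UTF-8 string bytes. A hedged sketch of the expected mappings (the assertions are illustrative):

assert toWritable("es") instanceof Text;
assert toWritable(42) instanceof VIntWritable;            // boxed Integer
assert toWritable(42L) instanceof VLongWritable;          // boxed Long
assert toWritable(null) == NullWritable.get();            // singleton null marker
assert toWritable(new Object()) instanceof BytesWritable; // toString() fall-back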

From source file: org.terrier.structures.indexing.singlepass.hadoop.Inv2DirectMultiReduce.java

License: Mozilla Public License

/** Take an iterator of postings. Each posting is inverted, and a new posting is generated. */
public void map(IntWritable termId, Wrapper<IterablePosting> postingWrapper,
        OutputCollector<VIntWritable, Posting> collector, Reporter reporter) throws IOException {
    final IterablePosting postingIterator = postingWrapper.getObject();
    reporter.setStatus("Mapping for id " + termId);
    while (postingIterator.next() != IterablePosting.EOL) {
        WritablePosting wp = postingIterator.asWritablePosting();
        int docid = postingIterator.getId();
        wp.setId(termId.get());
        reporter.progress();
        collector.collect(new VIntWritable(docid), wp);
    }
}
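
VIntWritable implements WritableComparable, so it can serve as the shuffle key here; a hedged sketch of what the default HashPartitioner would compute for such a key (the counts are illustrative):

int numReduceTasks = 26;                    // illustrative
int docid = 42;                             // illustrative docid, as produced in map() above
VIntWritable key = new VIntWritable(docid);
// the default HashPartitioner routes the key by its hash code
int partition = (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks;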