List of usage examples for the org.apache.hadoop.io.IntWritable constructor IntWritable()
public IntWritable()
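The no-argument constructor builds an IntWritable whose value defaults to 0; callers then assign a value with set(int) and read it back with get(). The short sketch below is not taken from any of the projects listed on this page; it is a minimal, self-contained illustration of the construct/set/serialize/reuse cycle that the examples rely on.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;

public class IntWritableRoundTrip {
    public static void main(String[] args) throws IOException {
        // Create an empty IntWritable (value 0) and assign a value afterwards.
        IntWritable writable = new IntWritable();
        writable.set(42);

        // Serialize the value with write(DataOutput).
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        writable.write(new DataOutputStream(bytes));

        // Deserialize into another no-arg instance with readFields(DataInput);
        // reusing such an instance across records avoids per-record allocation.
        IntWritable reused = new IntWritable();
        reused.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(reused.get()); // prints 42
    }
}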
From source file:com.ebay.nest.io.sede.binarysortable.BinarySortableSerDe.java
License:Apache License
static Object deserialize(InputByteBuffer buffer, TypeInfo type, boolean invert, Object reuse) throws IOException {
  // Is this field a null?
  byte isNull = buffer.read(invert);
  if (isNull == 0) {
    return null;
  }
  assert (isNull == 1);
  switch (type.getCategory()) {
  case PRIMITIVE: {
    PrimitiveTypeInfo ptype = (PrimitiveTypeInfo) type;
    switch (ptype.getPrimitiveCategory()) {
    case VOID: {
      return null;
    }
    case BOOLEAN: {
      BooleanWritable r = reuse == null ? new BooleanWritable() : (BooleanWritable) reuse;
      byte b = buffer.read(invert);
      assert (b == 1 || b == 2);
      r.set(b == 2);
      return r;
    }
    case BYTE: {
      ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse;
      r.set((byte) (buffer.read(invert) ^ 0x80));
      return r;
    }
    case SHORT: {
      ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse;
      int v = buffer.read(invert) ^ 0x80;
      v = (v << 8) + (buffer.read(invert) & 0xff);
      r.set((short) v);
      return r;
    }
    case INT: {
      IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse;
      r.set(deserializeInt(buffer, invert));
      return r;
    }
    case LONG: {
      LongWritable r = reuse == null ? new LongWritable() : (LongWritable) reuse;
      long v = buffer.read(invert) ^ 0x80;
      for (int i = 0; i < 7; i++) {
        v = (v << 8) + (buffer.read(invert) & 0xff);
      }
      r.set(v);
      return r;
    }
    case FLOAT: {
      FloatWritable r = reuse == null ? new FloatWritable() : (FloatWritable) reuse;
      int v = 0;
      for (int i = 0; i < 4; i++) {
        v = (v << 8) + (buffer.read(invert) & 0xff);
      }
      if ((v & (1 << 31)) == 0) {
        // negative number, flip all bits
        v = ~v;
      } else {
        // positive number, flip the first bit
        v = v ^ (1 << 31);
      }
      r.set(Float.intBitsToFloat(v));
      return r;
    }
    case DOUBLE: {
      DoubleWritable r = reuse == null ? new DoubleWritable() : (DoubleWritable) reuse;
      long v = 0;
      for (int i = 0; i < 8; i++) {
        v = (v << 8) + (buffer.read(invert) & 0xff);
      }
      if ((v & (1L << 63)) == 0) {
        // negative number, flip all bits
        v = ~v;
      } else {
        // positive number, flip the first bit
        v = v ^ (1L << 63);
      }
      r.set(Double.longBitsToDouble(v));
      return r;
    }
    case STRING: {
      Text r = reuse == null ? new Text() : (Text) reuse;
      return deserializeText(buffer, invert, r);
    }
    case VARCHAR: {
      HiveVarcharWritable r = reuse == null ? new HiveVarcharWritable() : (HiveVarcharWritable) reuse;
      // Use HiveVarchar's internal Text member to read the value.
      deserializeText(buffer, invert, r.getTextValue());
      // If we cache helper data for deserialization we could avoid having
      // to call getVarcharMaxLength() on every deserialize call.
      r.enforceMaxLength(getVarcharMaxLength(type));
      return r;
    }
    case BINARY: {
      BytesWritable bw = new BytesWritable();
      // Get the actual length first
      int start = buffer.tell();
      int length = 0;
      do {
        byte b = buffer.read(invert);
        if (b == 0) {
          // end of string
          break;
        }
        if (b == 1) {
          // the last char is an escape char. read the actual char
          buffer.read(invert);
        }
        length++;
      } while (true);
      if (length == buffer.tell() - start) {
        // No escaping happened, so we are already done.
        bw.set(buffer.getData(), start, length);
      } else {
        // Escaping happened, we need to copy byte-by-byte.
        // 1. Set the length first.
        bw.set(buffer.getData(), start, length);
        // 2. Reset the pointer.
        buffer.seek(start);
        // 3. Copy the data.
        byte[] rdata = bw.getBytes();
        for (int i = 0; i < length; i++) {
          byte b = buffer.read(invert);
          if (b == 1) {
            // The last char is an escape char, read the actual char.
            // The serialization format escape \0 to \1, and \1 to \2,
            // to make sure the string is null-terminated.
            b = (byte) (buffer.read(invert) - 1);
          }
          rdata[i] = b;
        }
        // 4. Read the null terminator.
        byte b = buffer.read(invert);
        assert (b == 0);
      }
      return bw;
    }
    case DATE: {
      DateWritable d = reuse == null ? new DateWritable() : (DateWritable) reuse;
      d.set(deserializeInt(buffer, invert));
      return d;
    }
    case TIMESTAMP:
      TimestampWritable t = (reuse == null ? new TimestampWritable() : (TimestampWritable) reuse);
      byte[] bytes = new byte[TimestampWritable.BINARY_SORTABLE_LENGTH];
      for (int i = 0; i < bytes.length; i++) {
        bytes[i] = buffer.read(invert);
      }
      t.setBinarySortable(bytes, 0);
      return t;
    case DECIMAL: {
      // See serialization of decimal for explanation (below)
      HiveDecimalWritable bdw = (reuse == null ? new HiveDecimalWritable() : (HiveDecimalWritable) reuse);
      int b = buffer.read(invert) - 1;
      assert (b == 1 || b == -1 || b == 0);
      boolean positive = b != -1;
      int factor = buffer.read(invert) ^ 0x80;
      for (int i = 0; i < 3; i++) {
        factor = (factor << 8) + (buffer.read(invert) & 0xff);
      }
      if (!positive) {
        factor = -factor;
      }
      int start = buffer.tell();
      int length = 0;
      do {
        b = buffer.read(positive ? invert : !invert);
        assert (b != 1);
        if (b == 0) {
          // end of digits
          break;
        }
        length++;
      } while (true);
      if (decimalBuffer == null || decimalBuffer.length < length) {
        decimalBuffer = new byte[length];
      }
      buffer.seek(start);
      for (int i = 0; i < length; ++i) {
        decimalBuffer[i] = buffer.read(positive ? invert : !invert);
      }
      // read the null byte again
      buffer.read(positive ? invert : !invert);
      String digits = new String(decimalBuffer, 0, length, decimalCharSet);
      BigInteger bi = new BigInteger(digits);
      HiveDecimal bd = new HiveDecimal(bi).scaleByPowerOfTen(factor - length);
      if (!positive) {
        bd = bd.negate();
      }
      bdw.set(bd);
      return bdw;
    }
    default: {
      throw new RuntimeException("Unrecognized type: " + ptype.getPrimitiveCategory());
    }
    }
  }
  case LIST: {
    ListTypeInfo ltype = (ListTypeInfo) type;
    TypeInfo etype = ltype.getListElementTypeInfo();
    // Create the list if needed
    ArrayList<Object> r = reuse == null ? new ArrayList<Object>() : (ArrayList<Object>) reuse;
    // Read the list
    int size = 0;
    while (true) {
      int more = buffer.read(invert);
      if (more == 0) {
        // \0 to terminate
        break;
      }
      // \1 followed by each element
      assert (more == 1);
      if (size == r.size()) {
        r.add(null);
      }
      r.set(size, deserialize(buffer, etype, invert, r.get(size)));
      size++;
    }
    // Remove additional elements if the list is reused
    while (r.size() > size) {
      r.remove(r.size() - 1);
    }
    return r;
  }
  case MAP: {
    MapTypeInfo mtype = (MapTypeInfo) type;
    TypeInfo ktype = mtype.getMapKeyTypeInfo();
    TypeInfo vtype = mtype.getMapValueTypeInfo();
    // Create the map if needed
    Map<Object, Object> r;
    if (reuse == null) {
      r = new HashMap<Object, Object>();
    } else {
      r = (HashMap<Object, Object>) reuse;
      r.clear();
    }
    while (true) {
      int more = buffer.read(invert);
      if (more == 0) {
        // \0 to terminate
        break;
      }
      // \1 followed by each key and then each value
      assert (more == 1);
      Object k = deserialize(buffer, ktype, invert, null);
      Object v = deserialize(buffer, vtype, invert, null);
      r.put(k, v);
    }
    return r;
  }
  case STRUCT: {
    StructTypeInfo stype = (StructTypeInfo) type;
    List<TypeInfo> fieldTypes = stype.getAllStructFieldTypeInfos();
    int size = fieldTypes.size();
    // Create the struct if needed
    ArrayList<Object> r = reuse == null ? new ArrayList<Object>(size) : (ArrayList<Object>) reuse;
    assert (r.size() <= size);
    // Set the size of the struct
    while (r.size() < size) {
      r.add(null);
    }
    // Read one field by one field
    for (int eid = 0; eid < size; eid++) {
      r.set(eid, deserialize(buffer, fieldTypes.get(eid), invert, r.get(eid)));
    }
    return r;
  }
  case UNION: {
    UnionTypeInfo utype = (UnionTypeInfo) type;
    StandardUnion r = reuse == null ? new StandardUnion() : (StandardUnion) reuse;
    // Read the tag
    byte tag = buffer.read(invert);
    r.setTag(tag);
    r.setObject(deserialize(buffer, utype.getAllUnionObjectTypeInfos().get(tag), invert, null));
    return r;
  }
  default: {
    throw new RuntimeException("Unrecognized type: " + type.getCategory());
  }
  }
}
From source file:com.ebay.nest.io.sede.lazy.LazyInteger.java
License:Apache License
public LazyInteger(LazyIntObjectInspector oi) { super(oi); data = new IntWritable(); }
From source file:com.ebay.nest.io.sede.lazybinary.LazyBinaryInteger.java
License:Apache License
LazyBinaryInteger(WritableIntObjectInspector oi) { super(oi); data = new IntWritable(); }
From source file:com.elex.dmp.lda.CVB0Driver.java
License:Apache License
private static int getNumTerms(Configuration conf, Path dictionaryPath) throws IOException {
  FileSystem fs = dictionaryPath.getFileSystem(conf);
  Text key = new Text();
  IntWritable value = new IntWritable();
  int maxTermId = -1;
  for (FileStatus stat : fs.globStatus(dictionaryPath)) {
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, stat.getPath(), conf);
    while (reader.next(key, value)) {
      maxTermId = Math.max(maxTermId, value.get());
    }
  }
  return maxTermId + 1;
}
From source file:com.eniyitavsiye.mahoutx.hadoop.Job.java
License:Apache License
/**
 * Run the kmeans clustering job on an input dataset using the given number of
 * clusters k and iteration parameters. All output data will be written to the
 * output directory, which will be initially deleted if it exists. The clustered
 * points will reside in the path <output>/clustered-points. By default, the job
 * expects a file containing equal length space delimited data that resides in a
 * directory named "testdata", and writes output to a directory named "output".
 *
 * @param conf the Configuration to use
 * @param input the String denoting the input directory path
 * @param output the String denoting the output directory path
 * @param measure the DistanceMeasure to use
 * @param k the number of clusters in Kmeans
 * @param convergenceDelta the double convergence criteria for iterations
 * @param maxIterations the int maximum number of iterations
 */
public static void run(Configuration conf, Path input, Path output, DistanceMeasure measure, int k,
    double convergenceDelta, int maxIterations) throws Exception {
  Path directoryContainingConvertedInput = new Path(output, DIRECTORY_CONTAINING_CONVERTED_INPUT);
  log.info("Preparing Input");
  InputDriver.runJob(input, directoryContainingConvertedInput,
      "org.apache.mahout.math.RandomAccessSparseVector");
  log.info("Running random seed to get initial clusters");
  Path clusters = new Path(output, "random-seeds");
  clusters = RandomSeedGenerator.buildRandom(conf, directoryContainingConvertedInput, clusters, k, measure);
  System.out.println("****************************************************************************");
  log.info("Running KMeans with k = {}", k);
  KMeansDriver.run(conf, directoryContainingConvertedInput, clusters, output, measure, convergenceDelta,
      maxIterations, true, 0.0, false);
  // run ClusterDumper
  Path outGlob = new Path(output, "clusters-*-final");
  Path clusteredPoints = new Path(output, "clusteredPoints");
  log.info("Dumping out clusters from clusters: {} and clusteredPoints: {}", outGlob, clusteredPoints);
  ClusterDumper clusterDumper = new ClusterDumper(outGlob, clusteredPoints);
  clusterDumper.printClusters(null);
  FileSystem fs = FileSystem.get(conf);
  SequenceFile.Reader reader = new SequenceFile.Reader(fs,
      new Path("output/" + Cluster.CLUSTERED_POINTS_DIR + "/part-m-00000"), conf);
  IntWritable key = new IntWritable();
  WeightedVectorWritable value = new WeightedVectorWritable();
  while (reader.next(key, value)) {
    System.out.println(value.toString() + " belongs to cluster " + key.toString());
  }
  reader.close();
}
From source file:com.facebook.hive.orc.lazy.LazyIntDictionaryTreeReader.java
License:Open Source License
IntWritable createWritable(Object previous, int v) throws IOException {
  IntWritable result = null;
  if (previous == null) {
    result = new IntWritable();
  } else {
    result = (IntWritable) previous;
  }
  result.set(v);
  return result;
}
From source file:com.facebook.LinkBench.LinkBenchDriverMR.java
License:Apache License
/**
 * read output from the map reduce job
 * @param fs the DFS FileSystem
 * @param jobconf configuration of the map reduce job
 */
public static long readOutput(FileSystem fs, JobConf jobconf) throws IOException, InterruptedException {
  // read outputs
  final Path outdir = new Path(TMP_DIR, "out");
  Path infile = new Path(outdir, "reduce-out");
  IntWritable nworkers = new IntWritable();
  LongWritable result = new LongWritable();
  long output = 0;
  SequenceFile.Reader reader = new SequenceFile.Reader(fs, infile, jobconf);
  try {
    reader.next(nworkers, result);
    output = result.get();
  } finally {
    reader.close();
  }
  return output;
}
From source file:com.github.sakserv.sequencefile.SequenceFileReader.java
License:Apache License
public static void main(String[] args) {
  String inputFile = args[0];
  Configuration conf = new Configuration();
  try {
    Path seqFilePath = new Path(inputFile);
    SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(seqFilePath));
    Text key = new Text();
    IntWritable val = new IntWritable();
    while (reader.next(key, val)) {
      LOG.info("Sequence File Data: Key: " + key + "\tValue: " + val);
    }
    reader.close();
  } catch (IOException e) {
    LOG.error("ERROR: Could not load hadoop configuration");
    e.printStackTrace();
  }
}
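The reader above assumes a SequenceFile keyed by Text with IntWritable values. As a rough companion sketch (the output path, key names, and record count are made up for illustration and are not part of the project above), such a file could be produced with SequenceFile.createWriter, reusing one no-arg IntWritable across append() calls:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SequenceFileWriterSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path seqFilePath = new Path(args[0]); // hypothetical output location

        try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
                SequenceFile.Writer.file(seqFilePath),
                SequenceFile.Writer.keyClass(Text.class),
                SequenceFile.Writer.valueClass(IntWritable.class))) {
            Text key = new Text();
            IntWritable val = new IntWritable(); // one reusable instance for all records
            for (int i = 0; i < 10; i++) {
                key.set("record-" + i);
                val.set(i);
                writer.append(key, val);
            }
        }
    }
}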
From source file:com.github.ygf.pagerank.InLinksTopNReducer.java
License:Apache License
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  Path titlesDir = new Path(conf.get("inlinks.titles_dir"));
  MapFile.Reader[] readers = MapFileOutputFormat.getReaders(titlesDir, conf);
  Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
  IntWritable page = new IntWritable();
  Text title = new Text();
  int[] inLinks = new int[topN.size()];
  String[] titles = new String[topN.size()];
  for (int i = inLinks.length - 1; i >= 0; i--) {
    Map.Entry<Integer, Integer> entry = topN.poll();
    page.set(entry.getValue());
    MapFileOutputFormat.getEntry(readers, partitioner, page, title);
    inLinks[i] = entry.getKey();
    titles[i] = title.toString();
  }
  for (MapFile.Reader reader : readers) {
    reader.close();
  }
  for (int i = 0; i < inLinks.length; i++) {
    context.write(new IntWritable(inLinks[i]), new Text(titles[i]));
  }
}
From source file:com.github.ygf.pagerank.PageRank.java
License:Apache License
private int getNumPages(Configuration conf, Path titlesDir) throws Exception {
  int numPages = 0;
  IntWritable pageNumber = new IntWritable();
  MapFile.Reader[] readers = MapFileOutputFormat.getReaders(titlesDir, conf);
  for (int i = 0; i < readers.length; i++) {
    readers[i].finalKey(pageNumber);
    if (pageNumber.get() > numPages) {
      numPages = pageNumber.get();
    }
  }
  for (MapFile.Reader reader : readers) {
    reader.close();
  }
  return numPages;
}
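getNumPages works because MapFile keys are appended in sorted order, so finalKey() on each part's reader yields that part's largest IntWritable page id. Below is a hypothetical sketch of building such a titles MapFile; the directory, page ids, and titles are invented for illustration, and it uses the long-standing MapFile.Writer(conf, fs, dirName, keyClass, valueClass) constructor rather than the PageRank job's actual output path.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.Text;

public class TitlesMapFileSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path titlesDir = new Path(args[0]); // hypothetical MapFile directory

        MapFile.Writer writer = new MapFile.Writer(conf, fs, titlesDir.toString(), IntWritable.class, Text.class);
        try {
            IntWritable page = new IntWritable();
            Text title = new Text();
            // Keys must be appended in increasing order; the last one becomes finalKey().
            for (int id = 1; id <= 3; id++) {
                page.set(id);
                title.set("Page title " + id);
                writer.append(page, title);
            }
        } finally {
            writer.close();
        }
    }
}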