Example usage for org.apache.hadoop.io IntWritable IntWritable

Introduction

On this page you can find example usage for the org.apache.hadoop.io.IntWritable no-argument constructor, IntWritable().

Prototype

public IntWritable() 
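
The no-argument constructor creates an IntWritable holding the default value 0; the value is filled in later with set(int). A minimal sketch (not taken from the sources below) of this create-then-set pattern:

import org.apache.hadoop.io.IntWritable;

public class IntWritableExample {
    public static void main(String[] args) {
        IntWritable value = new IntWritable(); // holds 0 until set
        value.set(42);                         // fill in the value later
        System.out.println(value.get());       // prints 42
    }
}

This is why the examples below construct empty instances up front: a single IntWritable serves as a reusable container that readers and deserializers overwrite in place.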

Usage

From source file: com.ebay.nest.io.sede.binarysortable.BinarySortableSerDe.java

License: Apache License

static Object deserialize(InputByteBuffer buffer, TypeInfo type, boolean invert, Object reuse)
        throws IOException {

    // Is this field null?
    byte isNull = buffer.read(invert);
    if (isNull == 0) {
        return null;
    }
    assert (isNull == 1);

    switch (type.getCategory()) {
    case PRIMITIVE: {
        PrimitiveTypeInfo ptype = (PrimitiveTypeInfo) type;
        switch (ptype.getPrimitiveCategory()) {
        case VOID: {
            return null;
        }
        case BOOLEAN: {
            BooleanWritable r = reuse == null ? new BooleanWritable() : (BooleanWritable) reuse;
            byte b = buffer.read(invert);
            assert (b == 1 || b == 2);
            r.set(b == 2);
            return r;
        }
        case BYTE: {
            ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse;
            r.set((byte) (buffer.read(invert) ^ 0x80));
            return r;
        }
        case SHORT: {
            ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse;
            int v = buffer.read(invert) ^ 0x80;
            v = (v << 8) + (buffer.read(invert) & 0xff);
            r.set((short) v);
            return r;
        }
        case INT: {
            IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse;
            r.set(deserializeInt(buffer, invert));
            return r;
        }
        case LONG: {
            LongWritable r = reuse == null ? new LongWritable() : (LongWritable) reuse;
            long v = buffer.read(invert) ^ 0x80;
            for (int i = 0; i < 7; i++) {
                v = (v << 8) + (buffer.read(invert) & 0xff);
            }
            r.set(v);
            return r;
        }
        case FLOAT: {
            FloatWritable r = reuse == null ? new FloatWritable() : (FloatWritable) reuse;
            int v = 0;
            for (int i = 0; i < 4; i++) {
                v = (v << 8) + (buffer.read(invert) & 0xff);
            }
            if ((v & (1 << 31)) == 0) {
                // negative number, flip all bits
                v = ~v;
            } else {
                // positive number, flip the first bit
                v = v ^ (1 << 31);
            }
            r.set(Float.intBitsToFloat(v));
            return r;
        }
        case DOUBLE: {
            DoubleWritable r = reuse == null ? new DoubleWritable() : (DoubleWritable) reuse;
            long v = 0;
            for (int i = 0; i < 8; i++) {
                v = (v << 8) + (buffer.read(invert) & 0xff);
            }
            if ((v & (1L << 63)) == 0) {
                // negative number, flip all bits
                v = ~v;
            } else {
                // positive number, flip the first bit
                v = v ^ (1L << 63);
            }
            r.set(Double.longBitsToDouble(v));
            return r;
        }
        case STRING: {
            Text r = reuse == null ? new Text() : (Text) reuse;
            return deserializeText(buffer, invert, r);
        }

        case VARCHAR: {
            HiveVarcharWritable r = reuse == null ? new HiveVarcharWritable() : (HiveVarcharWritable) reuse;
            // Use HiveVarchar's internal Text member to read the value.
            deserializeText(buffer, invert, r.getTextValue());
            // If we cache helper data for deserialization we could avoid having
            // to call getVarcharMaxLength() on every deserialize call.
            r.enforceMaxLength(getVarcharMaxLength(type));
            return r;
        }

        case BINARY: {
            BytesWritable bw = new BytesWritable();
            // Get the actual length first
            int start = buffer.tell();
            int length = 0;
            do {
                byte b = buffer.read(invert);
                if (b == 0) {
                    // end of string
                    break;
                }
                if (b == 1) {
                    // escape byte: the escaped byte follows, so skip it here
                    buffer.read(invert);
                }
                length++;
            } while (true);

            if (length == buffer.tell() - start) {
                // No escaping happened, so we are already done.
                bw.set(buffer.getData(), start, length);
            } else {
                // Escaping happened, we need to copy byte-by-byte.
                // 1. Set the length first.
                bw.set(buffer.getData(), start, length);
                // 2. Reset the pointer.
                buffer.seek(start);
                // 3. Copy the data.
                byte[] rdata = bw.getBytes();
                for (int i = 0; i < length; i++) {
                    byte b = buffer.read(invert);
                    if (b == 1) {
                        // Escape byte: read the actual byte that follows.
                        // The format escapes \0 as \1\1 and \1 as \1\2, so the
                        // encoded bytes contain no raw \0 and \0 can terminate the field.
                        b = (byte) (buffer.read(invert) - 1);
                    }
                    rdata[i] = b;
                }
                // 4. Read the null terminator.
                byte b = buffer.read(invert);
                assert (b == 0);
            }
            return bw;
        }

        case DATE: {
            DateWritable d = reuse == null ? new DateWritable() : (DateWritable) reuse;
            d.set(deserializeInt(buffer, invert));
            return d;
        }

        case TIMESTAMP:
            TimestampWritable t = (reuse == null ? new TimestampWritable() : (TimestampWritable) reuse);
            byte[] bytes = new byte[TimestampWritable.BINARY_SORTABLE_LENGTH];

            for (int i = 0; i < bytes.length; i++) {
                bytes[i] = buffer.read(invert);
            }
            t.setBinarySortable(bytes, 0);
            return t;

        case DECIMAL: {
            // See serialization of decimal for explanation (below)

            HiveDecimalWritable bdw = (reuse == null ? new HiveDecimalWritable() : (HiveDecimalWritable) reuse);

            int b = buffer.read(invert) - 1;
            assert (b == 1 || b == -1 || b == 0);
            boolean positive = b != -1;

            int factor = buffer.read(invert) ^ 0x80;
            for (int i = 0; i < 3; i++) {
                factor = (factor << 8) + (buffer.read(invert) & 0xff);
            }

            if (!positive) {
                factor = -factor;
            }

            int start = buffer.tell();
            int length = 0;

            do {
                b = buffer.read(positive ? invert : !invert);
                assert (b != 1);

                if (b == 0) {
                    // end of digits
                    break;
                }

                length++;
            } while (true);

            if (decimalBuffer == null || decimalBuffer.length < length) {
                decimalBuffer = new byte[length];
            }

            buffer.seek(start);
            for (int i = 0; i < length; ++i) {
                decimalBuffer[i] = buffer.read(positive ? invert : !invert);
            }

            // read the null byte again
            buffer.read(positive ? invert : !invert);

            String digits = new String(decimalBuffer, 0, length, decimalCharSet);
            BigInteger bi = new BigInteger(digits);
            HiveDecimal bd = new HiveDecimal(bi).scaleByPowerOfTen(factor - length);

            if (!positive) {
                bd = bd.negate();
            }

            bdw.set(bd);
            return bdw;
        }

        default: {
            throw new RuntimeException("Unrecognized type: " + ptype.getPrimitiveCategory());
        }
        }
    }

    case LIST: {
        ListTypeInfo ltype = (ListTypeInfo) type;
        TypeInfo etype = ltype.getListElementTypeInfo();

        // Create the list if needed
        ArrayList<Object> r = reuse == null ? new ArrayList<Object>() : (ArrayList<Object>) reuse;

        // Read the list
        int size = 0;
        while (true) {
            int more = buffer.read(invert);
            if (more == 0) {
                // \0 to terminate
                break;
            }
            // \1 followed by each element
            assert (more == 1);
            if (size == r.size()) {
                r.add(null);
            }
            r.set(size, deserialize(buffer, etype, invert, r.get(size)));
            size++;
        }
        // Remove additional elements if the list is reused
        while (r.size() > size) {
            r.remove(r.size() - 1);
        }
        return r;
    }
    case MAP: {
        MapTypeInfo mtype = (MapTypeInfo) type;
        TypeInfo ktype = mtype.getMapKeyTypeInfo();
        TypeInfo vtype = mtype.getMapValueTypeInfo();

        // Create the map if needed
        Map<Object, Object> r;
        if (reuse == null) {
            r = new HashMap<Object, Object>();
        } else {
            r = (HashMap<Object, Object>) reuse;
            r.clear();
        }

        while (true) {
            int more = buffer.read(invert);
            if (more == 0) {
                // \0 to terminate
                break;
            }
            // \1 followed by each key and then each value
            assert (more == 1);
            Object k = deserialize(buffer, ktype, invert, null);
            Object v = deserialize(buffer, vtype, invert, null);
            r.put(k, v);
        }
        return r;
    }
    case STRUCT: {
        StructTypeInfo stype = (StructTypeInfo) type;
        List<TypeInfo> fieldTypes = stype.getAllStructFieldTypeInfos();
        int size = fieldTypes.size();
        // Create the struct if needed
        ArrayList<Object> r = reuse == null ? new ArrayList<Object>(size) : (ArrayList<Object>) reuse;
        assert (r.size() <= size);
        // Set the size of the struct
        while (r.size() < size) {
            r.add(null);
        }
        // Read one field by one field
        for (int eid = 0; eid < size; eid++) {
            r.set(eid, deserialize(buffer, fieldTypes.get(eid), invert, r.get(eid)));
        }
        return r;
    }
    case UNION: {
        UnionTypeInfo utype = (UnionTypeInfo) type;
        StandardUnion r = reuse == null ? new StandardUnion() : (StandardUnion) reuse;
        // Read the tag
        byte tag = buffer.read(invert);
        r.setTag(tag);
        r.setObject(deserialize(buffer, utype.getAllUnionObjectTypeInfos().get(tag), invert, null));
        return r;
    }
    default: {
        throw new RuntimeException("Unrecognized type: " + type.getCategory());
    }
    }
}

From source file: com.ebay.nest.io.sede.lazy.LazyInteger.java

License: Apache License

public LazyInteger(LazyIntObjectInspector oi) {
    super(oi);
    data = new IntWritable();
}

From source file: com.ebay.nest.io.sede.lazybinary.LazyBinaryInteger.java

License: Apache License

LazyBinaryInteger(WritableIntObjectInspector oi) {
    super(oi);
    data = new IntWritable();
}
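
In both lazy wrappers, the no-argument constructor simply pre-allocates an empty IntWritable; its value is set later, when the lazy field is actually parsed.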

From source file: com.elex.dmp.lda.CVB0Driver.java

License: Apache License

private static int getNumTerms(Configuration conf, Path dictionaryPath) throws IOException {
    FileSystem fs = dictionaryPath.getFileSystem(conf);
    Text key = new Text();
    IntWritable value = new IntWritable();
    int maxTermId = -1;
    for (FileStatus stat : fs.globStatus(dictionaryPath)) {
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, stat.getPath(), conf);
        while (reader.next(key, value)) {
            maxTermId = Math.max(maxTermId, value.get());
        }
        reader.close();
    }
    return maxTermId + 1;
}
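
Note the object-reuse pattern: one Text key and one IntWritable value are allocated up front, and each reader.next(key, value) call overwrites them in place, avoiding a fresh allocation per record.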

From source file: com.eniyitavsiye.mahoutx.hadoop.Job.java

License: Apache License

/**
 * Run the kmeans clustering job on an input dataset using the given
 * number of clusters k and iteration parameters. All output data will be
 * written to the output directory, which will be initially deleted if it
 * exists. The clustered points will reside in the path
 * <output>/clustered-points. By default, the job expects a file containing
 * equal length space delimited data that resides in a directory named
 * "testdata", and writes output to a directory named "output".
 *
 * @param conf the Configuration to use
 * @param input the String denoting the input directory path
 * @param output the String denoting the output directory path
 * @param measure the DistanceMeasure to use
 * @param k the number of clusters in Kmeans
 * @param convergenceDelta the double convergence criteria for iterations
 * @param maxIterations the int maximum number of iterations
 */
public static void run(Configuration conf, Path input, Path output, DistanceMeasure measure, int k,
        double convergenceDelta, int maxIterations) throws Exception {
    Path directoryContainingConvertedInput = new Path(output, DIRECTORY_CONTAINING_CONVERTED_INPUT);
    log.info("Preparing Input");
    InputDriver.runJob(input, directoryContainingConvertedInput,
            "org.apache.mahout.math.RandomAccessSparseVector");
    log.info("Running random seed to get initial clusters");
    Path clusters = new Path(output, "random-seeds");
    clusters = RandomSeedGenerator.buildRandom(conf, directoryContainingConvertedInput, clusters, k, measure);
    System.out.println("****************************************************************************");

    log.info("Running KMeans with k = {}", k);
    KMeansDriver.run(conf, directoryContainingConvertedInput, clusters, output, measure, convergenceDelta,
            maxIterations, true, 0.0, false);
    // run ClusterDumper
    Path outGlob = new Path(output, "clusters-*-final");
    Path clusteredPoints = new Path(output, "clusteredPoints");
    log.info("Dumping out clusters from clusters: {} and clusteredPoints: {}", outGlob, clusteredPoints);
    ClusterDumper clusterDumper = new ClusterDumper(outGlob, clusteredPoints);
    clusterDumper.printClusters(null);

    FileSystem fs = FileSystem.get(conf);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs,
            new Path("output/" + Cluster.CLUSTERED_POINTS_DIR + "/part-m-00000"), conf);
    IntWritable key = new IntWritable();
    WeightedVectorWritable value = new WeightedVectorWritable();
    while (reader.next(key, value)) {
        System.out.println(value.toString() + " belongs to cluster " + key.toString());
    }
    reader.close();
}
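
A minimal sketch of calling this helper with the default directories mentioned in the Javadoc. The enclosing Job class name and Mahout's EuclideanDistanceMeasure are assumptions based on the source path and typical Mahout usage, not taken from the original project:

Configuration conf = new Configuration();
// Hypothetical call: 3 clusters, convergence delta 0.5, at most 10 iterations.
Job.run(conf, new Path("testdata"), new Path("output"),
        new EuclideanDistanceMeasure(), 3, 0.5, 10);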

From source file: com.facebook.hive.orc.lazy.LazyIntDictionaryTreeReader.java

License: Open Source License

IntWritable createWritable(Object previous, int v) throws IOException {
    IntWritable result = null;
    if (previous == null) {
        result = new IntWritable();
    } else {
        result = (IntWritable) previous;
    }
    result.set(v);
    return result;
}
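
This is the standard Writable reuse idiom that deserializers follow: reuse the caller-supplied previous object when one is available, and allocate a new IntWritable only when it is not.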

From source file: com.facebook.LinkBench.LinkBenchDriverMR.java

License: Apache License

/**
 * Read output from the map reduce job.
 * @param fs the DFS FileSystem
 * @param jobconf configuration of the map reduce job
 */
public static long readOutput(FileSystem fs, JobConf jobconf) throws IOException, InterruptedException {
    //read outputs
    final Path outdir = new Path(TMP_DIR, "out");
    Path infile = new Path(outdir, "reduce-out");
    IntWritable nworkers = new IntWritable();
    LongWritable result = new LongWritable();
    long output = 0;
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, infile, jobconf);
    try {
        reader.next(nworkers, result);
        output = result.get();
    } finally {
        reader.close();
    }
    return output;
}

From source file: com.github.sakserv.sequencefile.SequenceFileReader.java

License: Apache License

public static void main(String[] args) {

    String inputFile = args[0];

    Configuration conf = new Configuration();
    try {

        Path seqFilePath = new Path(inputFile);

        SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(seqFilePath));

        Text key = new Text();
        IntWritable val = new IntWritable();

        while (reader.next(key, val)) {
            LOG.info("Sequence File Data: Key: " + key + "\tValue: " + val);
        }

        reader.close();
    } catch (IOException e) {
        LOG.error("ERROR: Could not read sequence file: " + inputFile);
        e.printStackTrace();
    }

}
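
For completeness, a minimal companion sketch (assumed, not part of the original source) that writes a Text/IntWritable sequence file this reader can consume:

Configuration conf = new Configuration();
Path seqFilePath = new Path("example.seq");
SequenceFile.Writer writer = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(seqFilePath),
        SequenceFile.Writer.keyClass(Text.class),
        SequenceFile.Writer.valueClass(IntWritable.class));
try {
    writer.append(new Text("answer"), new IntWritable(42)); // one key/value record
} finally {
    writer.close();
}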

From source file: com.github.ygf.pagerank.InLinksTopNReducer.java

License: Apache License

@Override
protected void cleanup(Context context) throws IOException, InterruptedException {

    Configuration conf = context.getConfiguration();
    Path titlesDir = new Path(conf.get("inlinks.titles_dir"));

    MapFile.Reader[] readers = MapFileOutputFormat.getReaders(titlesDir, conf);
    Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
    IntWritable page = new IntWritable();
    Text title = new Text();

    int[] inLinks = new int[topN.size()];
    String[] titles = new String[topN.size()];

    for (int i = inLinks.length - 1; i >= 0; i--) {
        Map.Entry<Integer, Integer> entry = topN.poll();
        page.set(entry.getValue());
        MapFileOutputFormat.getEntry(readers, partitioner, page, title);
        inLinks[i] = entry.getKey();
        titles[i] = title.toString();
    }

    for (MapFile.Reader reader : readers) {
        reader.close();
    }

    for (int i = 0; i < inLinks.length; i++) {
        context.write(new IntWritable(inLinks[i]), new Text(titles[i]));
    }
}
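
Here the reducer resolves each top-N page id to its title at cleanup time: MapFileOutputFormat.getEntry uses a HashPartitioner matching the one used when the map files were written to pick the partition that holds the key, then looks the page up in that partition's MapFile.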

From source file: com.github.ygf.pagerank.PageRank.java

License: Apache License

private int getNumPages(Configuration conf, Path titlesDir) throws Exception {

    int numPages = 0;

    IntWritable pageNumber = new IntWritable();
    MapFile.Reader[] readers = MapFileOutputFormat.getReaders(titlesDir, conf);
    for (int i = 0; i < readers.length; i++) {
        readers[i].finalKey(pageNumber);
        if (pageNumber.get() > numPages) {
            numPages = pageNumber.get();
            }
    }
    for (MapFile.Reader reader : readers) {
        reader.close();
    }

    return numPages;
}
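
This works because MapFile keys are stored in sorted order: finalKey(pageNumber) reads each partition's largest key, so the maximum across all readers is the highest page number, which equals the page count when pages are numbered consecutively from 1.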