List of usage examples for the org.apache.hadoop.io.IntWritable constructor IntWritable()
public IntWritable()
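The no-argument constructor builds an IntWritable whose value defaults to 0; callers then assign a value with set(int) and read it back with get(). The short sketch below is not taken from any of the projects listed on this page; it is a minimal, self-contained illustration of the construct/set/serialize/reuse cycle that the examples rely on.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;

public class IntWritableRoundTrip {
    public static void main(String[] args) throws IOException {
        // Create an empty IntWritable (value 0) and assign a value afterwards.
        IntWritable writable = new IntWritable();
        writable.set(42);

        // Serialize the value with write(DataOutput).
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        writable.write(new DataOutputStream(bytes));

        // Deserialize into another no-arg instance with readFields(DataInput);
        // reusing such an instance across records avoids per-record allocation.
        IntWritable reused = new IntWritable();
        reused.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(reused.get()); // prints 42
    }
}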
From source file:com.ebay.nest.io.sede.binarysortable.BinarySortableSerDe.java
License:Apache License
static Object deserialize(InputByteBuffer buffer, TypeInfo type, boolean invert, Object reuse) throws IOException {
  // Is this field a null?
  byte isNull = buffer.read(invert);
  if (isNull == 0) {
    return null;
  }
  assert (isNull == 1);
  switch (type.getCategory()) {
  case PRIMITIVE: {
    PrimitiveTypeInfo ptype = (PrimitiveTypeInfo) type;
    switch (ptype.getPrimitiveCategory()) {
    case VOID: {
      return null;
    }
    case BOOLEAN: {
      BooleanWritable r = reuse == null ? new BooleanWritable() : (BooleanWritable) reuse;
      byte b = buffer.read(invert);
      assert (b == 1 || b == 2);
      r.set(b == 2);
      return r;
    }
    case BYTE: {
      ByteWritable r = reuse == null ? new ByteWritable() : (ByteWritable) reuse;
      r.set((byte) (buffer.read(invert) ^ 0x80));
      return r;
    }
    case SHORT: {
      ShortWritable r = reuse == null ? new ShortWritable() : (ShortWritable) reuse;
      int v = buffer.read(invert) ^ 0x80;
      v = (v << 8) + (buffer.read(invert) & 0xff);
      r.set((short) v);
      return r;
    }
    case INT: {
      IntWritable r = reuse == null ? new IntWritable() : (IntWritable) reuse;
      r.set(deserializeInt(buffer, invert));
      return r;
    }
    case LONG: {
      LongWritable r = reuse == null ? new LongWritable() : (LongWritable) reuse;
      long v = buffer.read(invert) ^ 0x80;
      for (int i = 0; i < 7; i++) {
        v = (v << 8) + (buffer.read(invert) & 0xff);
      }
      r.set(v);
      return r;
    }
    case FLOAT: {
      FloatWritable r = reuse == null ? new FloatWritable() : (FloatWritable) reuse;
      int v = 0;
      for (int i = 0; i < 4; i++) {
        v = (v << 8) + (buffer.read(invert) & 0xff);
      }
      if ((v & (1 << 31)) == 0) {
        // negative number, flip all bits
        v = ~v;
      } else {
        // positive number, flip the first bit
        v = v ^ (1 << 31);
      }
      r.set(Float.intBitsToFloat(v));
      return r;
    }
    case DOUBLE: {
      DoubleWritable r = reuse == null ? new DoubleWritable() : (DoubleWritable) reuse;
      long v = 0;
      for (int i = 0; i < 8; i++) {
        v = (v << 8) + (buffer.read(invert) & 0xff);
      }
      if ((v & (1L << 63)) == 0) {
        // negative number, flip all bits
        v = ~v;
      } else {
        // positive number, flip the first bit
        v = v ^ (1L << 63);
      }
      r.set(Double.longBitsToDouble(v));
      return r;
    }
    case STRING: {
      Text r = reuse == null ? new Text() : (Text) reuse;
      return deserializeText(buffer, invert, r);
    }
    case VARCHAR: {
      HiveVarcharWritable r = reuse == null ? new HiveVarcharWritable() : (HiveVarcharWritable) reuse;
      // Use HiveVarchar's internal Text member to read the value.
      deserializeText(buffer, invert, r.getTextValue());
      // If we cache helper data for deserialization we could avoid having
      // to call getVarcharMaxLength() on every deserialize call.
      r.enforceMaxLength(getVarcharMaxLength(type));
      return r;
    }
    case BINARY: {
      BytesWritable bw = new BytesWritable();
      // Get the actual length first
      int start = buffer.tell();
      int length = 0;
      do {
        byte b = buffer.read(invert);
        if (b == 0) {
          // end of string
          break;
        }
        if (b == 1) {
          // the last char is an escape char. read the actual char
          buffer.read(invert);
        }
        length++;
      } while (true);
      if (length == buffer.tell() - start) {
        // No escaping happened, so we are already done.
        bw.set(buffer.getData(), start, length);
      } else {
        // Escaping happened, we need to copy byte-by-byte.
        // 1. Set the length first.
        bw.set(buffer.getData(), start, length);
        // 2. Reset the pointer.
        buffer.seek(start);
        // 3. Copy the data.
        byte[] rdata = bw.getBytes();
        for (int i = 0; i < length; i++) {
          byte b = buffer.read(invert);
          if (b == 1) {
            // The last char is an escape char, read the actual char.
            // The serialization format escape \0 to \1, and \1 to \2,
            // to make sure the string is null-terminated.
            b = (byte) (buffer.read(invert) - 1);
          }
          rdata[i] = b;
        }
        // 4. Read the null terminator.
        byte b = buffer.read(invert);
        assert (b == 0);
      }
      return bw;
    }
    case DATE: {
      DateWritable d = reuse == null ? new DateWritable() : (DateWritable) reuse;
      d.set(deserializeInt(buffer, invert));
      return d;
    }
    case TIMESTAMP:
      TimestampWritable t = (reuse == null ? new TimestampWritable() : (TimestampWritable) reuse);
      byte[] bytes = new byte[TimestampWritable.BINARY_SORTABLE_LENGTH];
      for (int i = 0; i < bytes.length; i++) {
        bytes[i] = buffer.read(invert);
      }
      t.setBinarySortable(bytes, 0);
      return t;
    case DECIMAL: {
      // See serialization of decimal for explanation (below)
      HiveDecimalWritable bdw = (reuse == null ? new HiveDecimalWritable() : (HiveDecimalWritable) reuse);
      int b = buffer.read(invert) - 1;
      assert (b == 1 || b == -1 || b == 0);
      boolean positive = b != -1;
      int factor = buffer.read(invert) ^ 0x80;
      for (int i = 0; i < 3; i++) {
        factor = (factor << 8) + (buffer.read(invert) & 0xff);
      }
      if (!positive) {
        factor = -factor;
      }
      int start = buffer.tell();
      int length = 0;
      do {
        b = buffer.read(positive ? invert : !invert);
        assert (b != 1);
        if (b == 0) {
          // end of digits
          break;
        }
        length++;
      } while (true);
      if (decimalBuffer == null || decimalBuffer.length < length) {
        decimalBuffer = new byte[length];
      }
      buffer.seek(start);
      for (int i = 0; i < length; ++i) {
        decimalBuffer[i] = buffer.read(positive ? invert : !invert);
      }
      // read the null byte again
      buffer.read(positive ? invert : !invert);
      String digits = new String(decimalBuffer, 0, length, decimalCharSet);
      BigInteger bi = new BigInteger(digits);
      HiveDecimal bd = new HiveDecimal(bi).scaleByPowerOfTen(factor - length);
      if (!positive) {
        bd = bd.negate();
      }
      bdw.set(bd);
      return bdw;
    }
    default: {
      throw new RuntimeException("Unrecognized type: " + ptype.getPrimitiveCategory());
    }
    }
  }
  case LIST: {
    ListTypeInfo ltype = (ListTypeInfo) type;
    TypeInfo etype = ltype.getListElementTypeInfo();
    // Create the list if needed
    ArrayList<Object> r = reuse == null ? new ArrayList<Object>() : (ArrayList<Object>) reuse;
    // Read the list
    int size = 0;
    while (true) {
      int more = buffer.read(invert);
      if (more == 0) {
        // \0 to terminate
        break;
      }
      // \1 followed by each element
      assert (more == 1);
      if (size == r.size()) {
        r.add(null);
      }
      r.set(size, deserialize(buffer, etype, invert, r.get(size)));
      size++;
    }
    // Remove additional elements if the list is reused
    while (r.size() > size) {
      r.remove(r.size() - 1);
    }
    return r;
  }
  case MAP: {
    MapTypeInfo mtype = (MapTypeInfo) type;
    TypeInfo ktype = mtype.getMapKeyTypeInfo();
    TypeInfo vtype = mtype.getMapValueTypeInfo();
    // Create the map if needed
    Map<Object, Object> r;
    if (reuse == null) {
      r = new HashMap<Object, Object>();
    } else {
      r = (HashMap<Object, Object>) reuse;
      r.clear();
    }
    while (true) {
      int more = buffer.read(invert);
      if (more == 0) {
        // \0 to terminate
        break;
      }
      // \1 followed by each key and then each value
      assert (more == 1);
      Object k = deserialize(buffer, ktype, invert, null);
      Object v = deserialize(buffer, vtype, invert, null);
      r.put(k, v);
    }
    return r;
  }
  case STRUCT: {
    StructTypeInfo stype = (StructTypeInfo) type;
    List<TypeInfo> fieldTypes = stype.getAllStructFieldTypeInfos();
    int size = fieldTypes.size();
    // Create the struct if needed
    ArrayList<Object> r = reuse == null ? new ArrayList<Object>(size) : (ArrayList<Object>) reuse;
    assert (r.size() <= size);
    // Set the size of the struct
    while (r.size() < size) {
      r.add(null);
    }
    // Read one field by one field
    for (int eid = 0; eid < size; eid++) {
      r.set(eid, deserialize(buffer, fieldTypes.get(eid), invert, r.get(eid)));
    }
    return r;
  }
  case UNION: {
    UnionTypeInfo utype = (UnionTypeInfo) type;
    StandardUnion r = reuse == null ? new StandardUnion() : (StandardUnion) reuse;
    // Read the tag
    byte tag = buffer.read(invert);
    r.setTag(tag);
    r.setObject(deserialize(buffer, utype.getAllUnionObjectTypeInfos().get(tag), invert, null));
    return r;
  }
  default: {
    throw new RuntimeException("Unrecognized type: " + type.getCategory());
  }
  }
}
From source file:com.ebay.nest.io.sede.lazy.LazyInteger.java
License:Apache License
public LazyInteger(LazyIntObjectInspector oi) { super(oi); data = new IntWritable(); }
From source file:com.ebay.nest.io.sede.lazybinary.LazyBinaryInteger.java
License:Apache License
LazyBinaryInteger(WritableIntObjectInspector oi) { super(oi); data = new IntWritable(); }
From source file:com.elex.dmp.lda.CVB0Driver.java
License:Apache License
private static int getNumTerms(Configuration conf, Path dictionaryPath) throws IOException {
  FileSystem fs = dictionaryPath.getFileSystem(conf);
  Text key = new Text();
  IntWritable value = new IntWritable();
  int maxTermId = -1;
  for (FileStatus stat : fs.globStatus(dictionaryPath)) {
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, stat.getPath(), conf);
    while (reader.next(key, value)) {
      maxTermId = Math.max(maxTermId, value.get());
    }
  }
  return maxTermId + 1;
}
From source file:com.eniyitavsiye.mahoutx.hadoop.Job.java
License:Apache License
/**
 * Run the kmeans clustering job on an input dataset using the given number of
 * clusters k and iteration parameters. All output data will be written to the
 * output directory, which will be initially deleted if it exists. The clustered
 * points will reside in the path <output>/clustered-points. By default, the job
 * expects a file containing equal length space delimited data that resides in a
 * directory named "testdata", and writes output to a directory named "output".
 *
 * @param conf the Configuration to use
 * @param input the String denoting the input directory path
 * @param output the String denoting the output directory path
 * @param measure the DistanceMeasure to use
 * @param k the number of clusters in Kmeans
 * @param convergenceDelta the double convergence criteria for iterations
 * @param maxIterations the int maximum number of iterations
 */
public static void run(Configuration conf, Path input, Path output, DistanceMeasure measure, int k,
    double convergenceDelta, int maxIterations) throws Exception {
  Path directoryContainingConvertedInput = new Path(output, DIRECTORY_CONTAINING_CONVERTED_INPUT);
  log.info("Preparing Input");
  InputDriver.runJob(input, directoryContainingConvertedInput,
      "org.apache.mahout.math.RandomAccessSparseVector");
  log.info("Running random seed to get initial clusters");
  Path clusters = new Path(output, "random-seeds");
  clusters = RandomSeedGenerator.buildRandom(conf, directoryContainingConvertedInput, clusters, k, measure);
  System.out.println("****************************************************************************");
  log.info("Running KMeans with k = {}", k);
  KMeansDriver.run(conf, directoryContainingConvertedInput, clusters, output, measure, convergenceDelta,
      maxIterations, true, 0.0, false);
  // run ClusterDumper
  Path outGlob = new Path(output, "clusters-*-final");
  Path clusteredPoints = new Path(output, "clusteredPoints");
  log.info("Dumping out clusters from clusters: {} and clusteredPoints: {}", outGlob, clusteredPoints);
  ClusterDumper clusterDumper = new ClusterDumper(outGlob, clusteredPoints);
  clusterDumper.printClusters(null);
  FileSystem fs = FileSystem.get(conf);
  SequenceFile.Reader reader = new SequenceFile.Reader(fs,
      new Path("output/" + Cluster.CLUSTERED_POINTS_DIR + "/part-m-00000"), conf);
  IntWritable key = new IntWritable();
  WeightedVectorWritable value = new WeightedVectorWritable();
  while (reader.next(key, value)) {
    System.out.println(value.toString() + " belongs to cluster " + key.toString());
  }
  reader.close();
}
From source file:com.facebook.hive.orc.lazy.LazyIntDictionaryTreeReader.java
License:Open Source License
IntWritable createWritable(Object previous, int v) throws IOException {
  IntWritable result = null;
  if (previous == null) {
    result = new IntWritable();
  } else {
    result = (IntWritable) previous;
  }
  result.set(v);
  return result;
}
From source file:com.facebook.LinkBench.LinkBenchDriverMR.java
License:Apache License
/**
 * read output from the map reduce job
 * @param fs the DFS FileSystem
 * @param jobconf configuration of the map reduce job
 */
public static long readOutput(FileSystem fs, JobConf jobconf) throws IOException, InterruptedException {
  // read outputs
  final Path outdir = new Path(TMP_DIR, "out");
  Path infile = new Path(outdir, "reduce-out");
  IntWritable nworkers = new IntWritable();
  LongWritable result = new LongWritable();
  long output = 0;
  SequenceFile.Reader reader = new SequenceFile.Reader(fs, infile, jobconf);
  try {
    reader.next(nworkers, result);
    output = result.get();
  } finally {
    reader.close();
  }
  return output;
}
From source file:com.github.sakserv.sequencefile.SequenceFileReader.java
License:Apache License
public static void main(String[] args) {
  String inputFile = args[0];
  Configuration conf = new Configuration();
  try {
    Path seqFilePath = new Path(inputFile);
    SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(seqFilePath));
    Text key = new Text();
    IntWritable val = new IntWritable();
    while (reader.next(key, val)) {
      LOG.info("Sequence File Data: Key: " + key + "\tValue: " + val);
    }
    reader.close();
  } catch (IOException e) {
    LOG.error("ERROR: Could not load hadoop configuration");
    e.printStackTrace();
  }
}
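The reader above assumes a SequenceFile keyed by Text with IntWritable values. As a rough companion sketch (the output path, key names, and record count are made up for illustration and are not part of the project above), such a file could be produced with SequenceFile.createWriter, reusing one no-arg IntWritable across append() calls:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SequenceFileWriterSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path seqFilePath = new Path(args[0]); // hypothetical output location

        try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
                SequenceFile.Writer.file(seqFilePath),
                SequenceFile.Writer.keyClass(Text.class),
                SequenceFile.Writer.valueClass(IntWritable.class))) {
            Text key = new Text();
            IntWritable val = new IntWritable(); // one reusable instance for all records
            for (int i = 0; i < 10; i++) {
                key.set("record-" + i);
                val.set(i);
                writer.append(key, val);
            }
        }
    }
}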
From source file:com.github.ygf.pagerank.InLinksTopNReducer.java
License:Apache License
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  Path titlesDir = new Path(conf.get("inlinks.titles_dir"));
  MapFile.Reader[] readers = MapFileOutputFormat.getReaders(titlesDir, conf);
  Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
  IntWritable page = new IntWritable();
  Text title = new Text();
  int[] inLinks = new int[topN.size()];
  String[] titles = new String[topN.size()];
  for (int i = inLinks.length - 1; i >= 0; i--) {
    Map.Entry<Integer, Integer> entry = topN.poll();
    page.set(entry.getValue());
    MapFileOutputFormat.getEntry(readers, partitioner, page, title);
    inLinks[i] = entry.getKey();
    titles[i] = title.toString();
  }
  for (MapFile.Reader reader : readers) {
    reader.close();
  }
  for (int i = 0; i < inLinks.length; i++) {
    context.write(new IntWritable(inLinks[i]), new Text(titles[i]));
  }
}
From source file:com.github.ygf.pagerank.PageRank.java
License:Apache License
private int getNumPages(Configuration conf, Path titlesDir) throws Exception {
  int numPages = 0;
  IntWritable pageNumber = new IntWritable();
  MapFile.Reader[] readers = MapFileOutputFormat.getReaders(titlesDir, conf);
  for (int i = 0; i < readers.length; i++) {
    readers[i].finalKey(pageNumber);
    if (pageNumber.get() > numPages) {
      numPages = pageNumber.get();
    }
  }
  for (MapFile.Reader reader : readers) {
    reader.close();
  }
  return numPages;
}
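getNumPages works because MapFile keys are appended in sorted order, so finalKey() on each part's reader yields that part's largest IntWritable page id. Below is a hypothetical sketch of building such a titles MapFile; the directory, page ids, and titles are invented for illustration, and it uses the long-standing MapFile.Writer(conf, fs, dirName, keyClass, valueClass) constructor rather than the PageRank job's actual output path.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.Text;

public class TitlesMapFileSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path titlesDir = new Path(args[0]); // hypothetical MapFile directory

        MapFile.Writer writer = new MapFile.Writer(conf, fs, titlesDir.toString(), IntWritable.class, Text.class);
        try {
            IntWritable page = new IntWritable();
            Text title = new Text();
            // Keys must be appended in increasing order; the last one becomes finalKey().
            for (int id = 1; id <= 3; id++) {
                page.set(id);
                title.set("Page title " + id);
                writer.append(page, title);
            }
        } finally {
            writer.close();
        }
    }
}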