List of usage examples for the org.apache.hadoop.io.VIntWritable constructor
public VIntWritable(int value)
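Before the project examples, a minimal round-trip sketch (illustrative only, assuming just hadoop-common on the classpath) showing how this constructor pairs with write() and readFields():

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.io.VIntWritable;

public class VIntWritableRoundTrip {
    public static void main(String[] args) throws IOException {
        // Serialize: small values occupy a single byte on the wire.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        new VIntWritable(42).write(new DataOutputStream(bytes));

        // Deserialize into a fresh instance and read the value back.
        VIntWritable copy = new VIntWritable();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(copy.get()); // prints 42
    }
}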
From source file:org.apache.hive.hcatalog.data.ReaderWriter.java
License:Apache License
public static void writeDatum(DataOutput out, Object val) throws IOException {
    // write the data type
    byte type = DataType.findType(val);
    out.write(type);
    switch (type) {
    case DataType.LIST:
        List<?> list = (List<?>) val;
        int sz = list.size();
        out.writeInt(sz);
        for (int i = 0; i < sz; i++) {
            writeDatum(out, list.get(i));
        }
        return;

    case DataType.MAP:
        Map<?, ?> m = (Map<?, ?>) val;
        out.writeInt(m.size());
        Iterator<?> i = m.entrySet().iterator();
        while (i.hasNext()) {
            Entry<?, ?> entry = (Entry<?, ?>) i.next();
            writeDatum(out, entry.getKey());
            writeDatum(out, entry.getValue());
        }
        return;

    case DataType.INTEGER:
        new VIntWritable((Integer) val).write(out);
        return;

    case DataType.LONG:
        new VLongWritable((Long) val).write(out);
        return;

    case DataType.FLOAT:
        out.writeFloat((Float) val);
        return;

    case DataType.DOUBLE:
        out.writeDouble((Double) val);
        return;

    case DataType.BOOLEAN:
        out.writeBoolean((Boolean) val);
        return;

    case DataType.BYTE:
        out.writeByte((Byte) val);
        return;

    case DataType.SHORT:
        out.writeShort((Short) val);
        return;

    case DataType.STRING:
        String s = (String) val;
        byte[] utfBytes = s.getBytes(ReaderWriter.UTF8);
        out.writeInt(utfBytes.length);
        out.write(utfBytes);
        return;

    case DataType.BINARY:
        byte[] ba = (byte[]) val;
        out.writeInt(ba.length);
        out.write(ba);
        return;

    case DataType.NULL:
        // for NULL we just write out the type
        return;

    case DataType.CHAR:
        new HiveCharWritable((HiveChar) val).write(out);
        return;

    case DataType.VARCHAR:
        new HiveVarcharWritable((HiveVarchar) val).write(out);
        return;

    case DataType.DECIMAL:
        new HiveDecimalWritable((HiveDecimal) val).write(out);
        return;

    case DataType.DATE:
        new DateWritable((Date) val).write(out);
        return;

    case DataType.TIMESTAMP:
        new TimestampWritable((java.sql.Timestamp) val).write(out);
        return;

    default:
        throw new IOException("Unexpected data type " + type + " found in stream.");
    }
}
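The INTEGER branch above delegates to VIntWritable, so small magnitudes cost as little as one byte on the wire instead of a fixed four. A minimal sketch of the matching decode side (a hypothetical helper, not HCatalog's actual readDatum, which also dispatches on the leading type byte); org.apache.hadoop.io.WritableUtils.readVInt(in) is an equivalent shortcut:

import java.io.DataInput;
import java.io.IOException;
import org.apache.hadoop.io.VIntWritable;

final class VIntDecode {
    // Reads back an int written via new VIntWritable(value).write(out).
    static int readVInt(DataInput in) throws IOException {
        VIntWritable v = new VIntWritable();
        v.readFields(in); // consumes 1 to 5 bytes depending on magnitude
        return v.get();
    }
}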
From source file:org.commoncrawl.service.pagerank.PageRankValueReWriter.java
License:Open Source License
public static void main(String[] args) {
    int nodeIndex = Integer.parseInt(args[0]);
    LOG.info("Node Index:" + args[0]);
    int nodeCount = Integer.parseInt(args[1]);
    LOG.info("Node Count:" + args[1]);
    String idsDirectory = args[2];
    LOG.info("ID Directory is:" + args[2]);
    String valuesDirectory = args[3];
    LOG.info("Values Directory is:" + args[3]);
    int iterationNumber = Integer.parseInt(args[4]);
    LOG.info("Iteration Number is:" + args[4]);
    int runDate = Integer.parseInt(args[5]);
    LOG.info("runDate is:" + args[5]);

    Configuration conf = new Configuration();
    conf.addResource("nutch-default.xml");
    conf.addResource("nutch-site.xml");
    conf.addResource("hadoop-default.xml");
    conf.addResource("hadoop-site.xml");
    conf.addResource("commoncrawl-default.xml");
    conf.addResource("commoncrawl-site.xml");

    CrawlEnvironment.setHadoopConfig(conf);
    CrawlEnvironment.setDefaultHadoopFSURI("hdfs://ccn01:9000/");

    try {
        FileSystem fileSystem = CrawlEnvironment.getDefaultFileSystem();
        Path outputPath = new Path("crawl/pageRank/out", Integer.toString(runDate));
        LOG.info("Output Directory is:" + outputPath);
        fileSystem.mkdirs(outputPath);

        // iterate values based on node id
        for (int i = nodeIndex; i < nodeIndex + 1; ++i) {
            LOG.info("Processing output for Node:" + i);
            Path valuePath = new Path(valuesDirectory,
                    "value_" + NUMBER_FORMAT.format(iterationNumber) + "-" + NUMBER_FORMAT.format(i));
            LOG.info("Value File Path is:" + valuePath);
            Path idsPath = new Path(idsDirectory, "ids_" + NUMBER_FORMAT.format(i));
            LOG.info("IDs File Path is:" + idsPath);
            Path outputFile = new Path(outputPath, "part-" + NUMBER_FORMAT.format(i));
            LOG.info("Output File Path is:" + outputFile);

            byte[] valueData = null;
            {
                FileStatus valueFileStatus = fileSystem.getFileStatus(valuePath);
                FSDataInputStream valueInputStream = fileSystem.open(valuePath);
                LOG.info("Allocating Value Array of Size:" + valueFileStatus.getLen());
                valueData = new byte[(int) valueFileStatus.getLen()];
                LOG.info("Reading Value Data Size:" + valueFileStatus.getLen());
                for (int offset = 0, totalRead = 0; offset < valueFileStatus.getLen();) {
                    int bytesToRead = Math.min(16384, (int) valueFileStatus.getLen() - totalRead);
                    // read() may return fewer bytes than requested, so advance by the
                    // actual count rather than assuming the buffer was filled
                    int bytesRead = valueInputStream.read(valueData, offset, bytesToRead);
                    if (bytesRead < 0) {
                        break;
                    }
                    offset += bytesRead;
                    totalRead += bytesRead;
                }
                valueInputStream.close();
                LOG.info("Finished Reading Value Data Size:" + valueFileStatus.getLen());
            }

            byte[] idData = null;
            {
                FileStatus idFileStatus = fileSystem.getFileStatus(idsPath);
                FSDataInputStream idInputStream = fileSystem.open(idsPath);
                LOG.info("Allocating ID Array of Size:" + idFileStatus.getLen());
                idData = new byte[(int) idFileStatus.getLen()];
                LOG.info("Reading ID Array Data Size:" + idFileStatus.getLen());
                for (int offset = 0, totalRead = 0; offset < idFileStatus.getLen();) {
                    int bytesToRead = Math.min(16384, (int) idFileStatus.getLen() - totalRead);
                    int bytesRead = idInputStream.read(idData, offset, bytesToRead);
                    if (bytesRead < 0) {
                        break;
                    }
                    offset += bytesRead;
                    totalRead += bytesRead;
                }
                idInputStream.close();
                LOG.info("Finished Reading ID Array Data Size:" + idFileStatus.getLen());
            }

            DataInputStream idInputStream = new DataInputStream(new ByteArrayInputStream(idData));
            DataInputStream valueInputStream = new DataInputStream(new ByteArrayInputStream(valueData));

            SequenceFile.Writer output = SequenceFile.createWriter(fileSystem, conf, outputFile, URLFP.class,
                    VIntWritable.class);
            LOG.info("Opened Output Stream");

            URLFP currentFP = new URLFP();
            boolean eof = false;
            int itemCount = 0;
            while (!eof) {
                try {
                    long timeStart = System.currentTimeMillis();
                    currentFP.readFields(idInputStream);
                    long timeEnd = System.currentTimeMillis();
                    // LOG.info("ReadFields Took:" + (timeEnd - timeStart));
                    ++itemCount;
                } catch (EOFException e) {
                    LOG.info("EOF reached. Total Item Count:" + itemCount);
                    eof = true;
                }
                if (!eof) {
                    long valueFingerprint = WritableUtils.readVLong(valueInputStream);
                    int prValue = valueInputStream.readInt();
                    if (valueFingerprint != currentFP.getUrlHash()) {
                        throw new IOException("Fingerprint Mismatch! Expected:" + currentFP.getUrlHash()
                                + " Found:" + valueFingerprint + " ItemCount:" + itemCount);
                    }
                    output.append(currentFP, new VIntWritable(prValue));
                    if (itemCount % 10000 == 0) {
                        LOG.info("Processed " + itemCount + " Values. Last Sampled FP:" + valueFingerprint
                                + " With PR:" + prValue);
                    }
                    currentFP = new URLFP();
                }
            }
            LOG.info("Done outputting pagerank for Node:" + i + " ItemCount:" + itemCount);
            valueInputStream.close();
            idInputStream.close();
            output.close();
        }
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
    }
}
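The job above appends (URLFP, VIntWritable) pairs to a SequenceFile. A hedged sketch of scanning such a file back, using the same pre-2.x SequenceFile API as the example; the key class is read from the file header via reflection, so this sketch does not depend on URLFP directly:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.VIntWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;

// Sketch: scan the (key, VIntWritable) pairs the job above wrote.
public class ScanPageRankOutput {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(args[0]), conf);
        try {
            // Instantiate the key type recorded in the SequenceFile header.
            Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            VIntWritable value = new VIntWritable();
            while (reader.next(key, value)) {
                System.out.println(key + "\t" + value.get());
            }
        } finally {
            reader.close();
        }
    }
}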
From source file:org.elasticsearch.hadoop.serialization.WritableTypeToJsonTest.java
License:Apache License
@Test
public void testVInteger() {
    writableTypeToJson(new VIntWritable(Integer.MAX_VALUE));
}
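Integer.MAX_VALUE is a useful boundary case because it forces the longest variable-length encoding. A standalone sketch (plain Java, outside the elasticsearch-hadoop test harness) that prints encoded sizes across magnitudes:

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.io.VIntWritable;

public class VIntSizes {
    static int encodedSize(int v) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        new VIntWritable(v).write(new DataOutputStream(bytes));
        return bytes.size();
    }

    public static void main(String[] args) throws IOException {
        // Small magnitudes fit in one byte; Integer.MAX_VALUE needs five
        // (a length marker byte plus four value bytes).
        for (int v : new int[] { 0, 127, 128, Integer.MAX_VALUE }) {
            System.out.println(v + " -> " + encodedSize(v) + " byte(s)");
        }
    }
}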
From source file:org.elasticsearch.hadoop.util.WritableUtils.java
License:Apache License
@SuppressWarnings({ "unchecked", "rawtypes" })
public static Writable toWritable(Object object) {
    if (object instanceof Writable) {
        return (Writable) object;
    }
    if (object == null) {
        return NullWritable.get();
    }
    if (object instanceof String) {
        return new Text((String) object);
    }
    if (object instanceof Long) {
        return new VLongWritable((Long) object);
    }
    if (object instanceof Integer) {
        return new VIntWritable((Integer) object);
    }
    if (object instanceof Byte) {
        return new ByteWritable((Byte) object);
    }
    if (object instanceof Short) {
        return WritableCompatUtil.availableShortWritable((Short) object);
    }
    if (object instanceof Double) {
        return new DoubleWritable((Double) object);
    }
    if (object instanceof Float) {
        return new FloatWritable((Float) object);
    }
    if (object instanceof Boolean) {
        return new BooleanWritable((Boolean) object);
    }
    if (object instanceof byte[]) {
        return new BytesWritable((byte[]) object);
    }
    if (object instanceof List) {
        List<Object> list = (List<Object>) object;
        if (!list.isEmpty()) {
            Object first = list.get(0);
            Writable[] content = new Writable[list.size()];
            for (int i = 0; i < content.length; i++) {
                content[i] = toWritable(list.get(i));
            }
            return new ArrayWritable(toWritable(first).getClass(), content);
        }
        return new ArrayWritable(NullWritable.class, new Writable[0]);
    }
    if (object instanceof SortedSet) {
        SortedMapWritable smap = new SortedMapWritable();
        SortedSet<Object> set = (SortedSet) object;
        for (Object obj : set) {
            smap.put((WritableComparable) toWritable(obj), NullWritable.get());
        }
        return smap;
    }
    if (object instanceof Set) {
        MapWritable map = new MapWritable();
        Set<Object> set = (Set) object;
        for (Object obj : set) {
            map.put(toWritable(obj), NullWritable.get());
        }
        return map;
    }
    if (object instanceof SortedMap) {
        SortedMapWritable smap = new SortedMapWritable();
        Map<Object, Object> map = (Map) object;
        for (Map.Entry<?, ?> entry : map.entrySet()) {
            smap.put((WritableComparable) toWritable(entry.getKey()), toWritable(entry.getValue()));
        }
        return smap;
    }
    if (object instanceof Map) {
        MapWritable result = new MapWritable();
        Map<Object, Object> map = (Map) object;
        for (Map.Entry<?, ?> entry : map.entrySet()) {
            result.put(toWritable(entry.getKey()), toWritable(entry.getValue()));
        }
        return result;
    }
    // fall-back to byte array
    return new BytesWritable(object.toString().getBytes(StringUtils.UTF_8));
}
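A short usage sketch of the converter above (assumes elasticsearch-hadoop on the classpath; the main class is illustrative only), showing that boxed Integer values come back as the variable-length VIntWritable rather than IntWritable:

import org.apache.hadoop.io.VIntWritable;
import org.apache.hadoop.io.Writable;
import org.elasticsearch.hadoop.util.WritableUtils;

public class ToWritableDemo {
    public static void main(String[] args) {
        // Integer values map to the variable-length VIntWritable branch above.
        Writable w = WritableUtils.toWritable(Integer.valueOf(42));
        System.out.println(w.getClass().getSimpleName()); // VIntWritable
        System.out.println(((VIntWritable) w).get());     // 42
    }
}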
From source file:org.terrier.structures.indexing.singlepass.hadoop.Inv2DirectMultiReduce.java
License:Mozilla Public License
/** Take an iterator of postings. Each posting is inverted, and a new posting is generated. */
public void map(IntWritable termId, Wrapper<IterablePosting> postingWrapper,
        OutputCollector<VIntWritable, Posting> collector, Reporter reporter) throws IOException {
    final IterablePosting postingIterator = postingWrapper.getObject();
    reporter.setStatus("Mapping for id " + termId);
    while (postingIterator.next() != IterablePosting.EOL) {
        WritablePosting wp = postingIterator.asWritablePosting();
        int docid = postingIterator.getId();
        // invert: the emitted key is the docid, and the posting carries the term id
        wp.setId(termId.get());
        reporter.progress();
        collector.collect(new VIntWritable(docid), wp);
    }
}
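Since VIntWritable is the map output key, all postings for one document arrive together at a single reduce call. A hypothetical reducer skeleton in the same old org.apache.hadoop.mapred API (Terrier's actual reducer inside Inv2DirectMultiReduce differs; this only illustrates the pairing of key and value types):

import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.VIntWritable;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.terrier.structures.postings.Posting;

// Hypothetical skeleton: receives, per VIntWritable docid, every posting
// emitted for that document by the map() above.
public class DocidPostingsReducer {
    public void reduce(VIntWritable docid, Iterator<Posting> postings,
            OutputCollector<VIntWritable, Posting> collector, Reporter reporter) throws IOException {
        while (postings.hasNext()) {
            Posting p = postings.next();
            // ... assemble the direct-index entry for this document from p ...
            reporter.progress();
        }
    }
}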