List of usage examples for org.apache.hadoop.io.IntWritable.get()
public int get()
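IntWritable is Hadoop's mutable, serializable wrapper around a primitive int: get() returns the current value and set(int) replaces it, so one instance can be reused across many records. A minimal standalone sketch (not drawn from any of the projects below) showing the basic round trip:

import org.apache.hadoop.io.IntWritable;

public class IntWritableGetDemo {
    public static void main(String[] args) {
        IntWritable w = new IntWritable(42);
        int v = w.get();                 // unwrap the primitive value
        System.out.println(v);           // prints 42
        w.set(v + 1);                    // Writables are mutable, so one
        System.out.println(w.get());     // instance can be reused: prints 43
    }
}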
From source file:com.github.ygf.pagerank.InLinksTopNMapper.java
License:Apache License
@Override
public void map(IntWritable inKey, IntWritable inValue, Context context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    int topResults = Integer.parseInt(conf.get("inlinks.top_results"));
    int page = inKey.get(), pageInLinks = inValue.get();
    if (topN.size() < topResults || pageInLinks >= topN.peek().getKey()) {
        topN.add(new AbstractMap.SimpleEntry<Integer, Integer>(pageInLinks, page));
        if (topN.size() > topResults) {
            topN.poll();
        }
    }
}
From source file:com.github.ygf.pagerank.InLinksTopNReducer.java
License:Apache License
@Override
protected void reduce(IntWritable inKey, Iterable<IntWritable> inValues, Context context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    int topResults = Integer.parseInt(conf.get("inlinks.top_results"));
    for (IntWritable inValue : inValues) {
        int page = inValue.get(), pageInLinks = inKey.get();
        if (topN.size() < topResults || pageInLinks >= topN.peek().getKey()) {
            topN.add(new AbstractMap.SimpleEntry<Integer, Integer>(pageInLinks, page));
            if (topN.size() > topResults) {
                topN.poll();
            }
        }
    }
}
From source file:com.github.ygf.pagerank.PageRank.java
License:Apache License
private int getNumPages(Configuration conf, Path titlesDir) throws Exception {
    int numPages = 0;
    IntWritable pageNumber = new IntWritable();
    MapFile.Reader[] readers = MapFileOutputFormat.getReaders(titlesDir, conf);
    for (int i = 0; i < readers.length; i++) {
        readers[i].finalKey(pageNumber);
        if (pageNumber.get() > numPages) {
            numPages = pageNumber.get();
        }
    }
    for (MapFile.Reader reader : readers) {
        reader.close();
    }
    return numPages;
}
From source file:com.github.ygf.pagerank.PageRankTopNReducer.java
License:Apache License
@Override
protected void reduce(FloatWritable inKey, Iterable<IntWritable> inValues, Context context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    int topResults = Integer.parseInt(conf.get("pagerank.top_results"));
    for (IntWritable inValue : inValues) {
        int page = inValue.get();
        float pageRank = inKey.get();
        // The elements in the queue are sorted (in non-decreasing order) by
        // PageRank. The queue is filled up until it contains topResults
        // elements. Then, a new element is added only if its PageRank is
        // greater than or equal to the lowest PageRank in the queue. If the
        // queue is full and a new element is added, the one with the lowest
        // PageRank is removed from the queue.
        if (topN.size() < topResults || pageRank >= topN.peek().getKey()) {
            topN.add(new AbstractMap.SimpleEntry<Float, Integer>(pageRank, page));
            if (topN.size() > topResults) {
                topN.poll();
            }
        }
    }
}
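The three PageRank examples above share the same bounded min-heap pattern, with topN declared as a field elsewhere in each class. A minimal standalone sketch of that pattern (the TopNSketch class and offer method are hypothetical names, not part of the pagerank project):

import java.util.AbstractMap;
import java.util.Map;
import java.util.PriorityQueue;

public class TopNSketch {
    // Min-heap ordered by score: peek() returns the smallest retained entry.
    private final PriorityQueue<Map.Entry<Float, Integer>> topN =
            new PriorityQueue<>((a, b) -> Float.compare(a.getKey(), b.getKey()));

    void offer(float score, int page, int topResults) {
        // Accept the entry if the heap is not yet full, or if the score is at
        // least as large as the current minimum; then evict the minimum if
        // the heap has overflowed.
        if (topN.size() < topResults || score >= topN.peek().getKey()) {
            topN.add(new AbstractMap.SimpleEntry<>(score, page));
            if (topN.size() > topResults) {
                topN.poll();
            }
        }
    }
}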
From source file:com.gotometrics.orderly.FixedIntegerRowKey.java
License:Apache License
@Override
public Object deserialize(ImmutableBytesWritable w) throws IOException {
    IntWritable iw = (IntWritable) super.deserialize(w);
    if (iw == null)
        return null;
    return Integer.valueOf(iw.get());
}
From source file:com.gotometrics.orderly.FixedUnsignedIntWritableRowKey.java
License:Apache License
protected IntWritable invertSign(IntWritable iw) {
    iw.set(iw.get() ^ Integer.MIN_VALUE);
    return iw;
}
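XOR-ing with Integer.MIN_VALUE toggles the sign bit, which converts between signed comparison order and unsigned (lexicographic big-endian byte) order, so serialized row keys sort correctly. A minimal standalone demo of that property (hypothetical class name, not part of the orderly library):

public class SignFlipDemo {
    // Toggling the sign bit converts between signed and unsigned ordering.
    static int flip(int v) {
        return v ^ Integer.MIN_VALUE;
    }

    public static void main(String[] args) {
        int[] samples = { Integer.MIN_VALUE, -5, 0, 3, Integer.MAX_VALUE };
        for (int i = 0; i + 1 < samples.length; i++) {
            int a = samples[i], b = samples[i + 1];
            // Signed comparison of the originals agrees with unsigned
            // comparison of the flipped values.
            assert Integer.compare(a, b) == Integer.compareUnsigned(flip(a), flip(b));
        }
        System.out.println("sign-flip preserves order");
    }
}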
From source file:com.grantingersoll.intell.clustering.KMeansClusteringEngine.java
License:Apache License
private static Map<Integer, List<String>> readPoints(Path pointsPathDir, Configuration conf)
        throws IOException {
    Map<Integer, List<String>> result = new TreeMap<Integer, List<String>>();
    FileSystem fs = pointsPathDir.getFileSystem(conf);
    FileStatus[] children = fs.listStatus(pointsPathDir, new PathFilter() {
        public boolean accept(Path path) {
            String name = path.getName();
            return !(name.endsWith(".crc") || name.startsWith("_"));
        }
    });
    for (FileStatus file : children) {
        Path path = file.getPath();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        try {
            IntWritable key = reader.getKeyClass().asSubclass(IntWritable.class).newInstance();
            WeightedVectorWritable value = reader.getValueClass().asSubclass(WeightedVectorWritable.class)
                    .newInstance();
            while (reader.next(key, value)) {
                // The key is the cluster id; each value holds a single weighted point.
                List<String> pointList = result.get(key.get());
                if (pointList == null) {
                    pointList = new ArrayList<String>();
                    result.put(key.get(), pointList);
                }
                // These are named vectors, because they were generated from the id field.
                String name = ((NamedVector) value.getVector()).getName();
                pointList.add(name);
            }
        } catch (InstantiationException e) {
            log.error("Exception", e);
        } catch (IllegalAccessException e) {
            log.error("Exception", e);
        } finally {
            reader.close();
        }
    }
    return result;
}
From source file:com.gsvic.csmr.io.InputData.java
License:Apache License
/**
 * Reads the document-frequency file.
 * @param conf the job configuration
 * @param dfFile path to the document-frequency sequence file
 * @return the document-frequency data in a HashMap
 * @throws IOException
 */
public static HashMap<IntWritable, LongWritable> readDf(Configuration conf, Path dfFile)
        throws IOException {
    FileSystem filesystem = FileSystem.get(conf);
    SequenceFile.Reader reader = new SequenceFile.Reader(filesystem, dfFile, conf);
    HashMap<IntWritable, LongWritable> dcf = new HashMap<>();
    IntWritable key = new IntWritable();
    LongWritable value = new LongWritable();
    while (reader.next(key, value)) {
        // next() reuses the same key/value objects, so copy them before
        // storing them in the map.
        dcf.put(new IntWritable(key.get()), new LongWritable(value.get()));
    }
    reader.close();
    return dcf;
}
From source file:com.gsvic.csmr.io.InputData.java
License:Apache License
/**
 * Reads the dictionary file.
 * @param conf the job configuration
 * @param dict path to the dictionary sequence file
 * @return the dictionary in a HashMap
 * @throws IOException
 */
public static HashMap<Text, IntWritable> readDictionary(Configuration conf, Path dict)
        throws IOException {
    FileSystem filesystem = FileSystem.get(conf);
    SequenceFile.Reader reader = new SequenceFile.Reader(filesystem, dict, conf);
    HashMap<Text, IntWritable> dictMap = new HashMap<>();
    Text key = new Text();
    IntWritable value = new IntWritable();
    while (reader.next(key, value)) {
        // next() reuses the same key/value objects, so copy them before
        // storing them in the map.
        dictMap.put(new Text(key), new IntWritable(value.get()));
    }
    reader.close();
    return dictMap;
}
From source file:com.hdfs.concat.crush.CrushPartitioner.java
License:Apache License
@Override
public void configure(JobConf job) {
    String path = job.get("crush.partition.map");
    int expPartitions = job.getNumReduceTasks();
    bucketToPartition = new HashMap<Text, Integer>(100);
    try {
        FileSystem fs = FileSystem.get(job);
        Reader reader = new Reader(fs, new Path(path), job);
        Text bucket = new Text();
        IntWritable partNum = new IntWritable();
        while (reader.next(bucket, partNum)) {
            int partNumValue = partNum.get();
            if (partNumValue < 0 || partNumValue >= expPartitions) {
                throw new IllegalArgumentException(
                        "Partition " + partNumValue + " not allowed with " + expPartitions + " reduce tasks");
            }
            Integer prev = bucketToPartition.put(new Text(bucket), partNumValue);
            if (null != prev) {
                throw new IllegalArgumentException("Bucket " + bucket + " appears more than once in " + path);
            }
        }
        reader.close();
    } catch (IOException e) {
        throw new RuntimeException("Could not read partition map from " + path, e);
    }
    if (new HashSet<Integer>(bucketToPartition.values()).size() > expPartitions) {
        throw new IllegalArgumentException(
                path + " contains more than " + expPartitions + " distinct partitions");
    }
}