List of usage examples for org.apache.hadoop.io.IntWritable.get()
public int get()
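get() returns the primitive int wrapped by an IntWritable; it is typically paired with set(int) when a Writable instance is reused across records. Below is a minimal standalone sketch of that round trip (the class and variable names are illustrative only, not taken from any of the source files that follow):

import org.apache.hadoop.io.IntWritable;

public class IntWritableGetDemo {
    public static void main(String[] args) {
        IntWritable count = new IntWritable(); // wraps 0 by default
        count.set(42);                         // store a primitive int
        int raw = count.get();                 // unwrap it again
        System.out.println(raw);               // prints 42
    }
}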
From source file:ivory.data.TermDocVectorsForwardIndex.java
License:Apache License
/**
 * Returns the document vector given a docno.
 */
public TermDocVector getDocVector(int docno) throws IOException {
    // TODO: This method re-opens the SequenceFile on every access. It would be more
    // efficient to cache the file handles.
    if (docno > collectionDocumentCount || docno < 1) {
        return null;
    }

    long pos = positions[docno - docnoOffset - 1];
    int fileNo = (int) (pos / BuildTermDocVectorsForwardIndex.BigNumber);
    pos = pos % BuildTermDocVectorsForwardIndex.BigNumber;

    SequenceFile.Reader reader = new SequenceFile.Reader(fs,
            new Path(path + "/part-" + FORMAT.format(fileNo)), conf);

    IntWritable key = new IntWritable();
    TermDocVector value;
    try {
        value = (TermDocVector) reader.getValueClass().newInstance();
    } catch (Exception e) {
        throw new RuntimeException("Unable to instantiate key/value pair!");
    }

    reader.seek(pos);
    reader.next(key, value);

    if (key.get() != docno) {
        LOG.error("unable to find doc vector for docno " + docno + ": found docno " + key + " instead");
        reader.close();
        return null;
    }

    reader.close();
    return value;
}
From source file:kogiri.mapreduce.preprocess.common.readindex.ReadIndexReader.java
License:Open Source License
public int findReadID(long offset) throws ReadIDNotFoundException {
    LongWritable key = new LongWritable(offset);
    IntWritable value = new IntWritable();
    try {
        // MapFile.Reader.get returns null when the key is absent, in which case
        // value is left unset and must not be used.
        if (this.mapfileReader.get(key, value) != null) {
            return value.get();
        }
    } catch (IOException ex) {
        LOG.error(ex);
    }

    throw new ReadIDNotFoundException("ReadID is not found");
}
From source file:libra.core.kmersimilarity_m.KmerSimilarityMapper.java
License:Apache License
@Override
protected void map(CompressedSequenceWritable key, KmerMatchResult value, Context context)
        throws IOException, InterruptedException {
    IntWritable[] valueArray = value.getVals();
    Path[] kmerIndexPathArray = value.getKmerIndexPath();

    // filter out empty values
    ArrayList<IntWritable> filteredValueArray = new ArrayList<IntWritable>();
    ArrayList<Path> filteredKmerIndexPathArray = new ArrayList<Path>();

    for (int i = 0; i < valueArray.length; i++) {
        if (valueArray[i] != null) {
            filteredValueArray.add(valueArray[i]);
            filteredKmerIndexPathArray.add(kmerIndexPathArray[i]);
        }
    }

    valueArray = null;
    kmerIndexPathArray = null;

    if (filteredValueArray.size() <= 1) {
        // skip
        return;
    }

    int[] fileid_arr = new int[filteredValueArray.size()];
    for (int i = 0; i < filteredValueArray.size(); i++) {
        int fileidInt = 0;
        String indexFilename = filteredKmerIndexPathArray.get(i).getName();
        Integer fileid = this.idCacheTable.get(indexFilename);
        if (fileid == null) {
            String fastaFilename = KmerIndexHelper.getFastaFileName(indexFilename);
            int id = this.fileMapping.getIDFromFastaFile(fastaFilename);
            this.idCacheTable.put(indexFilename, id);
            fileidInt = id;
        } else {
            fileidInt = fileid.intValue();
        }
        fileid_arr[i] = fileidInt;
    }

    // compute normal
    double[] normal = new double[this.valuesLen];
    for (int i = 0; i < this.valuesLen; i++) {
        normal[i] = 0;
    }

    for (int i = 0; i < filteredValueArray.size(); i++) {
        IntWritable arr = filteredValueArray.get(i);
        int freq = arr.get();
        double tf = 1 + Math.log10(freq);
        normal[fileid_arr[i]] = ((double) tf) / this.tfConsineNormBase[fileid_arr[i]];
    }

    accumulateScore(normal);

    this.reportCounter.increment(1);
}
From source file:libra.core.kmersimilarity_r.KmerSimilarityMapper.java
License:Apache License
@Override
protected void map(CompressedSequenceWritable key, IntWritable value, Context context)
        throws IOException, InterruptedException {
    int[] arr = new int[2];
    arr[0] = this.file_id;
    arr[1] = value.get();

    context.write(key, new CompressedIntArrayWritable(arr));
}
From source file:libra.preprocess.stage2.KmerIndexBuilderCombiner.java
License:Apache License
@Override
protected void reduce(CompressedSequenceWritable key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int frequency = 0;
    for (IntWritable value : values) {
        frequency += value.get();
    }

    context.write(key, new IntWritable(frequency));
}
From source file:libra.preprocess.stage2.KmerIndexBuilderReducer.java
License:Apache License
@Override
protected void reduce(CompressedSequenceWritable key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int frequency = 0;
    for (IntWritable value : values) {
        frequency += value.get();
    }

    // compute base
    if (frequency > 0) {
        this.logTFSquareCounter.increment((long) (Math.pow(1 + Math.log10(frequency), 2) * 1000));
    }

    context.write(key, new IntWritable(frequency));
}
From source file:mahout.analysis.MahoutOAReducer.java
@Override
public void reduce(Text id, Iterator<IntWritable> counts, OutputCollector<Text, IntWritable> output,
        Reporter reporter) throws IOException {
    int totalCount = 0;
    while (counts.hasNext()) {
        IntWritable count = counts.next();
        totalCount += count.get();
    }

    if (totalCount > 100) {
        output.collect(id, new IntWritable(totalCount));
    }
}
From source file:microbench.WordCountOnHDFSDataLocal.java
License:Apache License
public static void main(String[] args) throws IOException, InterruptedException {
    try {
        parseArgs(args);
        HashMap<String, String> conf = new HashMap<String, String>();
        initConf(conf);
        MPI_D.Init(args, MPI_D.Mode.Common, conf);

        JobConf jobConf = new JobConf(confPath);
        if (MPI_D.COMM_BIPARTITE_O != null) {
            // O communicator
            int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);
            int size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O);
            if (rank == 0) {
                DataMPIUtil.printArgs(args);
            }
            System.out.println("The O task " + rank + " of " + size + " is working...");

            HadoopReader<LongWritable, Text> reader = HadoopIOUtil.getReader(jobConf, inDir,
                    TextInputFormat.class, rank, MPI_D.COMM_BIPARTITE_O);
            Text word = new Text();
            IntWritable one = new IntWritable(1);
            LongWritable khead = reader.createKey();
            Text vhead = reader.createValue();
            while (reader.next(khead, vhead)) {
                StringTokenizer itr = new StringTokenizer(vhead.toString());
                while (itr.hasMoreTokens()) {
                    word.set(itr.nextToken());
                    // send key-value
                    MPI_D.Send(word, one);
                }
            }
            reader.close();
        } else if (MPI_D.COMM_BIPARTITE_A != null) {
            // A communicator
            int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A);
            int size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_A);
            System.out.println("The A task " + rank + " of " + size + " is working...");

            HadoopWriter<Text, IntWritable> outrw = HadoopIOUtil.getNewWriter(jobConf, outDir, Text.class,
                    IntWritable.class, TextOutputFormat.class, null, rank, MPI_D.COMM_BIPARTITE_A);

            Text oldKey = null;
            IntWritable valueData = new IntWritable();
            int sum = 0;
            Object[] keyValue = MPI_D.Recv();
            while (keyValue != null) {
                Text key = (Text) keyValue[0];
                IntWritable value = (IntWritable) keyValue[1];
                if (oldKey == null) {
                    oldKey = key;
                    sum = value.get();
                } else {
                    if (key.equals(oldKey)) {
                        sum += value.get();
                    } else {
                        valueData.set(sum);
                        outrw.write(oldKey, valueData);
                        oldKey = key;
                        sum = value.get();
                    }
                }
                keyValue = MPI_D.Recv();
            }
            if (oldKey != null) {
                valueData.set(sum);
                outrw.write(oldKey, valueData);
            }
            outrw.close();
        }
        MPI_D.Finalize();
    } catch (MPI_D_Exception e) {
        e.printStackTrace();
    }
}
From source file:minor_MapReduce.C4_5.java
License:Open Source License
private static void summarizeData() throws Exception {
    Job job = Job.getInstance();
    job.setJarByClass(C4_5.class);
    job.setJobName("C4.5_summarizeData");

    FileInputFormat.addInputPath(job, input_path);
    FileOutputFormat.setOutputPath(job, summarized_data_path);

    job.setMapperClass(SummarizeMapper.class);
    job.setReducerClass(SummarizeReducer.class);

    job.setOutputKeyClass(TextArrayWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.waitForCompletion(false);

    /* Store it locally */
    Option optPath = SequenceFile.Reader.file(new Path(summarized_data_path.toString() + "/part-r-00000"));
    SequenceFile.Reader reader = new SequenceFile.Reader(new Configuration(), optPath);

    TextArrayWritable key = new TextArrayWritable();
    IntWritable val = new IntWritable();

    summarized_data = new HashMap<String[], Integer>();
    while (reader.next(key, val)) {
        summarized_data.put(key.toStrings(), val.get());
    }
    reader.close();
}
From source file:ml.grafos.okapi.graphs.betweeness.BetweenessComputation.java
License:Apache License
/**
 * Return the current global state
 *
 * @return State that stores the current global state
 */
private State getCurrentGlobalState() {
    IntWritable stateInt = this.getAggregatedValue(BetweenessMasterCompute.STATE_AGG);
    return State.values()[stateInt.get()];
}