List of usage examples for org.apache.hadoop.io.Text.toString()
@Override
public String toString()
From source file:com.github.seqware.queryengine.plugins.contribs.GenesToDonorsAggregationPlugin.java
License:Open Source License
@Override public void reduce(Text key, Iterable<Text> values, ReducerInterface<Text, Text> reducerInterface) { // val are the values for a given gene, in this case it's a comma sep list String newFeatStr = ""; boolean first = true; for (Text val : values) { String[] fsArr = val.toString().split(","); for (String currFS : fsArr) { if (first) { first = false;/*from w ww . ja v a2s.com*/ newFeatStr += currFS; } else { newFeatStr += "," + currFS; } } // HELP, not sure what's going in here, why are you writing the text? //reducerInterface.write(val, text); } Text newVal = new Text(); newVal.set(key.toString() + "\t" + newFeatStr); reducerInterface.write(newVal, null); }
From source file:com.github.seqware.queryengine.plugins.contribs.MutationsToDonorsAggregationPlugin.java
License:Open Source License
@Override public void reduce(Text key, Iterable<Text> values, ReducerInterface<Text, Text> reducerInterface) { // values //w w w . ja va 2 s. co m for (Text val : values) { String[] valArr = val.toString().split("\t"); String[] fsArr = valArr[2].split(","); String newFeatStr = ""; boolean first = true; for (String currFS : fsArr) { if (first) { first = false; newFeatStr += currFS; } else { newFeatStr += "," + currFS; } } val.set(valArr[0] + "\t" + valArr[1] + "\t" + newFeatStr); reducerInterface.write(val, null); } }
From source file:com.github.seqware.queryengine.plugins.contribs.OverlappingMutationsAggregationPlugin.java
License:Open Source License
@Override public void reduce(Text key, Iterable<Text> values, ReducerInterface<Text, Text> reducerInterface) { // values /*from w ww . j a va 2s .co m*/ Set<Text> seenSet = new HashSet<Text>(); String newFeatStr = ""; boolean first = true; for (Text val : values) { if (seenSet.contains(val)) { continue; } seenSet.add(val); String[] fsArr = val.toString().split(","); for (String currFS : fsArr) { if (first) { first = false; newFeatStr += currFS; } else { newFeatStr += "," + currFS; } } } // ( "10:100008435-100008436_G->A MU1157731" , "MU000001 , MU000002, MU00003") text.set(key.toString() + "\t" + newFeatStr); reducerInterface.write(text, null); }
From source file:com.github.seqware.queryengine.plugins.contribs.SimpleMutationsToDonorsAggregationPlugin.java
License:Open Source License
@Override public void reduce(Text key, Iterable<Text> values, ReducerInterface<Text, Text> reducerInterface) { // values // w ww . j a v a 2 s . co m String newFeatStr = ""; boolean first = true; for (Text val : values) { String[] fsArr = val.toString().split(","); for (String currFS : fsArr) { if (first) { first = false; newFeatStr += currFS; } else { newFeatStr += "," + currFS; } } } text.set(key.toString() + "\t" + newFeatStr); reducerInterface.write(text, null); }
From source file:com.github.ygf.pagerank.InLinksMapper.java
License:Apache License
@Override public void map(LongWritable inKey, Text inValue, Context context) throws IOException, InterruptedException { String[] lineParts = inValue.toString().split(":\\s+"); String[] vOutlinks = lineParts[1].split("\\s+"); for (int k = 0; k < vOutlinks.length; k++) { int w = Integer.parseInt(vOutlinks[k]); context.write(new IntWritable(w), one); }//ww w . j ava2 s . c o m }
From source file:com.github.ygf.pagerank.InLinksTopNReducer.java
License:Apache License
@Override protected void cleanup(Context context) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); Path titlesDir = new Path(conf.get("inlinks.titles_dir")); MapFile.Reader[] readers = MapFileOutputFormat.getReaders(titlesDir, conf); Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>(); IntWritable page = new IntWritable(); Text title = new Text(); int[] inLinks = new int[topN.size()]; String[] titles = new String[topN.size()]; for (int i = inLinks.length - 1; i >= 0; i--) { Map.Entry<Integer, Integer> entry = topN.poll(); page.set(entry.getValue());// www . j a v a 2s. co m MapFileOutputFormat.getEntry(readers, partitioner, page, title); inLinks[i] = entry.getKey(); titles[i] = title.toString(); } for (MapFile.Reader reader : readers) { reader.close(); } for (int i = 0; i < inLinks.length; i++) { context.write(new IntWritable(inLinks[i]), new Text(titles[i])); } }
From source file:com.github.ygf.pagerank.PageRankMatrixMapper.java
License:Apache License
@Override public void map(LongWritable inKey, Text inValue, Context context) throws IOException, InterruptedException { // This task gets a line from links-simple-sorted.txt that contains the // out links of a page v. It produces results with keys (i, j) // corresponding to the indexes of the block M_{i,j} in which each // link v -> w should be stored. The value is (v, w, degree(v)). Configuration conf = context.getConfiguration(); short blockSize = Short.parseShort(conf.get("pagerank.block_size")); String[] lineParts = inValue.toString().split(":\\s+"); String[] vOutlinks = lineParts[1].split("\\s+"); ShortWritable[] blockIndexes = new ShortWritable[2]; blockIndexes[0] = new ShortWritable(); blockIndexes[1] = new ShortWritable(); ShortWritable[] blockEntry = new ShortWritable[3]; blockEntry[0] = new ShortWritable(); blockEntry[1] = new ShortWritable(); blockEntry[2] = new ShortWritable(); int v, w;/*from w w w . ja v a 2 s.co m*/ short i, j; v = Integer.parseInt(lineParts[0]); j = (short) ((v - 1) / blockSize + 1); for (int k = 0; k < vOutlinks.length; k++) { w = Integer.parseInt(vOutlinks[k]); i = (short) ((w - 1) / blockSize + 1); // Indexes of the block M_{i,j}. blockIndexes[0].set(i); blockIndexes[1].set(j); // One entry of the block M_{i,j} corresponding to the v -> w link. // The sparse block representation also needs information about // the degree of the vector v. blockEntry[0].set((short) ((v - 1) % blockSize)); blockEntry[1].set((short) ((w - 1) % blockSize)); blockEntry[2].set((short) vOutlinks.length); context.write(new ShortArrayWritable(blockIndexes), new ShortArrayWritable(blockEntry)); } }
From source file:com.github.ygf.pagerank.PageRankTopNReducer.java
License:Apache License
@Override protected void cleanup(Context context) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); Path titlesDir = new Path(conf.get("pagerank.titles_dir")); MapFile.Reader[] readers = MapFileOutputFormat.getReaders(titlesDir, conf); Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>(); IntWritable page = new IntWritable(); Text title = new Text(); float[] pageRanks = new float[topN.size()]; String[] titles = new String[topN.size()]; // The order of the entries is reversed. The priority queue is in // non-decreasing order and we want the highest PageRank first. for (int i = pageRanks.length - 1; i >= 0; i--) { Map.Entry<Float, Integer> entry = topN.poll(); // Get the title of the page from the title index. page.set(entry.getValue());//from w ww . j a v a 2s. c om MapFileOutputFormat.getEntry(readers, partitioner, page, title); pageRanks[i] = entry.getKey(); titles[i] = title.toString(); } for (MapFile.Reader reader : readers) { reader.close(); } for (int i = 0; i < pageRanks.length; i++) { context.write(new FloatWritable(pageRanks[i]), new Text(titles[i])); } }
From source file:com.google.mr4c.hadoop.MR4CRecordWriter.java
License:Open Source License
/**
 * Deserializes a dataset from {@code value} and routes it to the log or data writer.
 *
 * <p>The dataset goes to {@code writeLogs} when the key's name is among the execution
 * source's output dataset names for {@code SourceType.LOGS}; otherwise it goes to
 * {@code writeData}.
 *
 * @param key the dataset name
 * @param value the serialized dataset
 * @throws IOException if deserialization or writing fails
 */
private void doWrite(Text key, Text value) throws IOException {
    String name = key.toString();
    Dataset dataset = m_serializer.deserializeDataset(new StringReader(value.toString()));

    boolean isLogOutput = m_algoRunner.getExecutionSource()
            .getOutputDatasetNames(SourceType.LOGS)
            .contains(name);

    if (!isLogOutput) {
        writeData(name, dataset);
        return;
    }
    writeLogs(name, dataset);
}
From source file:com.google.mr4c.hadoop.MR4CReducer.java
License:Open Source License
/**
 * Combines all serialized dataset slices for a key into one dataset and emits it.
 *
 * <p>Each value is deserialized and added as a slice to a fresh {@code Dataset}. The
 * combined dataset is then serialized and collected under the same key.
 *
 * @param key the key shared by all slices; reused as the output key
 * @param values the serialized dataset slices
 * @param output collector that receives the combined, serialized dataset
 * @param reporter progress reporter (unused)
 * @throws IOException if (de)serialization or collection fails
 */
private void doReduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output,
        Reporter reporter) throws IOException {
    Dataset merged = new Dataset();
    while (values.hasNext()) {
        String serializedSlice = values.next().toString();
        merged.addSlice(m_serializer.deserializeDataset(new StringReader(serializedSlice)));
    }

    StringWriter buffer = new StringWriter();
    m_serializer.serializeDataset(merged, buffer);
    output.collect(key, new Text(buffer.toString()));
}