Example usage for org.apache.hadoop.io Text toString

List of usage examples for org.apache.hadoop.io Text toString

Introduction

On this page you can find example usages of the toString() method of org.apache.hadoop.io.Text.

Prototype

@Override
public String toString() 

Source Link

Document

Convert text back to string

Usage

From source file:com.github.seqware.queryengine.plugins.contribs.GenesToDonorsAggregationPlugin.java

License:Open Source License

/**
 * Merges every value of {@code key} into one comma-separated list and writes a
 * single record of the form {@code key<TAB>list}.
 *
 * <p>Each value is itself split on {@code ","}, and all resulting entries are
 * re-joined with {@code ","} in iteration order.
 *
 * @param key the key whose values are merged
 * @param values the values for {@code key}; each is treated as a comma-separated list
 * @param reducerInterface the sink; receives the merged record as its key and {@code null} as its value
 */
@Override
public void reduce(Text key, Iterable<Text> values, ReducerInterface<Text, Text> reducerInterface) {
    // StringBuilder keeps the merge linear; String += re-copies the whole
    // accumulated list on every append.
    StringBuilder merged = new StringBuilder();
    // An explicit flag (rather than a length check) so that an empty first
    // entry is still followed by a separator.
    boolean first = true;
    for (Text val : values) {
        for (String entry : val.toString().split(",")) {
            if (first) {
                first = false;
            } else {
                merged.append(',');
            }
            merged.append(entry);
        }
    }
    Text newVal = new Text();
    newVal.set(key.toString() + "\t" + merged.toString());
    reducerInterface.write(newVal, null);
}

From source file:com.github.seqware.queryengine.plugins.contribs.MutationsToDonorsAggregationPlugin.java

License:Open Source License

/**
 * Rewrites every value in place and writes it out as a record key.
 *
 * <p>Each value is split on tabs. The third field is split on {@code ","} and
 * re-joined with {@code ","}; the record written is
 * {@code field0<TAB>field1<TAB>rejoinedField2}. The {@code key} argument is not used.
 *
 * @param key unused
 * @param values the values to rewrite; each must contain at least three tab-separated fields
 * @param reducerInterface the sink; receives each rewritten value as its key and {@code null} as its value
 */
@Override
public void reduce(Text key, Iterable<Text> values, ReducerInterface<Text, Text> reducerInterface) {
    for (Text val : values) {
        String[] valArr = val.toString().split("\t");
        // A value with fewer than three fields throws ArrayIndexOutOfBoundsException here.
        String[] fsArr = valArr[2].split(",");

        // The split/re-join is not a no-op: String.split discards trailing
        // empty strings, so trailing commas are dropped.
        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < fsArr.length; i++) {
            if (i > 0) {
                joined.append(',');
            }
            joined.append(fsArr[i]);
        }

        val.set(valArr[0] + "\t" + valArr[1] + "\t" + joined.toString());
        reducerInterface.write(val, null);
    }
}

From source file:com.github.seqware.queryengine.plugins.contribs.OverlappingMutationsAggregationPlugin.java

License:Open Source License

/**
 * Merges the distinct values of {@code key} into one comma-separated list and
 * writes a single record of the form {@code key<TAB>list}.
 *
 * <p>A value whose string form has already been seen is skipped. Every other
 * value is split on {@code ","} and its entries are appended in iteration order.
 *
 * @param key the key whose values are merged
 * @param values the values for {@code key}; each is treated as a comma-separated list
 * @param reducerInterface the sink; receives the merged record as its key and {@code null} as its value
 */
@Override
public void reduce(Text key, Iterable<Text> values, ReducerInterface<Text, Text> reducerInterface) {
    // Remember String snapshots, not the Text objects: Hadoop may hand back the
    // same mutable Text instance for every value, which corrupts a Set<Text>.
    Set<String> seen = new HashSet<String>();
    StringBuilder merged = new StringBuilder();
    boolean first = true;
    for (Text val : values) {
        String current = val.toString();
        if (!seen.add(current)) {
            continue;
        }
        for (String entry : current.split(",")) {
            if (first) {
                first = false;
            } else {
                merged.append(',');
            }
            merged.append(entry);
        }
    }
    // ( "10:100008435-100008436_G->A MU1157731" , "MU000001 , MU000002, MU00003")
    // NOTE(review): 'text' is declared outside this method, presumably a reusable Text field - confirm.
    text.set(key.toString() + "\t" + merged.toString());
    reducerInterface.write(text, null);
}

From source file:com.github.seqware.queryengine.plugins.contribs.SimpleMutationsToDonorsAggregationPlugin.java

License:Open Source License

/**
 * Merges every value of {@code key} into one comma-separated list and writes a
 * single record of the form {@code key<TAB>list}.
 *
 * @param key the key whose values are merged
 * @param values the values for {@code key}; each is split on {@code ","}
 * @param reducerInterface the sink; receives the merged record as its key and {@code null} as its value
 */
@Override
public void reduce(Text key, Iterable<Text> values, ReducerInterface<Text, Text> reducerInterface) {
    // StringBuilder avoids re-copying the accumulated list on every append.
    StringBuilder merged = new StringBuilder();
    // Flag instead of a length check, so an empty first entry still gets a separator after it.
    boolean first = true;
    for (Text val : values) {
        for (String entry : val.toString().split(",")) {
            if (first) {
                first = false;
            } else {
                merged.append(',');
            }
            merged.append(entry);
        }
    }

    // NOTE(review): 'text' is declared outside this method, presumably a reusable Text field - confirm.
    text.set(key.toString() + "\t" + merged.toString());
    reducerInterface.write(text, null);
}

From source file:com.github.ygf.pagerank.InLinksMapper.java

License:Apache License

/**
 * Emits {@code (w, one)} for every out-link {@code w} listed on the input line.
 *
 * <p>The line is split on a colon followed by whitespace; the part after the
 * colon is split on whitespace and each token is parsed as an integer page id.
 * Lines with nothing after the colon are skipped.
 *
 * @param inKey unused
 * @param inValue one input line
 * @param context the sink for the emitted pairs
 * @throws NumberFormatException if an out-link token is not an integer
 */
@Override
public void map(LongWritable inKey, Text inValue, Context context) throws IOException, InterruptedException {

    String[] lineParts = inValue.toString().split(":\\s+");
    if (lineParts.length < 2) {
        // No out-link list on this line (split drops trailing empties), so
        // there is nothing to emit; indexing [1] would throw.
        return;
    }

    for (String outlink : lineParts[1].split("\\s+")) {
        context.write(new IntWritable(Integer.parseInt(outlink)), one);
    }
}

From source file:com.github.ygf.pagerank.InLinksTopNReducer.java

License:Apache License

/**
 * Drains {@code topN}, looks up the title of each page in the title index and
 * writes {@code (inLinks, title)} pairs in the reverse of the order polled.
 *
 * <p>The title index is read from the directory named by the
 * {@code inlinks.titles_dir} configuration entry. Its readers are closed
 * before any output is written, including when a lookup fails.
 *
 * @param context supplies the configuration and receives the output
 */
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {

    Configuration conf = context.getConfiguration();
    Path titlesDir = new Path(conf.get("inlinks.titles_dir"));

    int[] inLinks = new int[topN.size()];
    String[] titles = new String[topN.size()];

    MapFile.Reader[] readers = MapFileOutputFormat.getReaders(titlesDir, conf);
    try {
        Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
        IntWritable page = new IntWritable();
        Text title = new Text();

        // Filled back to front, so the first entry polled ends up last in the output.
        // NOTE(review): presumably topN polls the smallest count first - confirm.
        for (int i = inLinks.length - 1; i >= 0; i--) {
            Map.Entry<Integer, Integer> entry = topN.poll();
            page.set(entry.getValue());
            MapFileOutputFormat.getEntry(readers, partitioner, page, title);
            inLinks[i] = entry.getKey();
            // Copy as a String: 'title' is overwritten by the next lookup.
            titles[i] = title.toString();
        }
    } finally {
        // Release the index readers even if a lookup above threw.
        for (MapFile.Reader reader : readers) {
            reader.close();
        }
    }

    for (int i = 0; i < inLinks.length; i++) {
        context.write(new IntWritable(inLinks[i]), new Text(titles[i]));
    }
}

From source file:com.github.ygf.pagerank.PageRankMatrixMapper.java

License:Apache License

/**
 * Emits one block-matrix entry per out-link on the input line.
 *
 * <p>This task gets a line from links-simple-sorted.txt that contains the
 * out links of a page v. It produces results with keys (i, j) corresponding
 * to the indexes of the block M_{i,j} in which each link v -> w should be
 * stored. The value is (v, w, degree(v)), with v and w expressed as offsets
 * inside the block. Lines with nothing after the colon are skipped.
 *
 * @param inKey unused
 * @param inValue one input line
 * @param context supplies {@code pagerank.block_size} and receives the output
 * @throws NumberFormatException if the block size or a page id is not a valid number
 */
@Override
public void map(LongWritable inKey, Text inValue, Context context) throws IOException, InterruptedException {

    Configuration conf = context.getConfiguration();
    short blockSize = Short.parseShort(conf.get("pagerank.block_size"));

    String[] lineParts = inValue.toString().split(":\\s+");
    if (lineParts.length < 2) {
        // No out-link list on this line (split drops trailing empties);
        // indexing [1] would throw and there is nothing to emit.
        return;
    }
    String[] vOutlinks = lineParts[1].split("\\s+");

    ShortWritable[] blockIndexes = new ShortWritable[2];
    blockIndexes[0] = new ShortWritable();
    blockIndexes[1] = new ShortWritable();

    ShortWritable[] blockEntry = new ShortWritable[3];
    blockEntry[0] = new ShortWritable();
    blockEntry[1] = new ShortWritable();
    blockEntry[2] = new ShortWritable();

    int v = Integer.parseInt(lineParts[0]);
    short j = (short) ((v - 1) / blockSize + 1);

    // These depend only on v, so they are set once for the whole line.
    blockIndexes[1].set(j);
    blockEntry[0].set((short) ((v - 1) % blockSize));
    // The sparse block representation also needs information about
    // the degree of the vector v.
    // NOTE(review): the narrowing cast wraps for more than Short.MAX_VALUE out-links.
    blockEntry[2].set((short) vOutlinks.length);

    for (String outlink : vOutlinks) {
        int w = Integer.parseInt(outlink);
        short i = (short) ((w - 1) / blockSize + 1);

        // Indexes of the block M_{i,j}.
        blockIndexes[0].set(i);
        // One entry of the block M_{i,j} corresponding to the v -> w link.
        blockEntry[1].set((short) ((w - 1) % blockSize));

        context.write(new ShortArrayWritable(blockIndexes), new ShortArrayWritable(blockEntry));
    }
}

From source file:com.github.ygf.pagerank.PageRankTopNReducer.java

License:Apache License

/**
 * Drains {@code topN}, looks up the title of each page in the title index and
 * writes {@code (pageRank, title)} pairs, highest PageRank first.
 *
 * <p>The title index is read from the directory named by the
 * {@code pagerank.titles_dir} configuration entry. Its readers are closed
 * before any output is written, including when a lookup fails.
 *
 * @param context supplies the configuration and receives the output
 */
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {

    Configuration conf = context.getConfiguration();
    Path titlesDir = new Path(conf.get("pagerank.titles_dir"));

    float[] pageRanks = new float[topN.size()];
    String[] titles = new String[topN.size()];

    MapFile.Reader[] readers = MapFileOutputFormat.getReaders(titlesDir, conf);
    try {
        Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
        IntWritable page = new IntWritable();
        Text title = new Text();

        // The order of the entries is reversed. The priority queue is in
        // non-decreasing order and we want the highest PageRank first.
        for (int i = pageRanks.length - 1; i >= 0; i--) {
            Map.Entry<Float, Integer> entry = topN.poll();
            // Get the title of the page from the title index.
            page.set(entry.getValue());
            MapFileOutputFormat.getEntry(readers, partitioner, page, title);
            pageRanks[i] = entry.getKey();
            // Copy as a String: 'title' is overwritten by the next lookup.
            titles[i] = title.toString();
        }
    } finally {
        // Release the index readers even if a lookup above threw.
        for (MapFile.Reader reader : readers) {
            reader.close();
        }
    }

    for (int i = 0; i < pageRanks.length; i++) {
        context.write(new FloatWritable(pageRanks[i]), new Text(titles[i]));
    }
}

From source file:com.google.mr4c.hadoop.MR4CRecordWriter.java

License:Open Source License

/**
 * Deserializes the dataset carried in {@code value} and hands it to the log
 * writer or the data writer, depending on whether the name in {@code key} is
 * one of the execution source's LOGS output dataset names.
 *
 * @param key the dataset name
 * @param value the serialized dataset
 * @throws IOException declared by the signature; raised by the calls made here
 */
private void doWrite(Text key, Text value) throws IOException {
    final String datasetName = key.toString();
    final Dataset parsed = m_serializer.deserializeDataset(new StringReader(value.toString()));

    final boolean isLogOutput = m_algoRunner.getExecutionSource()
            .getOutputDatasetNames(SourceType.LOGS)
            .contains(datasetName);

    if (!isLogOutput) {
        writeData(datasetName, parsed);
        return;
    }
    writeLogs(datasetName, parsed);
}

From source file:com.google.mr4c.hadoop.MR4CReducer.java

License:Open Source License

/**
 * Deserializes every value as a dataset slice, adds each slice to one fresh
 * dataset, then serializes that dataset and collects it under {@code key}.
 *
 * @param key the key the combined dataset is collected under
 * @param values the serialized slices
 * @param output receives the single combined record
 * @param reporter unused
 * @throws IOException declared by the signature; raised by the calls made here
 */
private void doReduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {

    final Dataset combined = new Dataset();

    for (; values.hasNext();) {
        final String serializedSlice = values.next().toString();
        combined.addSlice(m_serializer.deserializeDataset(new StringReader(serializedSlice)));
    }

    final StringWriter buffer = new StringWriter();
    m_serializer.serializeDataset(combined, buffer);
    output.collect(key, new Text(buffer.toString()));
}