Example usage for org.apache.hadoop.io Text toString

List of usage examples for org.apache.hadoop.io Text toString

Introduction

On this page you can find example usages of the toString method of org.apache.hadoop.io.Text.

Prototype

@Override
public String toString() 

Source Link

Document

Convert text back to string

Usage

From source file:clustering.link_back.step2.JoinReducer.java

License:Apache License

/**
 * @param key    entry_id@@g_no, join_order
 * @param values cluster_id in step1 result,
 *               or g_name \t g_model [\t else] in pre result.
 *               {@inheritDoc}/*from ww  w.ja  v a2  s  .  com*/
 */
@Override
public void reduce(Step2KeyWritable key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    // called on every group of keys
    for (Text value : values) {
        if (key.getTag().get() == 1) {
            // step result, value = cluster_id
            this.outputValue.set(Integer.valueOf(value.toString()));
        } else {
            this.outputKey.set(key.getJoinKey().toString() + "\t" + value.toString());
            // entry_id@@g_no \t g_name \t g_model [\t else], cluster_id
            context.write(this.outputKey, this.outputValue);
        }
    }
}

From source file:clustering.link_back.step2.SetKeyMapper.java

License:Apache License

/**
 * @param key   entry_id@@g_no/*from   ww  w  . j  ava 2s  .  co m*/
 * @param value cluster_id or content
 *              {@inheritDoc}
 */
@Override
public void map(Text key, Text value, Context context) throws IOException, InterruptedException {

    this.taggedKey.set(key.toString(), this.joinOrder);
    // (group_id,join_order) \t cluster_id or content
    context.write(this.taggedKey, value);
}

From source file:clustering.mst.ChildMapper.java

License:Apache License

/**
 * @param key   group_id1,group_id2/*from  www  .j  a v  a2  s .  c  o  m*/
 * @param value similarity
 *              {@inheritDoc}
 */
@Override
public void map(Text key, Text value, Context context) throws IOException, InterruptedException {

    String idPair = key.toString();
    String[] ids = idPair.split(",");

    int id1 = Integer.valueOf(ids[0]);
    int id2 = Integer.valueOf(ids[1]);

    // get the weight
    double weight = Double.valueOf(value.toString());
    this.outputKey.set(weight);

    int container = belongsTo(id1, id2);
    this.outputValue.set(idPair + ":" + container);

    // weight \t src,dest:containder_id
    context.write(this.outputKey, this.outputValue);
}

From source file:clustering.mst.ChildPartitioner.java

License:Apache License

/**
 * @param key   weight/*  w  ww.  j a v  a  2s  .c  o m*/
 * @param value (group_id1,group_id2):container_id
 *              {@inheritDoc}
 */
@Override
public int getPartition(DoubleWritable key, Text value, int numPartitions) {
    if (numPartitions == 0) {
        return 0;
    }
    String[] contents = value.toString().split(":");

    return Integer.valueOf(contents[1]) % numPartitions;
}

From source file:clustering.mst.ChildReducer.java

License:Apache License

/**
 * {@inheritDoc}
 *
 * <p>For every value, strips the container suffix, parses the two endpoint ids and tries
 * to union them; the edge is written out (under the incoming key) only when
 * {@code unionFind.union} returns {@code true}.
 *
 * @param inputKey similarity
 * @param values   groupId1,groupId2:containerId
 */
@Override
public void reduce(DoubleWritable inputKey, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {

    for (Text edge : values) {
        // "src,dest:containerId" -> keep only the "src,dest" part
        String endpoints = edge.toString().split(":")[0];
        String[] ends = endpoints.split(",");

        int from = Integer.parseInt(ends[0]);
        int to = Integer.parseInt(ends[1]);

        if (!this.unionFind.union(from, to)) {
            // union reported false: nothing to emit for this edge
            continue;
        }
        this.outputValue.set(endpoints);
        context.write(inputKey, this.outputValue);
    }
}

From source file:clustering.mst.FinalMapper.java

License:Apache License

/**
 * {@inheritDoc}
 *
 * <p>Parses the textual key as a double, uses it as the output key and forwards the value
 * unchanged.
 *
 * @param key   similarity
 * @param value doc_id1,doc_id2
 * @throws NumberFormatException if the key is not a parsable double
 */
@Override
public void map(Text key, Text value, Context context) throws IOException, InterruptedException {

    // parseDouble returns the primitive directly instead of boxing and unboxing a Double
    this.outputKey.set(Double.parseDouble(key.toString()));
    // similarity \t doc_id1,doc_id2
    context.write(this.outputKey, value);
}

From source file:clustering.mst.FinalReducer.java

License:Apache License

/**
 * {@inheritDoc}
 *
 * <p>When the incoming key is below {@code threshold}, unions the two ids of every value
 * in the group; otherwise the group is ignored. Nothing is written to the context here.
 *
 * @param inputKey similarity
 * @param values   groupId1,groupId2
 */
@Override
public void reduce(DoubleWritable inputKey, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {

    // Negated "<" (rather than ">=") so a NaN key is skipped exactly as before.
    if (!(inputKey.get() < this.threshold)) {
        return;
    }

    for (Text pair : values) {
        String[] ends = pair.toString().split(",");

        int from = Integer.parseInt(ends[0]);
        int to = Integer.parseInt(ends[1]);

        this.unionFind.union(from, to);
    }
}

From source file:clustering.simhash.Step1Mapper.java

License:Apache License

/**
 * {@inheritDoc}
 *
 * <p>Calculates the SimHash signature of each commodity. The signature's hash code
 * becomes the output key; the output value is the commodity id
 * (entry_id + "@@" + g_no), then "::", then the commodity info (g_name + "##" + g_model).
 *
 * @param key   entry_id@@g_no
 * @param value g_name##g_model
 */
@Override
protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {

    String commodityInfo = value.toString();

    // the "##" separator is replaced by a blank before the text is handed to the builder
    String hashInput = commodityInfo.replace("##", " ");
    SimHash signature = SimHash.Builder.of(hashInput).build();
    this.outputKey.set(signature.getHashCode());

    String payload = key.toString() + "::" + commodityInfo;
    this.outputValue.set(payload);

    // simhash in long \t entry_id@@g_no::g_name##g_model
    context.write(this.outputKey, this.outputValue);
}

From source file:clustering.simhash.Step1Reducer.java

License:Apache License

/**
 * {@inheritDoc}
 *
 * <p>Assigns a group id to every value: the pool is asked for a similar hash within
 * {@code threshold}; when it reports {@code -1} a new id is taken from the counter and the
 * hash is registered in the pool under that id. Each value is written out unchanged under
 * its group id.
 *
 * @param key    simhash
 * @param values entry_id@@g_no::g_name##g_model
 */
@Override
protected void reduce(LongWritable key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {

    for (Text doc : values) {
        // the part after "::" is the commodity info (g_name##g_model)
        String commodityInfo = doc.toString().split("::")[1];

        SimHash docHash = SimHash.Builder.of(commodityInfo).build(key.get());

        int groupId = this._pool.hasSimilar(docHash, this.threshold);
        if (groupId == -1) {
            // no similar hash in the pool yet: open a new group and remember this hash
            groupId = count.incrementAndGet();
            this._pool.update(docHash, groupId);
        }

        this.outputKey.set(groupId);
        // group_id \t entry_id@@g_no::g_name##g_model
        context.write(this.outputKey, doc);
    }
}

From source file:clustering.simhash.Step2Mapper.java

License:Apache License

/**
 * {@inheritDoc}
 *
 * <p>Parses the textual key as an integer, uses it as the output key and forwards the
 * value unchanged.
 *
 * @param key   id
 * @param value entry_id@@g_no::g_name##g_model
 * @throws NumberFormatException if the key is not a valid integer
 */
@Override
protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {

    // parseInt returns the primitive directly instead of boxing and unboxing an Integer
    this.outputKey.set(Integer.parseInt(key.toString()));
    context.write(this.outputKey, value);
}