Example usages of org.apache.hadoop.io.IntWritable.get()

List of usage examples for org.apache.hadoop.io.IntWritable.get()

Introduction

On this page you can find example usages of the org.apache.hadoop.io.IntWritable.get() method.

Prototype

public int get() 

Source Link

Document

Returns the value of this IntWritable.

Usage

From source file:com.yahoo.semsearch.fastlinking.io.Datapack.java

License:Apache License

/**
 * Joins the anchor map file with the document-frequency (df) map file and writes the merged
 * records as text.
 *
 * <p>Both inputs are read from {@code part-00000} under the given directories. For every anchor
 * record one df record is read; while the two keys differ (ignoring case) the anchor reader is
 * advanced. Anchors shorter than two characters after trimming, numeric anchors whose total
 * count is below two, and anchors without targets are skipped.
 *
 * <p>Each data line has the form
 * {@code anchor SEPARATOR df SEPARATOR total TAB target SEPARATOR count [TAB target SEPARATOR count ...]},
 * where targets have spaces replaced by underscores and are URL-encoded as UTF-8.
 *
 * @param anchorMapPath directory containing the anchor map file (anchor to per-target counts)
 * @param dfMapPath directory containing the df map file (anchor to an integer value)
 * @param multiple_out {@code "true"} (case-insensitive) to write {@code out + ".dat"} plus a flat
 *        {@code out + ".tsv"} with one row per (anchor, target); anything else writes only {@code out}
 * @param out output file name, or file-name prefix when {@code multiple_out} is true
 * @param ngram {@code "true"} (case-insensitive) to emit one record per contiguous word n-gram of
 *        each anchor instead of one record per anchor
 * @throws IOException if reading either map file or writing any output fails
 */
private void merge(String anchorMapPath, String dfMapPath, String multiple_out, String out, String ngram)
        throws IOException {

    JobConf conf = new JobConf(getConf(), Datapack.class);
    // NOTE(review): FileSystem.get may hand back a shared, cached instance; it is still closed
    // at the end of this method, as the original code did.
    FileSystem fs = FileSystem.get(conf);

    boolean multipleOutput = multiple_out != null && multiple_out.equalsIgnoreCase("true");
    boolean ngramOutput = ngram != null && ngram.equalsIgnoreCase("true");

    BufferedWriter anchorsDataOut = null;
    BufferedWriter anchorsTSVOut = null;
    MapFile.Reader anchorMapReader = null;
    MapFile.Reader dfMapReader = null;

    try {
        if (multipleOutput) {
            anchorsDataOut = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(out + ".dat"), outputEncoding));
            anchorsTSVOut = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(out + ".tsv"), outputEncoding));
        } else {
            anchorsDataOut = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(out), outputEncoding));
        }

        anchorMapReader = new MapFile.Reader(new Path(anchorMapPath + "/part-00000"), conf);
        dfMapReader = new MapFile.Reader(new Path(dfMapPath + "/part-00000"), conf);

        Text akey = new Text();
        Text dkey = new Text();
        IntWritable df = new IntWritable();
        HMapSIW map = new HMapSIW();

        records:
        while (anchorMapReader.next(akey, map)) {

            // Both files are sorted, so they are walked side by side.
            // TODO if need be, artificially add a 0 count to unseen anchors
            dfMapReader.next(dkey, df);
            while (!akey.toString().equalsIgnoreCase(dkey.toString())) {
                // Stop once the anchor reader is exhausted; without this check the keys never
                // change again and the loop would spin forever.
                if (!anchorMapReader.next(akey, map)) {
                    break records;
                }
            }

            String anchor = akey.toString();
            if (anchor.trim().length() < 2) {
                continue;
            }

            StringBuilder targets = new StringBuilder();
            int total = 0;
            for (String target : map.keySet()) {
                int count = map.get(target);
                total += count;

                targets.append(encodeEntity(target));
                targets.append(SEPARATOR);
                targets.append(Integer.toString(count));
                targets.append("\t");
            }

            if (StringUtils.isNumeric(anchor) && total < 2) {
                continue;
            }
            if (targets.length() < 2) {
                continue;
            }

            // Drop the trailing tab left behind by the loop above.
            String targetList = targets.substring(0, targets.length() - 1);
            int docFreq = df.get();

            if (!ngramOutput) {
                writeAnchorRecord(anchorsDataOut, anchor, docFreq, total, targetList);
                if (multipleOutput) {
                    writeTargetRows(anchorsTSVOut, anchor, docFreq, total, map);
                }
            } else {
                String[] parts = anchor.split("\\s+");
                for (int i = 0; i < parts.length; i++) {
                    StringBuilder sb = new StringBuilder();
                    for (int j = i; j < parts.length; j++) {
                        sb.append(parts[j]);
                        String span = sb.toString();
                        writeAnchorRecord(anchorsDataOut, span, docFreq, total, targetList);
                        if (multipleOutput) {
                            writeTargetRows(anchorsTSVOut, span, docFreq, total, map);
                        }
                        // The word separator must be added in both output modes; previously it
                        // was only added when multiple outputs were enabled, which glued the
                        // words of an n-gram together in single-output mode.
                        sb.append(" ");
                    }
                }
            }
        }
    } finally {
        // Release everything, including the anchor reader, even when the merge fails midway.
        closeAll(dfMapReader, anchorsDataOut, anchorsTSVOut, anchorMapReader, fs);
    }
}

/** Replaces spaces with underscores in {@code target} and URL-encodes the result as UTF-8. */
private String encodeEntity(String target) throws IOException {
    return URLEncoder.encode(target.replaceAll(" ", "_"), "UTF-8");
}

/** Writes one {@code anchor SEPARATOR df SEPARATOR total TAB targets} line and flushes. */
private void writeAnchorRecord(BufferedWriter writer, String anchor, int docFreq, int total, String targetList)
        throws IOException {
    writer.write(anchor);
    writer.write(SEPARATOR);
    writer.write(Integer.toString(docFreq));
    writer.write(SEPARATOR);
    writer.write(Integer.toString(total));
    writer.write("\t");
    writer.write(targetList);
    writer.write("\n");
    writer.flush();
}

/** Writes one tab-separated {@code anchor, df, total, entity, count} row per target, flushing each row. */
private void writeTargetRows(BufferedWriter writer, String anchor, int docFreq, int total, HMapSIW map)
        throws IOException {
    for (String target : map.keySet()) {
        int count = map.get(target);
        writer.write(anchor);
        writer.write("\t");
        writer.write(Integer.toString(docFreq));
        writer.write("\t");
        writer.write(Integer.toString(total));
        writer.write("\t");
        writer.write(encodeEntity(target));
        writer.write("\t");
        writer.write(Integer.toString(count));
        writer.write("\n");
        writer.flush();
    }
}

/** Closes every non-null resource in order, then rethrows the first close failure, if any. */
private static void closeAll(java.io.Closeable... resources) throws IOException {
    IOException firstFailure = null;
    for (java.io.Closeable resource : resources) {
        if (resource == null) {
            continue;
        }
        try {
            resource.close();
        } catch (IOException e) {
            if (firstFailure == null) {
                firstFailure = e;
            }
        }
    }
    if (firstFailure != null) {
        throw firstFailure;
    }
}

From source file:com.yosanai.tutorial.hadoop.hellohadoop.WordCountReducer.java

License:Open Source License

/**
 * Adds up every count received for {@code key} and emits the key with that total.
 *
 * @param key the word being reduced
 * @param values the partial counts for the word
 * @param context the reducer context the total is written to
 */
protected void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws java.io.IOException, InterruptedException {
    int total = 0;
    java.util.Iterator<IntWritable> counts = values.iterator();
    while (counts.hasNext()) {
        total += counts.next().get();
    }
    IntWritable result = new IntWritable(total);
    context.write(key, result);
}

From source file:com.yourcompany.hadoop.mapreduce.KoreanWordcountReducer.java

License:Apache License

/**
 * Sums the counts for {@code key}, bumps the {@code COUNT} counters, and emits the key with
 * its total only when the total reaches {@code minSupport}.
 */
@Override
protected void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int occurrences = 0;
    for (IntWritable partial : values) {
        occurrences += partial.get();
    }

    // Every reduced key counts as one unique word, whether or not it is emitted.
    context.getCounter("COUNT", "UNIQUE_WORD").increment(1);

    if (occurrences < minSupport) {
        context.getCounter("COUNT", "UNDER_THRESHOLD").increment(1);
        return;
    }

    context.getCounter("COUNT", "OVER_THRESHOLD").increment(1);
    context.write(key, new IntWritable(occurrences));
}

From source file:combiner.CombinerReducer.java

/**
 * Sums all values received for {@code _key} and collects the key with that sum.
 */
@Override
public void reduce(Text _key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output,
        Reporter reporter) throws IOException {
    int runningTotal = 0;
    for (; values.hasNext();) {
        IntWritable current = values.next();
        runningTotal += current.get();
    }

    IntWritable summed = new IntWritable(runningTotal);
    output.collect(_key, summed);
}

From source file:computation.test.MockUtils.java

License:Apache License

/**
 * Builds a Mockito mock of {@code CentralizedServiceWorker} whose
 * {@code getVertexPartitionOwner} answers with a {@code BasicPartitionOwner} for partition
 * {@code vertexId % numOfPartitions}.
 *
 * @param numOfPartitions The number of partitions
 * @return the mocked CentralizedServiceWorker
 */
public static CentralizedServiceWorker<IntWritable, IntWritable, IntWritable> mockServiceGetVertexPartitionOwner(
        final int numOfPartitions) {
    CentralizedServiceWorker<IntWritable, IntWritable, IntWritable> worker = Mockito
            .mock(CentralizedServiceWorker.class);
    Mockito.when(worker.getVertexPartitionOwner(Mockito.any(IntWritable.class)))
            .thenAnswer(new Answer<PartitionOwner>() {
                @Override
                public PartitionOwner answer(InvocationOnMock invocation) throws Throwable {
                    // The vertex id is the first (and only) argument of the stubbed call.
                    Object firstArgument = invocation.getArguments()[0];
                    int partitionId = ((IntWritable) firstArgument).get() % numOfPartitions;
                    return new BasicPartitionOwner(partitionId, null);
                }
            });
    return worker;
}

From source file:crunch.MaxTemperature.java

License:Apache License

  // Emits the largest value seen for the key; Integer.MIN_VALUE is emitted when there are no values.
  @Override
  public void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    int hottest = Integer.MIN_VALUE;
    for (IntWritable reading : values) {
      int candidate = reading.get();
      if (candidate > hottest) {
        hottest = candidate;
      }
    }
    IntWritable result = new IntWritable(hottest);
    context.write(key, result);
  }

From source file:crunch.MaxTemperature.java

License:Apache License

/**
 * Checks that {@code writable} serializes to the four bytes {@code 000000a3} and that those
 * bytes deserialize back to the value 163.
 */
private void checkWalkthrough(IntWritable writable) throws IOException {
    // vv IntWritableTest-SerializedLength
    byte[] encoded = serialize(writable);
    assertThat(encoded.length, is(4));
    // ^^ IntWritableTest-SerializedLength

    // vv IntWritableTest-SerializedBytes
    String hex = StringUtils.byteToHexString(encoded);
    assertThat(hex, is("000000a3"));
    // ^^ IntWritableTest-SerializedBytes

    // vv IntWritableTest-Deserialization
    IntWritable decoded = new IntWritable();
    deserialize(decoded, encoded);
    assertThat(decoded.get(), is(163));
    // ^^ IntWritableTest-Deserialization
}

From source file:crunch.MaxTemperature.java

License:Apache License

/** Writes 163 from one IntWritable into another and checks both the hex form and the copied value. */
@Test
public void test() throws IOException {
    IntWritable source = new IntWritable(163);
    IntWritable target = new IntWritable();

    String hex = writeTo(source, target);

    assertThat(hex, is("000000a3"));
    assertThat(target.get(), is(source.get()));
}

From source file:crunch.MaxTemperature.java

License:Apache License

/**
 * Writes the maximum of {@code values} for {@code key}; with no values the result is
 * {@code Integer.MIN_VALUE}.
 */
@Override
public void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int peak = Integer.MIN_VALUE;
    java.util.Iterator<IntWritable> readings = values.iterator();
    while (readings.hasNext()) {
        peak = Math.max(peak, readings.next().get());
    }
    context.write(key, new IntWritable(peak));
}

From source file:de.kp.core.arules.hadoop.IntArrayWritable.java

License:Open Source License

/**
 * Unwraps the backing writable array into primitive ints.
 *
 * @return a new array holding the {@code int} value of each element, in order
 */
public int[] get() {
    Writable[] items = intArrayWritable.get();
    int[] values = new int[items.length];

    int index = 0;
    for (Writable item : items) {
        // Each element is stored as an IntWritable; the cast fails otherwise.
        values[index++] = ((IntWritable) item).get();
    }

    return values;
}