Example usage for org.apache.hadoop.io IntWritable IntWritable

Introduction

On this page you can find example usage of the no-argument constructor of org.apache.hadoop.io.IntWritable.

Prototype

public IntWritable() 
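
The no-argument constructor creates an IntWritable whose value defaults to 0; callers typically reuse a single instance and populate it with set(int) before each write, as the examples below do. A minimal sketch of that pattern (the class name is illustrative only):

import org.apache.hadoop.io.IntWritable;

public class IntWritableSketch {
    public static void main(String[] args) {
        // Construct empty, then fill in a value and read it back.
        IntWritable writable = new IntWritable();
        writable.set(163);
        System.out.println(writable.get()); // prints 163
    }
}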

Usage

From source file:com.yahoo.glimmer.indexing.generator.IndexRecordWriterTest.java

License:Open Source License

@Test
public void test() throws Exception {
    context.checking(new Expectations() {
        {
            allowing(taskContext).getConfiguration();
            will(returnValue(conf));
            allowing(taskContext).getTaskAttemptID();
            will(returnValue(taskAttemptID));
        }
    });
    OutputFormat outputFormat = new IndexRecordWriter.OutputFormat();

    conf.setStrings("RdfFieldNames", "index0", "index1");
    conf.setEnum("IndexType", RDFDocumentFactory.IndexType.VERTICAL);

    RecordWriter<IntWritable, IndexRecordWriterValue> recordWriter = outputFormat.getRecordWriter(taskContext);

    IntWritable key = new IntWritable();
    IndexRecordWriterTermValue termValue = new IndexRecordWriterTermValue();
    IndexRecordWriterDocValue docValue = new IndexRecordWriterDocValue();
    IndexRecordWriterSizeValue sizeValue = new IndexRecordWriterSizeValue();

    // ALIGNMENT_INDEX
    key.set(DocumentMapper.ALIGNMENT_INDEX);
    termValue.setTerm("term1");
    termValue.setTermFrequency(1);
    // The alignment index doesn't have positions/counts.
    termValue.setOccurrenceCount(0);
    termValue.setSumOfMaxTermPositions(0);
    recordWriter.write(key, termValue);
    docValue.setDocument(0); // term1 occurs in index 0
    recordWriter.write(key, docValue);

    // Index 0
    key.set(0);
    termValue.setTermFrequency(3);
    termValue.setOccurrenceCount(6);
    termValue.setSumOfMaxTermPositions(15 + 12 + 18);
    recordWriter.write(key, termValue);
    docValue.setDocument(3);
    docValue.clearOccerrences();
    docValue.addOccurrence(11);
    docValue.addOccurrence(15);
    recordWriter.write(key, docValue);
    docValue.setDocument(4);
    docValue.clearOccerrences();
    docValue.addOccurrence(12);
    recordWriter.write(key, docValue);
    docValue.setDocument(7);
    docValue.clearOccerrences();
    docValue.addOccurrence(14);
    docValue.addOccurrence(17);
    docValue.addOccurrence(18);
    recordWriter.write(key, docValue);

    // ALIGNMENT_INDEX
    key.set(DocumentMapper.ALIGNMENT_INDEX);
    termValue.setTerm("term2");
    termValue.setTermFrequency(2);
    // The alignment index doesn't have positions/counts.
    termValue.setOccurrenceCount(0);
    termValue.setSumOfMaxTermPositions(0);
    recordWriter.write(key, termValue);
    docValue.clearOccerrences();
    docValue.setDocument(0); // term2 occurs in index 0 & 1
    recordWriter.write(key, docValue);
    docValue.setDocument(1); // term2 occurs in index 0 & 1
    recordWriter.write(key, docValue);

    // Index 0
    key.set(0);
    termValue.setTermFrequency(2);
    termValue.setOccurrenceCount(4);
    termValue.setSumOfMaxTermPositions(19 + 16);
    recordWriter.write(key, termValue);

    docValue.setDocument(1);
    docValue.clearOccerrences();
    docValue.addOccurrence(10);
    docValue.addOccurrence(19);
    recordWriter.write(key, docValue);
    docValue.setDocument(7);
    docValue.clearOccerrences();
    docValue.addOccurrence(13);
    docValue.addOccurrence(16);
    recordWriter.write(key, docValue);

    // Index 1
    key.set(1);
    termValue.setTermFrequency(1);
    termValue.setOccurrenceCount(1);
    termValue.setSumOfMaxTermPositions(14);
    recordWriter.write(key, termValue);
    docValue.setDocument(1);
    docValue.clearOccerrences();
    docValue.addOccurrence(14);
    recordWriter.write(key, docValue);

    // ALIGNMENT_INDEX 
    key.set(DocumentMapper.ALIGNMENT_INDEX);
    termValue.setTerm("term3");
    termValue.setTermFrequency(1);
    // The alignment index doesn't have positions/counts.
    termValue.setOccurrenceCount(0);
    termValue.setSumOfMaxTermPositions(0);
    recordWriter.write(key, termValue);
    docValue.setDocument(1); // term3 occurs in index 1
    recordWriter.write(key, docValue);
    docValue.clearOccerrences();

    // Index 1
    key.set(1);
    termValue.setTermFrequency(1);
    termValue.setOccurrenceCount(2);
    termValue.setSumOfMaxTermPositions(11);
    recordWriter.write(key, termValue);
    docValue.setDocument(3);
    docValue.clearOccerrences();
    docValue.addOccurrence(10);
    docValue.addOccurrence(11);
    recordWriter.write(key, docValue);

    // Doc Sizes.
    key.set(0);
    sizeValue.setDocument(0);
    sizeValue.setSize(3);
    recordWriter.write(key, sizeValue);
    sizeValue.setDocument(3);
    sizeValue.setSize(1);
    recordWriter.write(key, sizeValue);
    sizeValue.setDocument(4);
    sizeValue.setSize(10);
    recordWriter.write(key, sizeValue);
    sizeValue.setDocument(6);
    sizeValue.setSize(2);
    recordWriter.write(key, sizeValue);

    key.set(1);
    sizeValue.setDocument(3);
    sizeValue.setSize(3);
    recordWriter.write(key, sizeValue);
    sizeValue.setDocument(6);
    sizeValue.setSize(5);
    recordWriter.write(key, sizeValue);

    recordWriter.close(taskContext);

    // Check the written indexes..

    Path workPath = outputFormat.getDefaultWorkFile(taskContext, "");
    System.out.println("Default work file is " + workPath.toString());
    String dir = workPath.toUri().getPath();
    BitStreamIndex index0 = (BitStreamIndex) DiskBasedIndex.getInstance(dir + "/index0", true, true);
    assertEquals(8, index0.numberOfDocuments);
    assertEquals(2, index0.numberOfTerms);
    assertTrue(index0.hasPositions);
    // term1
    checkOccurrences(index0.documents(0), 3, "(3:11,15) (4:12) (7:14,17,18)");
    // term2
    checkOccurrences(index0.documents(1), 2, "(1:10,19) (7:13,16)");
    assertEquals("[3, 0, 0, 1, 10, 0, 2, 0]", index0.sizes.toString());

    BitStreamIndex index1 = (BitStreamIndex) DiskBasedIndex.getInstance(dir + "/index1", true, true);
    assertEquals(8, index1.numberOfDocuments);
    assertEquals(2, index1.numberOfTerms);
    assertTrue(index1.hasPositions);
    checkOccurrences(index1.documents(0), 1, "(1:14)");
    // term3
    checkOccurrences(index1.documents(1), 1, "(3:10,11)");

    BitStreamIndex indexAlignment = (BitStreamIndex) DiskBasedIndex.getInstance(dir + "/alignment", true);
    assertEquals(8, indexAlignment.numberOfDocuments);
    assertEquals(3, indexAlignment.numberOfTerms);
    assertFalse(indexAlignment.hasPositions);
    // term1
    assertEquals(1, indexAlignment.documents(0).frequency());
    // term2
    assertEquals(2, indexAlignment.documents(1).frequency());
    // term3
    assertEquals(1, indexAlignment.documents(2).frequency());
    assertEquals("[0, 0, 0, 3, 0, 0, 5, 0]", index1.sizes.toString());
}

From source file:com.yahoo.glimmer.indexing.generator.TermReduce.java

License:Open Source License

@Override
protected void setup(
        org.apache.hadoop.mapreduce.Reducer<TermKey, TermValue, IntWritable, IndexRecordWriterValue>.Context context)
        throws IOException, InterruptedException {
    writerKey = new IntWritable();
    writerTermValue = new IndexRecordWriterTermValue();
    writerDocValue = new IndexRecordWriterDocValue();
    writerSizeValue = new IndexRecordWriterSizeValue();
    predicatedIds = new ArrayList<Long>();
}

From source file:com.yahoo.semsearch.fastlinking.io.Datapack.java

License:Apache License

private void merge(String anchorMapPath, String dfMapPath, String multiple_out, String out, String ngram)
        throws IOException {

    JobConf conf = new JobConf(getConf(), Datapack.class);
    FileSystem fs = FileSystem.get(conf);

    BufferedWriter anchorsDataOut;
    BufferedWriter anchorsTSVOut;

    Boolean multiple_output = (multiple_out != null && multiple_out.equalsIgnoreCase("true"));
    Boolean ngram_output = (ngram != null && ngram.equalsIgnoreCase("true"));

    if (!multiple_output) {
        anchorsDataOut = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(out), outputEncoding));
        anchorsTSVOut = null;
    } else {
        anchorsDataOut = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(out + ".dat"), outputEncoding));
        anchorsTSVOut = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(out + ".tsv"), outputEncoding));
    }

    // Loop over anchors
    MapFile.Reader anchorMapReader = new MapFile.Reader(new Path(anchorMapPath + "/part-00000"), conf);
    MapFile.Reader dfMapReader = new MapFile.Reader(new Path(dfMapPath + "/part-00000"), conf);

    /*FileStatus[] status = fs.listStatus( new Path( dfMapPath ) );  // you need to pass in your hdfs path
    for( FileStatus fileStatus : status ) {
    if( !fileStatus.getPath().toString().contains( "part-" )) continue;
    MapFile.Reader dfMapReader = new MapFile.Reader( fileStatus.getPath(), conf );
    */
    Text akey = new Text();
    Text dkey = new Text();
    IntWritable df = new IntWritable();
    HMapSIW map = new HMapSIW();

    while (anchorMapReader.next(akey, map)) {

        // since they are both sorted we can just iterate over both
        // TODO if need be, artificially add a 0 count to unseen anchors
        dfMapReader.next(dkey, df);
        while (!akey.toString().equalsIgnoreCase(dkey.toString())) {
            //System.err.println("Mismatch: '" + akey + "' and '" + dkey + "'");
            anchorMapReader.next(akey, map);
        }
        String l = akey.toString();

        //            while( dfMapReader.next( dkey, df ) ) {

        //              String l = dkey.toString();
        if (l.trim().length() < 2)
            continue;

        StringBuilder targets = new StringBuilder();
        int total = 0;
        for (String target : map.keySet()) {

            int count = map.get(target);
            total += count;

            String entity = URLEncoder.encode(target.replaceAll(" ", "_"), "UTF-8");

            targets.append(entity);
            targets.append(SEPARATOR);
            targets.append(Integer.toString(count));
            targets.append("\t");

        }

        if (StringUtils.isNumeric(l) && total < 2)
            continue;

        //System.err.println("targets " + targets);
        if (targets.length() < 2)
            continue;
        if (!ngram_output) {
            anchorsDataOut.write(l);
            anchorsDataOut.write(SEPARATOR);
            anchorsDataOut.write(Integer.toString(df.get()));
            anchorsDataOut.write(SEPARATOR);
            anchorsDataOut.write(Integer.toString(total));
            anchorsDataOut.write("\t");
            anchorsDataOut.write(targets.substring(0, targets.length() - 1));
            anchorsDataOut.write("\n");
            anchorsDataOut.flush();

            if (multiple_output) {
                for (String target : map.keySet()) {
                    int count = map.get(target);
                    String entity = URLEncoder.encode(target.replaceAll(" ", "_"), "UTF-8");
                    anchorsTSVOut.write(l);
                    anchorsTSVOut.write("\t");
                    anchorsTSVOut.write(Integer.toString(df.get()));
                    anchorsTSVOut.write("\t");
                    anchorsTSVOut.write(Integer.toString(total));
                    anchorsTSVOut.write("\t");
                    anchorsTSVOut.write(entity);
                    anchorsTSVOut.write("\t");
                    anchorsTSVOut.write(Integer.toString(count));
                    anchorsTSVOut.write("\n");
                    anchorsTSVOut.flush();
                }
            }
        } else {
            String parts[] = l.split("\\s+");
            for (int i = 0; i < parts.length; i++) {
                StringBuilder sb = new StringBuilder();
                for (int j = i; j < parts.length; j++) {
                    sb.append(parts[j]);
                    String ss = sb.toString();
                    anchorsDataOut.write(ss);
                    anchorsDataOut.write(SEPARATOR);
                    anchorsDataOut.write(Integer.toString(df.get()));
                    anchorsDataOut.write(SEPARATOR);
                    anchorsDataOut.write(Integer.toString(total));
                    anchorsDataOut.write("\t");
                    anchorsDataOut.write(targets.substring(0, targets.length() - 1));
                    anchorsDataOut.write("\n");
                    anchorsDataOut.flush();
                    if (multiple_output) {
                        for (String target : map.keySet()) {
                            int count = map.get(target);
                            String entity = URLEncoder.encode(target.replaceAll(" ", "_"), "UTF-8");
                            anchorsTSVOut.write(ss);
                            anchorsTSVOut.write("\t");
                            anchorsTSVOut.write(Integer.toString(df.get()));
                            anchorsTSVOut.write("\t");
                            anchorsTSVOut.write(Integer.toString(total));
                            anchorsTSVOut.write("\t");
                            anchorsTSVOut.write(entity);
                            anchorsTSVOut.write("\t");
                            anchorsTSVOut.write(Integer.toString(count));
                            anchorsTSVOut.write("\n");
                            anchorsTSVOut.flush();
                        }
                        sb.append(" ");
                    }
                }
            }
        }
    }
    dfMapReader.close();
    //}

    anchorsDataOut.close();

    if (multiple_output) {
        anchorsTSVOut.close();
    }

    //anchorMapReader.close();

    fs.close();

}

From source file:com.yahoo.semsearch.fastlinking.io.ExtractWikipediaAnchorText.java

License:Apache License

private void merge(String anchorMapPath, String dfMapPath) throws IOException {
    LOG.info("Extracting anchor text (merge)...");
    LOG.info(" - input:   " + anchorMapPath);
    LOG.info(" - output:  " + dfMapPath);

    JobConf conf = new JobConf(getConf(), ExtractWikipediaAnchorText.class);
    FileSystem fs = FileSystem.get(conf);

    // Loop over anchors
    MapFile.Reader anchorMapReader = new MapFile.Reader(new Path(anchorMapPath + "/part-00000"), conf);
    MapFile.Reader dfMapReader = new MapFile.Reader(new Path(dfMapPath + "/part-00000"), conf);

    // IntWritable key = new IntWritable(Integer.parseInt(cmdline.getArgs()[0]));
    // System.out.println(key.toString());

    Text key = new Text();
    IntWritable df = new IntWritable();
    while (dfMapReader.next(key, df)) {

        //if (!key.toString().equalsIgnoreCase("Jim Durham"))
        //   continue;

        HMapSIW map = new HMapSIW();
        anchorMapReader.get(key, map);

        System.out.println(key + "\t" + df + "\t" + map.toString());

        // for (String entity : map.keySet()) {
        // System.out.println("\t" + entity + "\t" + map.get(entity) + "\n");
        // }

        break;

    }
    anchorMapReader.close();
    dfMapReader.close();
    fs.close();

}

From source file:crunch.MaxTemperature.java

License:Apache License

public static void main(String[] args) throws IOException {
        String uri = args[0];
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);

        IntWritable key = new IntWritable();
        Text value = new Text();
        MapFile.Writer writer = null;
        try {
            writer = new MapFile.Writer(conf, fs, uri, key.getClass(), value.getClass());

            for (int i = 0; i < 1024; i++) {
                key.set(i + 1);
                value.set(DATA[i % DATA.length]);
                writer.append(key, value);
            }
        } finally {
            IOUtils.closeStream(writer);
        }
    }
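
The MapFile written above can be read back with a MapFile.Reader, again reusing one IntWritable for the keys. A minimal sketch under the same assumptions (uri and conf as in the writer code):

// Sketch only: iterate the MapFile entries in key order.
MapFile.Reader reader = new MapFile.Reader(new Path(uri), conf);
IntWritable key = new IntWritable();
Text value = new Text();
try {
    while (reader.next(key, value)) { // next() fills the passed-in Writables
        System.out.println(key + "\t" + value);
    }
} finally {
    reader.close();
}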

From source file:crunch.MaxTemperature.java

License:Apache License

public static void main(String[] args) throws IOException {
        String uri = args[0];
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        Path path = new Path(uri);

        IntWritable key = new IntWritable();
        Text value = new Text();
        SequenceFile.Writer writer = null;
        try {
            writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());

            for (int i = 0; i < 100; i++) {
                key.set(100 - i);
                value.set(DATA[i % DATA.length]);
                System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value);
                writer.append(key, value);
            }
        } finally {
            IOUtils.closeStream(writer);
        }
    }
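
Reading the file back follows the same pattern in reverse: construct an empty IntWritable and Text once and let SequenceFile.Reader.next() repopulate them on each record. A minimal sketch (assuming the same conf and path as above):

// Sketch only: stream the records back out of the SequenceFile.
SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(path));
IntWritable key = new IntWritable();
Text value = new Text();
try {
    while (reader.next(key, value)) { // returns false at end of file
        System.out.printf("%s\t%s\n", key, value);
    }
} finally {
    IOUtils.closeStream(reader);
}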

From source file:crunch.MaxTemperature.java

License:Apache License

@Test
    public void walkthroughWithNoArgsConstructor() throws IOException {
        // vv IntWritableTest
        IntWritable writable = new IntWritable();
        writable.set(163);
        // ^^ IntWritableTest
        checkWalkthrough(writable);
    }

From source file:crunch.MaxTemperature.java

License:Apache License

private void checkWalkthrough(IntWritable writable) throws IOException {
        // vv IntWritableTest-SerializedLength
        byte[] bytes = serialize(writable);
        assertThat(bytes.length, is(4));
        // ^^ IntWritableTest-SerializedLength

        // vv IntWritableTest-SerializedBytes
        assertThat(StringUtils.byteToHexString(bytes), is("000000a3"));
        // ^^ IntWritableTest-SerializedBytes

        // vv IntWritableTest-Deserialization
        IntWritable newWritable = new IntWritable();
        deserialize(newWritable, bytes);
        assertThat(newWritable.get(), is(163));
        // ^^ IntWritableTest-Deserialization
    }
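
The serialize and deserialize helpers called above are not part of this excerpt. A typical implementation (a sketch of the usual pattern, not the original source) writes the Writable into an in-memory stream and reads it back with readFields:

public static byte[] serialize(Writable writable) throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    DataOutputStream dataOut = new DataOutputStream(out);
    writable.write(dataOut); // IntWritable emits its int as 4 big-endian bytes
    dataOut.close();
    return out.toByteArray();
}

public static byte[] deserialize(Writable writable, byte[] bytes) throws IOException {
    ByteArrayInputStream in = new ByteArrayInputStream(bytes);
    DataInputStream dataIn = new DataInputStream(in);
    writable.readFields(dataIn); // repopulates the passed-in instance
    dataIn.close();
    return bytes;
}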

From source file:DAAL.KmeansStep1Mapper.java

License:Open Source License

@Override
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {

    /* Read a data set */
    String filePath = "/Hadoop/Kmeans/data/" + value;
    double[] data = new double[nFeatures * nVectorsInBlock];
    readData(filePath, nFeatures, nVectorsInBlock, data);

    DaalContext daalContext = new DaalContext();

    HomogenNumericTable ntData = new HomogenNumericTable(daalContext, data, nFeatures, nVectorsInBlock);

    /* Create an algorithm to compute k-means on local nodes */
    DistributedStep1Local kmeansLocal = new DistributedStep1Local(daalContext, Double.class,
            Method.defaultDense, nClusters);

    /* Get the centroids table computed in step 2 */
    SequenceFile.Reader reader = new SequenceFile.Reader(new Configuration(),
            SequenceFile.Reader.file(new Path("/Hadoop/Kmeans/initResults/centroids")));
    IntWritable step1key = new IntWritable();
    WriteableData step1value = new WriteableData();
    reader.next(step1key, step1value);
    reader.close();

    HomogenNumericTable centroids = (HomogenNumericTable) step1value.getObject(daalContext);

    /* Set the algorithm parameters */
    kmeansLocal.input.set(InputId.data, ntData);
    kmeansLocal.input.set(InputId.inputCentroids, centroids);

    /* Compute k-means on local nodes */
    PartialResult pres = kmeansLocal.compute();

    /* Write the data prepended with a data set sequence number. Needed to know the position of the data set in the input data */
    context.write(new IntWritable(0), new WriteableData(index, pres));

    daalContext.dispose();
    index += totalTasks;
}

From source file:DAAL.QRStep3Mapper.java

License:Open Source License

@Override
public void map(IntWritable step2key, WriteableData step2value, Context context)
        throws IOException, InterruptedException {

    DaalContext daalContext = new DaalContext();

    SequenceFile.Reader reader = new SequenceFile.Reader(new Configuration(),
            SequenceFile.Reader.file(new Path("/Hadoop/QR/step1/step1x" + step2value.getId())));
    IntWritable step1key = new IntWritable();
    WriteableData step1value = new WriteableData();
    reader.next(step1key, step1value);
    reader.close();

    DataCollection s1 = (DataCollection) step1value.getObject(daalContext);
    DataCollection s2 = (DataCollection) step2value.getObject(daalContext);

    /* Create an algorithm to compute QR decomposition on the master node */
    DistributedStep3Local qrStep3Local = new DistributedStep3Local(daalContext, Double.class,
            Method.defaultDense);
    qrStep3Local.input.set(DistributedStep3LocalInputId.inputOfStep3FromStep1, s1);
    qrStep3Local.input.set(DistributedStep3LocalInputId.inputOfStep3FromStep2, s2);

    /* Compute QR decomposition in step 3 */
    qrStep3Local.compute();
    Result result = qrStep3Local.finalizeCompute();
    HomogenNumericTable Qi = (HomogenNumericTable) result.get(ResultId.matrixQ);

    SequenceFile.Writer writer = SequenceFile.createWriter(new Configuration(),
            SequenceFile.Writer.file(new Path("/Hadoop/QR/Output/Qx" + step2value.getId())),
            SequenceFile.Writer.keyClass(IntWritable.class),
            SequenceFile.Writer.valueClass(WriteableData.class));
    writer.append(new IntWritable(0), new WriteableData(step2value.getId(), Qi));
    writer.close();

    daalContext.dispose();
}