List of usage examples for org.apache.hadoop.io.IntWritable
public IntWritable()
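Before the project-specific examples below, here is a minimal, self-contained sketch (not taken from any of the listed source files) of what the no-argument constructor is typically used for: it creates a mutable, reusable holder whose value defaults to 0 and is filled in later with set().

import org.apache.hadoop.io.IntWritable;

public class IntWritableNoArgExample {
    public static void main(String[] args) {
        // The no-argument constructor creates a mutable box whose value defaults to 0.
        IntWritable writable = new IntWritable();
        writable.set(163);
        System.out.println(writable.get()); // prints 163

        // The same instance is normally reused across records rather than
        // allocating a new IntWritable for every value.
        for (int i = 0; i < 3; i++) {
            writable.set(i);
            System.out.println(writable);
        }
    }
}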
From source file:com.yahoo.glimmer.indexing.generator.IndexRecordWriterTest.java
License:Open Source License
@Test
public void test() throws Exception {
    context.checking(new Expectations() {
        {
            allowing(taskContext).getConfiguration();
            will(returnValue(conf));
            allowing(taskContext).getTaskAttemptID();
            will(returnValue(taskAttemptID));
        }
    });

    OutputFormat outputFormat = new IndexRecordWriter.OutputFormat();

    conf.setStrings("RdfFieldNames", "index0", "index1");
    conf.setEnum("IndexType", RDFDocumentFactory.IndexType.VERTICAL);
    RecordWriter<IntWritable, IndexRecordWriterValue> recordWriter = outputFormat.getRecordWriter(taskContext);

    IntWritable key = new IntWritable();
    IndexRecordWriterTermValue termValue = new IndexRecordWriterTermValue();
    IndexRecordWriterDocValue docValue = new IndexRecordWriterDocValue();
    IndexRecordWriterSizeValue sizeValue = new IndexRecordWriterSizeValue();

    // ALIGNMENT_INDEX
    key.set(DocumentMapper.ALIGNMENT_INDEX);
    termValue.setTerm("term1");
    termValue.setTermFrequency(1);
    // The alignment index doesn't have positions/counts.
    termValue.setOccurrenceCount(0);
    termValue.setSumOfMaxTermPositions(0);
    recordWriter.write(key, termValue);
    docValue.setDocument(0); // term1 occurs in index 0
    recordWriter.write(key, docValue);

    // Index 0
    key.set(0);
    termValue.setTermFrequency(3);
    termValue.setOccurrenceCount(6);
    termValue.setSumOfMaxTermPositions(15 + 12 + 18);
    recordWriter.write(key, termValue);
    docValue.setDocument(3);
    docValue.clearOccerrences();
    docValue.addOccurrence(11);
    docValue.addOccurrence(15);
    recordWriter.write(key, docValue);
    docValue.setDocument(4);
    docValue.clearOccerrences();
    docValue.addOccurrence(12);
    recordWriter.write(key, docValue);
    docValue.setDocument(7);
    docValue.clearOccerrences();
    docValue.addOccurrence(14);
    docValue.addOccurrence(17);
    docValue.addOccurrence(18);
    recordWriter.write(key, docValue);

    // ALIGNMENT_INDEX
    key.set(DocumentMapper.ALIGNMENT_INDEX);
    termValue.setTerm("term2");
    termValue.setTermFrequency(2);
    // The alignment index doesn't have positions/counts.
    termValue.setOccurrenceCount(0);
    termValue.setSumOfMaxTermPositions(0);
    recordWriter.write(key, termValue);
    docValue.clearOccerrences();
    docValue.setDocument(0); // term2 occurs in index 0 & 1
    recordWriter.write(key, docValue);
    docValue.setDocument(1); // term2 occurs in index 0 & 1
    recordWriter.write(key, docValue);

    // Index 0
    key.set(0);
    termValue.setTermFrequency(2);
    termValue.setOccurrenceCount(4);
    termValue.setSumOfMaxTermPositions(19 + 16);
    recordWriter.write(key, termValue);
    docValue.setDocument(1);
    docValue.clearOccerrences();
    docValue.addOccurrence(10);
    docValue.addOccurrence(19);
    recordWriter.write(key, docValue);
    docValue.setDocument(7);
    docValue.clearOccerrences();
    docValue.addOccurrence(13);
    docValue.addOccurrence(16);
    recordWriter.write(key, docValue);

    // Index 1
    key.set(1);
    termValue.setTermFrequency(1);
    termValue.setOccurrenceCount(1);
    termValue.setSumOfMaxTermPositions(14);
    recordWriter.write(key, termValue);
    docValue.setDocument(1);
    docValue.clearOccerrences();
    docValue.addOccurrence(14);
    recordWriter.write(key, docValue);

    // ALIGNMENT_INDEX
    key.set(DocumentMapper.ALIGNMENT_INDEX);
    termValue.setTerm("term3");
    termValue.setTermFrequency(1);
    // The alignment index doesn't have positions/counts.
    termValue.setOccurrenceCount(0);
    termValue.setSumOfMaxTermPositions(0);
    recordWriter.write(key, termValue);
    docValue.setDocument(1); // term3 occurs in index 1
    recordWriter.write(key, docValue);
    docValue.clearOccerrences();

    // Index 1
    key.set(1);
    termValue.setTermFrequency(1);
    termValue.setOccurrenceCount(2);
    termValue.setSumOfMaxTermPositions(11);
    recordWriter.write(key, termValue);
    docValue.setDocument(3);
    docValue.clearOccerrences();
    docValue.addOccurrence(10);
    docValue.addOccurrence(11);
    recordWriter.write(key, docValue);

    // Doc Sizes.
    key.set(0);
    sizeValue.setDocument(0);
    sizeValue.setSize(3);
    recordWriter.write(key, sizeValue);
    sizeValue.setDocument(3);
    sizeValue.setSize(1);
    recordWriter.write(key, sizeValue);
    sizeValue.setDocument(4);
    sizeValue.setSize(10);
    recordWriter.write(key, sizeValue);
    sizeValue.setDocument(6);
    sizeValue.setSize(2);
    recordWriter.write(key, sizeValue);

    key.set(1);
    sizeValue.setDocument(3);
    sizeValue.setSize(3);
    recordWriter.write(key, sizeValue);
    sizeValue.setDocument(6);
    sizeValue.setSize(5);
    recordWriter.write(key, sizeValue);

    recordWriter.close(taskContext);

    // Check the written indexes.
    Path workPath = outputFormat.getDefaultWorkFile(taskContext, "");
    System.out.println("Default work file is " + workPath.toString());
    String dir = workPath.toUri().getPath();

    BitStreamIndex index0 = (BitStreamIndex) DiskBasedIndex.getInstance(dir + "/index0", true, true);
    assertEquals(8, index0.numberOfDocuments);
    assertEquals(2, index0.numberOfTerms);
    assertTrue(index0.hasPositions);
    // term1
    checkOccurrences(index0.documents(0), 3, "(3:11,15) (4:12) (7:14,17,18)");
    // term2
    checkOccurrences(index0.documents(1), 2, "(1:10,19) (7:13,16)");
    assertEquals("[3, 0, 0, 1, 10, 0, 2, 0]", index0.sizes.toString());

    BitStreamIndex index1 = (BitStreamIndex) DiskBasedIndex.getInstance(dir + "/index1", true, true);
    assertEquals(8, index1.numberOfDocuments);
    assertEquals(2, index1.numberOfTerms);
    assertTrue(index0.hasPositions);
    checkOccurrences(index1.documents(0), 1, "(1:14)");
    // term3
    checkOccurrences(index1.documents(1), 1, "(3:10,11)");

    BitStreamIndex indexAlignment = (BitStreamIndex) DiskBasedIndex.getInstance(dir + "/alignment", true);
    assertEquals(8, indexAlignment.numberOfDocuments);
    assertEquals(3, indexAlignment.numberOfTerms);
    assertFalse(indexAlignment.hasPositions);
    // term1
    assertEquals(1, indexAlignment.documents(0).frequency());
    // term2
    assertEquals(2, indexAlignment.documents(1).frequency());
    // term3
    assertEquals(1, indexAlignment.documents(2).frequency());

    assertEquals("[0, 0, 0, 3, 0, 0, 5, 0]", index1.sizes.toString());
}
From source file:com.yahoo.glimmer.indexing.generator.TermReduce.java
License:Open Source License
@Override
protected void setup(
        org.apache.hadoop.mapreduce.Reducer<TermKey, TermValue, IntWritable, IndexRecordWriterValue>.Context context)
        throws IOException, InterruptedException {
    writerKey = new IntWritable();
    writerTermValue = new IndexRecordWriterTermValue();
    writerDocValue = new IndexRecordWriterDocValue();
    writerSizeValue = new IndexRecordWriterSizeValue();
    predicatedIds = new ArrayList<Long>();
}
From source file:com.yahoo.semsearch.fastlinking.io.Datapack.java
License:Apache License
private void merge(String anchorMapPath, String dfMapPath, String multiple_out, String out, String ngram)
        throws IOException {
    JobConf conf = new JobConf(getConf(), Datapack.class);
    FileSystem fs = FileSystem.get(conf);

    BufferedWriter anchorsDataOut;
    BufferedWriter anchorsTSVOut;

    Boolean multiple_output = (multiple_out != null && multiple_out.equalsIgnoreCase("true"));
    Boolean ngram_output = (ngram != null && ngram.equalsIgnoreCase("true"));

    if (!multiple_output) {
        anchorsDataOut = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(out), outputEncoding));
        anchorsTSVOut = null;
    } else {
        anchorsDataOut = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(out + ".dat"), outputEncoding));
        anchorsTSVOut = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(out + ".tsv"), outputEncoding));
    }

    // Loop over anchors
    MapFile.Reader anchorMapReader = new MapFile.Reader(new Path(anchorMapPath + "/part-00000"), conf);
    MapFile.Reader dfMapReader = new MapFile.Reader(new Path(dfMapPath + "/part-00000"), conf);

    /*FileStatus[] status = fs.listStatus( new Path( dfMapPath ) ); // you need to pass in your hdfs path
    for( FileStatus fileStatus : status ) {
        if( !fileStatus.getPath().toString().contains( "part-" )) continue;
        MapFile.Reader dfMapReader = new MapFile.Reader( fileStatus.getPath(), conf );
    */

    Text akey = new Text();
    Text dkey = new Text();
    IntWritable df = new IntWritable();
    HMapSIW map = new HMapSIW();

    while (anchorMapReader.next(akey, map)) {
        // since they are both sorted we can just iterate over both
        // TODO if need be, artificially add a 0 count to unseen anchors
        dfMapReader.next(dkey, df);
        while (!akey.toString().equalsIgnoreCase(dkey.toString())) {
            //System.err.println("Mismatch: '" + akey + "' and '" + dkey + "'");
            anchorMapReader.next(akey, map);
        }
        String l = akey.toString();

        // while( dfMapReader.next( dkey, df ) ) {
        // String l = dkey.toString();
        if (l.trim().length() < 2)
            continue;

        StringBuilder targets = new StringBuilder();
        int total = 0;
        for (String target : map.keySet()) {
            int count = map.get(target);
            total += count;
            String entity = URLEncoder.encode(target.replaceAll(" ", "_"), "UTF-8");
            targets.append(entity);
            targets.append(SEPARATOR);
            targets.append(Integer.toString(count));
            targets.append("\t");
        }

        if (StringUtils.isNumeric(l) && total < 2)
            continue;

        //System.err.println("targets " + targets);
        if (targets.length() < 2)
            continue;

        if (!ngram_output) {
            anchorsDataOut.write(l);
            anchorsDataOut.write(SEPARATOR);
            anchorsDataOut.write(Integer.toString(df.get()));
            anchorsDataOut.write(SEPARATOR);
            anchorsDataOut.write(Integer.toString(total));
            anchorsDataOut.write("\t");
            anchorsDataOut.write(targets.substring(0, targets.length() - 1));
            anchorsDataOut.write("\n");
            anchorsDataOut.flush();

            if (multiple_output) {
                for (String target : map.keySet()) {
                    int count = map.get(target);
                    String entity = URLEncoder.encode(target.replaceAll(" ", "_"), "UTF-8");
                    anchorsTSVOut.write(l);
                    anchorsTSVOut.write("\t");
                    anchorsTSVOut.write(Integer.toString(df.get()));
                    anchorsTSVOut.write("\t");
                    anchorsTSVOut.write(Integer.toString(total));
                    anchorsTSVOut.write("\t");
                    anchorsTSVOut.write(entity);
                    anchorsTSVOut.write("\t");
                    anchorsTSVOut.write(Integer.toString(count));
                    anchorsTSVOut.write("\n");
                    anchorsTSVOut.flush();
                }
            }
        } else {
            String parts[] = l.split("\\s+");
            for (int i = 0; i < parts.length; i++) {
                StringBuilder sb = new StringBuilder();
                for (int j = i; j < parts.length; j++) {
                    sb.append(parts[j]);
                    String ss = sb.toString();
                    anchorsDataOut.write(ss);
                    anchorsDataOut.write(SEPARATOR);
                    anchorsDataOut.write(Integer.toString(df.get()));
                    anchorsDataOut.write(SEPARATOR);
                    anchorsDataOut.write(Integer.toString(total));
                    anchorsDataOut.write("\t");
                    anchorsDataOut.write(targets.substring(0, targets.length() - 1));
                    anchorsDataOut.write("\n");
                    anchorsDataOut.flush();

                    if (multiple_output) {
                        for (String target : map.keySet()) {
                            int count = map.get(target);
                            String entity = URLEncoder.encode(target.replaceAll(" ", "_"), "UTF-8");
                            anchorsTSVOut.write(ss);
                            anchorsTSVOut.write("\t");
                            anchorsTSVOut.write(Integer.toString(df.get()));
                            anchorsTSVOut.write("\t");
                            anchorsTSVOut.write(Integer.toString(total));
                            anchorsTSVOut.write("\t");
                            anchorsTSVOut.write(entity);
                            anchorsTSVOut.write("\t");
                            anchorsTSVOut.write(Integer.toString(count));
                            anchorsTSVOut.write("\n");
                            anchorsTSVOut.flush();
                        }
                        sb.append(" ");
                    }
                }
            }
        }
    }
    dfMapReader.close();
    //}

    anchorsDataOut.close();
    if (multiple_output) {
        anchorsTSVOut.close();
    }
    //anchorMapReader.close();
    fs.close();
}
From source file:com.yahoo.semsearch.fastlinking.io.ExtractWikipediaAnchorText.java
License:Apache License
private void merge(String anchorMapPath, String dfMapPath) throws IOException {
    LOG.info("Extracting anchor text (merge)...");
    LOG.info(" - input: " + anchorMapPath);
    LOG.info(" - output: " + dfMapPath);

    JobConf conf = new JobConf(getConf(), ExtractWikipediaAnchorText.class);
    FileSystem fs = FileSystem.get(conf);

    // Loop over anchors
    MapFile.Reader anchorMapReader = new MapFile.Reader(new Path(anchorMapPath + "/part-00000"), conf);
    MapFile.Reader dfMapReader = new MapFile.Reader(new Path(dfMapPath + "/part-00000"), conf);

    // IntWritable key = new IntWritable(Integer.parseInt(cmdline.getArgs()[0]));
    // System.out.println(key.toString());

    Text key = new Text();
    IntWritable df = new IntWritable();
    while (dfMapReader.next(key, df)) {
        //if (!key.toString().equalsIgnoreCase("Jim Durham"))
        //    continue;
        HMapSIW map = new HMapSIW();
        anchorMapReader.get(key, map);
        System.out.println(key + "\t" + df + "\t" + map.toString());
        // for (String entity : map.keySet()) {
        //     System.out.println("\t" + entity + "\t" + map.get(entity) + "\n");
        // }
        break;
    }
    anchorMapReader.close();
    dfMapReader.close();
    fs.close();
}
From source file:crunch.MaxTemperature.java
License:Apache License
public static void main(String[] args) throws IOException {
    String uri = args[0];
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(uri), conf);

    IntWritable key = new IntWritable();
    Text value = new Text();
    MapFile.Writer writer = null;
    try {
        writer = new MapFile.Writer(conf, fs, uri, key.getClass(), value.getClass());

        for (int i = 0; i < 1024; i++) {
            key.set(i + 1);
            value.set(DATA[i % DATA.length]);
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
    }
}
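A MapFile written this way can be read back with MapFile.Reader. The following companion sketch is not part of the original source file; it assumes the same uri argument and uses the older FileSystem-based reader constructor. A single IntWritable/Text pair is reused for the lookup.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.Text;

public class MapFileReadExample {
    public static void main(String[] args) throws Exception {
        String uri = args[0];
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);

        MapFile.Reader reader = new MapFile.Reader(fs, uri, conf);
        try {
            // Random access by key: get() fills 'value' and returns it, or null if the key is absent.
            IntWritable key = new IntWritable(496);
            Text value = new Text();
            if (reader.get(key, value) != null) {
                System.out.printf("%s\t%s%n", key, value);
            }
        } finally {
            reader.close();
        }
    }
}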
From source file:crunch.MaxTemperature.java
License:Apache License
public static void main(String[] args) throws IOException {
    String uri = args[0];
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(uri), conf);
    Path path = new Path(uri);

    IntWritable key = new IntWritable();
    Text value = new Text();
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());

        for (int i = 0; i < 100; i++) {
            key.set(100 - i);
            value.set(DATA[i % DATA.length]);
            System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value);
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
    }
}
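As a companion to the writer above (not part of the original source file), a SequenceFile written this way is usually read back by reusing one key/value pair and letting next() fill them in place. This sketch assumes the same uri argument and the known IntWritable/Text record types.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SequenceFileReadExample {
    public static void main(String[] args) throws Exception {
        String uri = args[0];
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        Path path = new Path(uri);

        SequenceFile.Reader reader = null;
        try {
            reader = new SequenceFile.Reader(fs, path, conf);
            // Reuse one key/value pair for every record; next() returns false at end of file.
            IntWritable key = new IntWritable();
            Text value = new Text();
            while (reader.next(key, value)) {
                System.out.printf("%s\t%s%n", key, value);
            }
        } finally {
            IOUtils.closeStream(reader);
        }
    }
}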
From source file:crunch.MaxTemperature.java
License:Apache License
@Test
public void walkthroughWithNoArgsConstructor() throws IOException {
    // vv IntWritableTest
    IntWritable writable = new IntWritable();
    writable.set(163);
    // ^^ IntWritableTest
    checkWalkthrough(writable);
}
From source file:crunch.MaxTemperature.java
License:Apache License
private void checkWalkthrough(IntWritable writable) throws IOException {
    // vv IntWritableTest-SerializedLength
    byte[] bytes = serialize(writable);
    assertThat(bytes.length, is(4));
    // ^^ IntWritableTest-SerializedLength

    // vv IntWritableTest-SerializedBytes
    assertThat(StringUtils.byteToHexString(bytes), is("000000a3"));
    // ^^ IntWritableTest-SerializedBytes

    // vv IntWritableTest-Deserialization
    IntWritable newWritable = new IntWritable();
    deserialize(newWritable, bytes);
    assertThat(newWritable.get(), is(163));
    // ^^ IntWritableTest-Deserialization
}
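The serialize() and deserialize() helpers called in this test are defined elsewhere in the source file and are not shown in this listing. A plausible minimal sketch, assuming the conventional Writable round trip through Java data streams (the class name here is only illustrative), would be:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.io.Writable;

public class WritableSerializationHelpers {
    // Serialize a Writable into a byte array via its write() method.
    public static byte[] serialize(Writable writable) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        DataOutputStream dataOut = new DataOutputStream(out);
        writable.write(dataOut);
        dataOut.close();
        return out.toByteArray();
    }

    // Repopulate an existing Writable from a byte array via readFields().
    public static byte[] deserialize(Writable writable, byte[] bytes) throws IOException {
        ByteArrayInputStream in = new ByteArrayInputStream(bytes);
        DataInputStream dataIn = new DataInputStream(in);
        writable.readFields(dataIn);
        dataIn.close();
        return bytes;
    }
}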
From source file:DAAL.KmeansStep1Mapper.java
License:Open Source License
@Override
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
    /* Read a data set */
    String filePath = "/Hadoop/Kmeans/data/" + value;
    double[] data = new double[nFeatures * nVectorsInBlock];
    readData(filePath, nFeatures, nVectorsInBlock, data);

    DaalContext daalContext = new DaalContext();

    HomogenNumericTable ntData = new HomogenNumericTable(daalContext, data, nFeatures, nVectorsInBlock);

    /* Create an algorithm to compute k-means on local nodes */
    DistributedStep1Local kmeansLocal = new DistributedStep1Local(daalContext, Double.class,
            Method.defaultDense, nClusters);

    /* Get the centroids table computed in step 2 */
    SequenceFile.Reader reader = new SequenceFile.Reader(new Configuration(),
            SequenceFile.Reader.file(new Path("/Hadoop/Kmeans/initResults/centroids")));
    IntWritable step1key = new IntWritable();
    WriteableData step1value = new WriteableData();
    reader.next(step1key, step1value);
    reader.close();

    HomogenNumericTable centroids = (HomogenNumericTable) step1value.getObject(daalContext);

    /* Set the algorithm parameters */
    kmeansLocal.input.set(InputId.data, ntData);
    kmeansLocal.input.set(InputId.inputCentroids, centroids);

    /* Compute k-means on local nodes */
    PartialResult pres = kmeansLocal.compute();

    /* Write the data prepended with a data set sequence number. Needed to know the position of the
       data set in the input data */
    context.write(new IntWritable(0), new WriteableData(index, pres));

    daalContext.dispose();

    index += totalTasks;
}
From source file:DAAL.QRStep3Mapper.java
License:Open Source License
@Override
public void map(IntWritable step2key, WriteableData step2value, Context context)
        throws IOException, InterruptedException {
    DaalContext daalContext = new DaalContext();

    SequenceFile.Reader reader = new SequenceFile.Reader(new Configuration(),
            SequenceFile.Reader.file(new Path("/Hadoop/QR/step1/step1x" + step2value.getId())));
    IntWritable step1key = new IntWritable();
    WriteableData step1value = new WriteableData();
    reader.next(step1key, step1value);
    reader.close();

    DataCollection s1 = (DataCollection) step1value.getObject(daalContext);
    DataCollection s2 = (DataCollection) step2value.getObject(daalContext);

    /* Create an algorithm to compute QR decomposition on the master node */
    DistributedStep3Local qrStep3Local = new DistributedStep3Local(daalContext, Double.class,
            Method.defaultDense);
    qrStep3Local.input.set(DistributedStep3LocalInputId.inputOfStep3FromStep1, s1);
    qrStep3Local.input.set(DistributedStep3LocalInputId.inputOfStep3FromStep2, s2);

    /* Compute QR decomposition in step 3 */
    qrStep3Local.compute();
    Result result = qrStep3Local.finalizeCompute();

    HomogenNumericTable Qi = (HomogenNumericTable) result.get(ResultId.matrixQ);

    SequenceFile.Writer writer = SequenceFile.createWriter(new Configuration(),
            SequenceFile.Writer.file(new Path("/Hadoop/QR/Output/Qx" + step2value.getId())),
            SequenceFile.Writer.keyClass(IntWritable.class),
            SequenceFile.Writer.valueClass(WriteableData.class));
    writer.append(new IntWritable(0), new WriteableData(step2value.getId(), Qi));
    writer.close();

    daalContext.dispose();
}