Example usage for org.apache.hadoop.io.IntWritable.set

Introduction

This page collects example usages of org.apache.hadoop.io.IntWritable.set.

Prototype

public void set(int value) 

Document

Set the value of this IntWritable.
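
Below is a minimal, self-contained sketch of the call above, in the same style as the examples under Usage. It reuses a single IntWritable across records by calling set(int) before each append; the class name, output path, and record count are illustrative assumptions and are not taken from any of the source files listed below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class IntWritableSetExample {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path out = new Path("/tmp/intwritable-set-example.seq"); // assumed output location

        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, out, IntWritable.class, Text.class);
        IntWritable key = new IntWritable();
        Text value = new Text();
        try {
            for (int i = 0; i < 10; i++) {
                key.set(i);                // overwrite the wrapped int in place
                value.set("record-" + i);
                writer.append(key, value); // contents are serialized at append time
            }
        } finally {
            writer.close();
        }
    }
}

Reusing one mutable Writable and overwriting it with set is the common pattern in the examples that follow, since SequenceFile.Writer.append serializes the current contents at call time and avoids allocating a new object per record.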

Usage

From source file: org.apache.mahout.utils.vectors.lucene.Driver.java

License: Apache License

public void dumpVectors() throws IOException {

    File file = new File(luceneDir);
    Preconditions.checkArgument(file.isDirectory(),
            "Lucene directory: " + file.getAbsolutePath() + " does not exist or is not a directory");
    Preconditions.checkArgument(maxDocs >= 0, "maxDocs must be >= 0");
    Preconditions.checkArgument(minDf >= 1, "minDf must be >= 1");
    Preconditions.checkArgument(maxDFPercent <= 99, "maxDFPercent must be <= 99");

    Directory dir = FSDirectory.open(file);
    IndexReader reader = DirectoryReader.open(dir);

    Weight weight;
    if ("tf".equalsIgnoreCase(weightType)) {
        weight = new TF();
    } else if ("tfidf".equalsIgnoreCase(weightType)) {
        weight = new TFIDF();
    } else {
        throw new IllegalArgumentException("Weight type " + weightType + " is not supported");
    }

    TermInfo termInfo = new CachedTermInfo(reader, field, minDf, maxDFPercent);

    LuceneIterable iterable;
    if (norm == LuceneIterable.NO_NORMALIZING) {
        iterable = new LuceneIterable(reader, idField, field, termInfo, weight, LuceneIterable.NO_NORMALIZING,
                maxPercentErrorDocs);
    } else {
        iterable = new LuceneIterable(reader, idField, field, termInfo, weight, norm, maxPercentErrorDocs);
    }

    log.info("Output File: {}", outFile);

    VectorWriter vectorWriter = getSeqFileWriter(outFile);
    try {
        long numDocs = vectorWriter.write(iterable, maxDocs);
        log.info("Wrote: {} vectors", numDocs);
    } finally {
        Closeables.close(vectorWriter, false);
    }

    File dictOutFile = new File(dictOut);
    log.info("Dictionary Output file: {}", dictOutFile);
    Writer writer = Files.newWriter(dictOutFile, Charsets.UTF_8);
    DelimitedTermInfoWriter tiWriter = new DelimitedTermInfoWriter(writer, delimiter, field);
    try {
        tiWriter.write(termInfo);
    } finally {
        Closeables.close(tiWriter, false);
    }

    if (!"".equals(seqDictOut)) {
        log.info("SequenceFile Dictionary Output file: {}", seqDictOut);

        Path path = new Path(seqDictOut);
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        SequenceFile.Writer seqWriter = null;
        try {
            seqWriter = SequenceFile.createWriter(fs, conf, path, Text.class, IntWritable.class);
            Text term = new Text();
            IntWritable termIndex = new IntWritable();

            Iterator<TermEntry> termEntries = termInfo.getAllEntries();
            while (termEntries.hasNext()) {
                TermEntry termEntry = termEntries.next();
                term.set(termEntry.getTerm());
                termIndex.set(termEntry.getTermIdx());
                seqWriter.append(term, termIndex);
            }
        } finally {
            Closeables.close(seqWriter, false);
        }

    }
}

From source file: org.apache.mahout.utils.vectors.RowIdJob.java

License: Apache License

@Override
public int run(String[] args) throws Exception {

    addInputOption();
    addOutputOption();

    Map<String, List<String>> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    Path outputPath = getOutputPath();
    Path indexPath = new Path(outputPath, "docIndex");
    Path matrixPath = new Path(outputPath, "matrix");
    SequenceFile.Writer indexWriter = SequenceFile.createWriter(fs, conf, indexPath, IntWritable.class,
            Text.class);
    SequenceFile.Writer matrixWriter = SequenceFile.createWriter(fs, conf, matrixPath, IntWritable.class,
            VectorWritable.class);
    try {
        IntWritable docId = new IntWritable();
        int i = 0;
        int numCols = 0;
        for (Pair<Text, VectorWritable> record : new SequenceFileDirIterable<Text, VectorWritable>(
                getInputPath(), PathType.LIST, PathFilters.logsCRCFilter(), null, true, conf)) {
            VectorWritable value = record.getSecond();
            docId.set(i);
            indexWriter.append(docId, record.getFirst());
            matrixWriter.append(docId, value);
            i++;
            numCols = value.get().size();
        }

        log.info("Wrote out matrix with {} rows and {} columns to {}", i, numCols, matrixPath);
        return 0;
    } finally {
        Closeables.close(indexWriter, false);
        Closeables.close(matrixWriter, false);
    }
}

From source file: org.apache.nutch.crawl.TestMapWritable.java

License: Apache License

public void testPerformance() throws Exception {
    FileSystem fs = FileSystem.get(configuration);
    Path file = new Path(System.getProperty("java.io.tmpdir"), "mapTestFile");
    fs.delete(file);
    org.apache.hadoop.io.SequenceFile.Writer writer = SequenceFile.createWriter(fs, configuration, file,
            IntWritable.class, MapWritable.class);
    // write map
    System.out.println("start writing map's");
    long start = System.currentTimeMillis();
    IntWritable key = new IntWritable();
    MapWritable map = new MapWritable();
    LongWritable mapValue = new LongWritable();
    for (int i = 0; i < 1000000; i++) {
        key.set(i);
        mapValue.set(i);
        map.put(key, mapValue);
        writer.append(key, map);
    }
    long needed = System.currentTimeMillis() - start;
    writer.close();
    System.out.println("needed time for writing map's: " + needed);

    // read map

    org.apache.hadoop.io.SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, configuration);
    System.out.println("start reading map's");
    start = System.currentTimeMillis();
    while (reader.next(key, map)) {

    }
    reader.close();
    needed = System.currentTimeMillis() - start;
    System.out.println("needed time for reading map's: " + needed);
    fs.delete(file);

    // Text
    System.out.println("start writing Text's");
    writer = SequenceFile.createWriter(fs, configuration, file, IntWritable.class, Text.class);
    // write map
    start = System.currentTimeMillis();
    key = new IntWritable();
    Text value = new Text();
    String s = "15726:15726";
    for (int i = 0; i < 1000000; i++) {
        key.set(i);
        value.set(s);
        writer.append(key, value);
    }
    needed = System.currentTimeMillis() - start;
    writer.close();
    System.out.println("needed time for writing Text's: " + needed);

    // read map
    System.out.println("start reading Text's");
    reader = new SequenceFile.Reader(fs, file, configuration);
    start = System.currentTimeMillis();
    while (reader.next(key, value)) {

    }
    needed = System.currentTimeMillis() - start;
    System.out.println("needed time for reading Text: " + needed);
    fs.delete(file);
}

From source file: org.apache.orc.impl.TestStringRedBlackTree.java

License: Apache License

/**
 * Checks the red-black tree rules to make sure that we have correctly built
 * a valid tree.
 *
 * Properties:
 *   1. Red nodes must have black children
 *   2. Each node must have the same black height on both sides.
 *
 * @param node The id of the root of the subtree to check for the red-black
 *        tree properties.
 * @return The black-height of the subtree.
 */
private int checkSubtree(RedBlackTree tree, int node, IntWritable count) throws IOException {
    if (node == RedBlackTree.NULL) {
        return 1;
    }
    count.set(count.get() + 1);
    boolean is_red = tree.isRed(node);
    int left = tree.getLeft(node);
    int right = tree.getRight(node);
    if (is_red) {
        if (tree.isRed(left)) {
            printTree(tree, "", tree.root);
            throw new IllegalStateException("Left node of " + node + " is " + left + " and both are red.");
        }
        if (tree.isRed(right)) {
            printTree(tree, "", tree.root);
            throw new IllegalStateException("Right node of " + node + " is " + right + " and both are red.");
        }
    }
    int left_depth = checkSubtree(tree, left, count);
    int right_depth = checkSubtree(tree, right, count);
    if (left_depth != right_depth) {
        printTree(tree, "", tree.root);
        throw new IllegalStateException(
                "Lopsided tree at node " + node + " with depths " + left_depth + " and " + right_depth);
    }
    if (is_red) {
        return left_depth;
    } else {
        return left_depth + 1;
    }
}

From source file: org.apache.orc.mapred.OrcMapredRecordReader.java

License: Apache License

static IntWritable nextInt(ColumnVector vector, int row, Object previous) {
    if (vector.isRepeating) {
        row = 0;
    }
    if (vector.noNulls || !vector.isNull[row]) {
        IntWritable result;
        if (previous == null || previous.getClass() != IntWritable.class) {
            result = new IntWritable();
        } else {
            result = (IntWritable) previous;
        }
        result.set((int) ((LongColumnVector) vector).vector[row]);
        return result;
    } else {
        return null;
    }
}

From source file: org.apache.orc.mapred.TestMrUnit.java

License: Apache License

@Test
public void testMapred() throws IOException {
    conf.set("io.serializations",
            OrcStructSerialization.class.getName() + "," + WritableSerialization.class.getName());
    OrcConf.MAPRED_SHUFFLE_KEY_SCHEMA.setString(conf, "struct<x:int,y:int>");
    OrcConf.MAPRED_SHUFFLE_VALUE_SCHEMA.setString(conf, "struct<z:string>");
    MyMapper mapper = new MyMapper();
    mapper.configure(conf);
    MyReducer reducer = new MyReducer();
    reducer.configure(conf);
    MapReduceDriver<NullWritable, OrcStruct, OrcKey, OrcValue, NullWritable, OrcStruct> driver = new MapReduceDriver<>(
            mapper, reducer);
    driver.setConfiguration(conf);
    NullWritable nada = NullWritable.get();
    OrcStruct input = (OrcStruct) OrcStruct
            .createValue(TypeDescription.fromString("struct<one:struct<x:int,y:int>,two:struct<z:string>>"));
    IntWritable x = (IntWritable) ((OrcStruct) input.getFieldValue(0)).getFieldValue(0);
    IntWritable y = (IntWritable) ((OrcStruct) input.getFieldValue(0)).getFieldValue(1);
    Text z = (Text) ((OrcStruct) input.getFieldValue(1)).getFieldValue(0);

    // generate the input stream
    for (int r = 0; r < 20; ++r) {
        x.set(100 - (r / 4));
        y.set(r * 2);
        z.set(Integer.toHexString(r));
        driver.withInput(nada, input);
    }

    // generate the expected outputs
    for (int g = 4; g >= 0; --g) {
        x.set(100 - g);
        for (int i = 0; i < 4; ++i) {
            int r = g * 4 + i;
            y.set(r * 2);
            z.set(Integer.toHexString(r));
            driver.withOutput(nada, input);
        }
    }
    driver.runTest();
}

From source file: org.apache.orc.mapreduce.TestMrUnit.java

License: Apache License

@Test
public void testMapred() throws IOException {
    conf.set("io.serializations",
            OrcStructSerialization.class.getName() + "," + WritableSerialization.class.getName());
    OrcConf.MAPRED_SHUFFLE_KEY_SCHEMA.setString(conf, "struct<x:int,y:int>");
    OrcConf.MAPRED_SHUFFLE_VALUE_SCHEMA.setString(conf, "struct<z:string>");
    MyMapper mapper = new MyMapper();
    MyReducer reducer = new MyReducer();
    MapReduceDriver<NullWritable, OrcStruct, OrcKey, OrcValue, NullWritable, OrcStruct> driver = new MapReduceDriver<>(
            mapper, reducer);
    driver.setConfiguration(conf);
    NullWritable nada = NullWritable.get();
    OrcStruct input = (OrcStruct) OrcStruct
            .createValue(TypeDescription.fromString("struct<one:struct<x:int,y:int>,two:struct<z:string>>"));
    IntWritable x = (IntWritable) ((OrcStruct) input.getFieldValue(0)).getFieldValue(0);
    IntWritable y = (IntWritable) ((OrcStruct) input.getFieldValue(0)).getFieldValue(1);
    Text z = (Text) ((OrcStruct) input.getFieldValue(1)).getFieldValue(0);

    // generate the input stream
    for (int r = 0; r < 20; ++r) {
        x.set(100 - (r / 4));
        y.set(r * 2);
        z.set(Integer.toHexString(r));
        driver.withInput(nada, input);
    }

    // generate the expected outputs
    for (int g = 4; g >= 0; --g) {
        x.set(100 - g);
        for (int i = 0; i < 4; ++i) {
            int r = g * 4 + i;
            y.set(r * 2);
            z.set(Integer.toHexString(r));
            driver.withOutput(nada, input);
        }
    }
    driver.runTest();
}

From source file: org.apache.pig.piggybank.squeal.backend.storm.oper.TriBasicPersist.java

License: Apache License

@Override
public MapIdxWritable combine(MapIdxWritable val1, MapIdxWritable val2) {
    MapIdxWritable ret = zero();

    if (val1 != null) {
        for (Entry<Writable, Writable> ent : val1.entrySet()) {
            ret.put(ent.getKey(), new IntWritable(((IntWritable) ent.getValue()).get()));
        }
    }

    // Merge val2's counts into ret, which already holds copies of val1's counts.
    if (val2 != null) {
        for (Entry<Writable, Writable> ent : val2.entrySet()) {
            int c = ((IntWritable) ent.getValue()).get();
            IntWritable iw = (IntWritable) ret.get(ent.getKey());
            if (iw == null) {
                iw = new IntWritable(c);
                ret.put(ent.getKey(), iw);
            } else {
                iw.set(iw.get() + c);
            }
        }
    }

    return ret;
}

From source file: org.apache.pig.piggybank.squeal.backend.storm.oper.TriWindowCombinePersist.java

License: Apache License

void addTuple(MapIdxWritable s, NullableTuple t, int c) {
    int idx = t.getIndex();
    Long ws = windowSettings.get(idx);
    if (ws != null) {
        IntWritable key_tmp = new IntWritable(idx);

        // Pull the window.
        WindowBundle<NullableTuple> w = (WindowBundle<NullableTuple>) s.get(key_tmp);

        /*
         * FIXME: If we get the negative before the positive, this won't work.
         * The proper way to do this would be to count the removes in window
         * state so we can ignore adds when the matching positive values come
         * in. 
         */
        if (c < 0) {
            // Remove the item for negative items.
            w.remove(t);
        } else {
            // Add it otherwise.
            w.push(t);
        }
    } else {
        // This is not a windowed element, just add like BASEPERSIST.
        IntWritable iw = (IntWritable) s.get(t);
        if (iw == null) {
            iw = new IntWritable(c);
            s.put(t, iw);
        } else {
            iw.set(iw.get() + c);
        }
    }
}

From source file: org.apache.pig.piggybank.squeal.backend.storm.state.WindowBundle.java

License: Apache License

void update(Writable o, int c) {
    if (openWin == null) {
        openNewWindow();
    }

    IntWritable iw = (IntWritable) openWin.contents.get(o);
    if (iw == null) {
        iw = new IntWritable(c);
        openWin.contents.put(o, iw);
    } else {
        iw.set(iw.get() + c);
        if (iw.get() == 0) {
            openWin.contents.remove(o);
        }
    }

    // FIXME: This is incorrect for c != +/- 1.
    if (c > 0 && iw.get() > 0) {
        openWin.itemCount += 1;
    } else if (c < 0 && openWin.itemCount > 0) {
        openWin.itemCount -= 1;
    }

    if (openWin.itemCount == maxSize) {
        closeWindow();
    }
}