Example usage for org.apache.hadoop.io IntWritable get

Introduction

On this page you can find example usages of org.apache.hadoop.io.IntWritable#get().

Prototype

public int get() 

Document

Return the value of this IntWritable.
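
A minimal standalone sketch of the prototype above, before the project examples: IntWritable boxes a primitive int for Hadoop serialization, set(int) stores a value, and get() reads it back. The class name and values here are illustrative and not taken from any of the sources listed below.

import org.apache.hadoop.io.IntWritable;

public class IntWritableGetDemo {
    public static void main(String[] args) {
        // Wrap a primitive int in a Hadoop Writable.
        IntWritable count = new IntWritable(41);

        // get() returns the wrapped int value.
        int current = count.get(); // 41

        // set() replaces the wrapped value; get() reflects the update.
        count.set(current + 1);
        System.out.println(count.get()); // prints 42
    }
}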

Usage

From source file:ivory.data.TermDocVectorsForwardIndex.java

License:Apache License

/**
 * Returns the document vector given a docno.
 */
public TermDocVector getDocVector(int docno) throws IOException {
    // TODO: This method re-opens the SequenceFile on every access. Would be more efficient to cache
    // the file handles.
    if (docno > collectionDocumentCount || docno < 1) {
        return null;
    }

    long pos = positions[docno - docnoOffset - 1];

    int fileNo = (int) (pos / BuildTermDocVectorsForwardIndex.BigNumber);
    pos = pos % BuildTermDocVectorsForwardIndex.BigNumber;

    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(path + "/part-" + FORMAT.format(fileNo)),
            conf);

    IntWritable key = new IntWritable();
    TermDocVector value;

    try {
        value = (TermDocVector) reader.getValueClass().newInstance();
    } catch (Exception e) {
        throw new RuntimeException("Unable to instantiate key/value pair!");
    }

    reader.seek(pos);
    reader.next(key, value);

    if (key.get() != docno) {
        LOG.error("unable to read doc vector for docno " + docno + ": found docno " + key + " instead");
        reader.close();
        return null;
    }

    reader.close();
    return value;
}
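
The TODO above notes that reopening the SequenceFile on every lookup is inefficient. A hypothetical sketch of the reader cache it suggests follows; the field and method names are invented here and are not part of the ivory source (assumes java.util.Map and java.util.HashMap are imported).

// Hypothetical cache of open readers, keyed by file number.
private final Map<Integer, SequenceFile.Reader> readerCache = new HashMap<Integer, SequenceFile.Reader>();

private SequenceFile.Reader getReader(int fileNo) throws IOException {
    SequenceFile.Reader reader = readerCache.get(fileNo);
    if (reader == null) {
        reader = new SequenceFile.Reader(fs, new Path(path + "/part-" + FORMAT.format(fileNo)), conf);
        readerCache.put(fileNo, reader);
    }
    return reader;
}

With such a cache, getDocVector would still call seek(pos) on the cached reader, and the readers would be closed once when the index itself is closed rather than on every call.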

From source file:kogiri.mapreduce.preprocess.common.readindex.ReadIndexReader.java

License:Open Source License

public int findReadID(long offset) throws ReadIDNotFoundException {
    LongWritable key = new LongWritable(offset);
    IntWritable value = new IntWritable();
    try {
        this.mapfileReader.get(key, value);
        return value.get();
    } catch (IOException ex) {
        LOG.error(ex);
    }

    throw new ReadIDNotFoundException("ReadID not found for offset " + offset);
}

From source file:libra.core.kmersimilarity_m.KmerSimilarityMapper.java

License:Apache License

@Override
protected void map(CompressedSequenceWritable key, KmerMatchResult value, Context context)
        throws IOException, InterruptedException {
    IntWritable[] valueArray = value.getVals();
    Path[] kmerIndexPathArray = value.getKmerIndexPath();

    // filter out empty values
    ArrayList<IntWritable> filteredValueArray = new ArrayList<IntWritable>();
    ArrayList<Path> filteredKmerIndexPathArray = new ArrayList<Path>();

    for (int i = 0; i < valueArray.length; i++) {
        if (valueArray[i] != null) {
            filteredValueArray.add(valueArray[i]);
            filteredKmerIndexPathArray.add(kmerIndexPathArray[i]);
        }
    }

    valueArray = null;
    kmerIndexPathArray = null;

    if (filteredValueArray.size() <= 1) {
        // skip
        return;
    }

    int[] fileid_arr = new int[filteredValueArray.size()];

    for (int i = 0; i < filteredValueArray.size(); i++) {
        int fileidInt = 0;
        String indexFilename = filteredKmerIndexPathArray.get(i).getName();
        Integer fileid = this.idCacheTable.get(indexFilename);
        if (fileid == null) {
            String fastaFilename = KmerIndexHelper.getFastaFileName(indexFilename);
            int id = this.fileMapping.getIDFromFastaFile(fastaFilename);
            this.idCacheTable.put(indexFilename, id);
            fileidInt = id;
        } else {
            fileidInt = fileid.intValue();
        }

        fileid_arr[i] = fileidInt;
    }

    // compute the log-TF score for each file, normalized by its cosine norm base
    double[] normal = new double[this.valuesLen];
    for (int i = 0; i < this.valuesLen; i++) {
        normal[i] = 0;
    }

    for (int i = 0; i < filteredValueArray.size(); i++) {
        IntWritable arr = filteredValueArray.get(i);
        int freq = arr.get();
        double tf = 1 + Math.log10(freq);
        normal[fileid_arr[i]] = ((double) tf) / this.tfConsineNormBase[fileid_arr[i]];
    }

    accumulateScore(normal);

    this.reportCounter.increment(1);
}

From source file:libra.core.kmersimilarity_r.KmerSimilarityMapper.java

License:Apache License

@Override
protected void map(CompressedSequenceWritable key, IntWritable value, Context context)
        throws IOException, InterruptedException {
    int[] arr = new int[2];
    arr[0] = this.file_id;
    arr[1] = value.get();

    context.write(key, new CompressedIntArrayWritable(arr));
}

From source file:libra.preprocess.stage2.KmerIndexBuilderCombiner.java

License:Apache License

@Override
protected void reduce(CompressedSequenceWritable key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int frequency = 0;

    for (IntWritable value : values) {
        frequency += value.get();
    }

    context.write(key, new IntWritable(frequency));
}

From source file:libra.preprocess.stage2.KmerIndexBuilderReducer.java

License:Apache License

@Override
protected void reduce(CompressedSequenceWritable key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    int frequency = 0;

    for (IntWritable value : values) {
        frequency += value.get();
    }

    // accumulate (1 + log10(frequency))^2, scaled by 1000, into a counter used to build the cosine normalization base
    if (frequency > 0) {
        this.logTFSquareCounter.increment((long) (Math.pow(1 + Math.log10(frequency), 2) * 1000));
    }
    context.write(key, new IntWritable(frequency));
}

From source file:mahout.analysis.MahoutOAReducer.java

@Override
public void reduce(Text id, Iterator<IntWritable> counts, OutputCollector<Text, IntWritable> output,
        Reporter reporter) throws IOException {

    int totalCount = 0;
    while (counts.hasNext()) {
        IntWritable count = counts.next();
        totalCount += count.get();
    }

    if (totalCount > 100) {
        output.collect(id, new IntWritable(totalCount));
    }
}

From source file:microbench.WordCountOnHDFSDataLocal.java

License:Apache License

public static void main(String[] args) throws IOException, InterruptedException {
    try {
        parseArgs(args);
        HashMap<String, String> conf = new HashMap<String, String>();
        initConf(conf);
        MPI_D.Init(args, MPI_D.Mode.Common, conf);

        JobConf jobConf = new JobConf(confPath);
        if (MPI_D.COMM_BIPARTITE_O != null) {
            // O communicator
            int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);
            int size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O);
            if (rank == 0) {
                DataMPIUtil.printArgs(args);
            }
            System.out.println("The O task " + rank + " of " + size + " is working...");

            HadoopReader<LongWritable, Text> reader = HadoopIOUtil.getReader(jobConf, inDir,
                    TextInputFormat.class, rank, MPI_D.COMM_BIPARTITE_O);
            Text word = new Text();
            IntWritable one = new IntWritable(1);
            LongWritable khead = reader.createKey();
            Text vhead = reader.createValue();
            while (reader.next(khead, vhead)) {
                StringTokenizer itr = new StringTokenizer(vhead.toString());
                while (itr.hasMoreTokens()) {
                    word.set(itr.nextToken());
                    // send key-value
                    MPI_D.Send(word, one);
                }
            }
            reader.close();
        } else if (MPI_D.COMM_BIPARTITE_A != null) {
            // A communicator
            int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A);
            int size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_A);
            System.out.println("The A task " + rank + " of " + size + " is working...");

            HadoopWriter<Text, IntWritable> outrw = HadoopIOUtil.getNewWriter(jobConf, outDir, Text.class,
                    IntWritable.class, TextOutputFormat.class, null, rank, MPI_D.COMM_BIPARTITE_A);

            // The loop below assumes pairs for the same key arrive consecutively:
            // it sums each run and writes the total when the key changes.
            Text oldKey = null;
            IntWritable valueData = new IntWritable();
            int sum = 0;
            Object[] keyValue = MPI_D.Recv();
            while (keyValue != null) {
                Text key = (Text) keyValue[0];
                IntWritable value = (IntWritable) keyValue[1];
                if (oldKey == null) {
                    oldKey = key;
                    sum = value.get();
                } else {
                    if (key.equals(oldKey)) {
                        sum += value.get();
                    } else {
                        valueData.set(sum);
                        outrw.write(oldKey, valueData);
                        oldKey = key;
                        sum = value.get();
                    }
                }
                keyValue = MPI_D.Recv();
            }
            if (oldKey != null) {
                valueData.set(sum);
                outrw.write(oldKey, valueData);
            }
            outrw.close();
        }
        MPI_D.Finalize();
    } catch (MPI_D_Exception e) {
        e.printStackTrace();
    }
}

From source file:minor_MapReduce.C4_5.java

License:Open Source License

private static void summarizeData() throws Exception {
    Job job = Job.getInstance();
    job.setJarByClass(C4_5.class);
    job.setJobName("C4.5_summarizeData");

    FileInputFormat.addInputPath(job, input_path);
    FileOutputFormat.setOutputPath(job, summarized_data_path);

    job.setMapperClass(SummarizeMapper.class);
    job.setReducerClass(SummarizeReducer.class);

    job.setOutputKeyClass(TextArrayWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.waitForCompletion(false);

    /* Store it locally */
    Option optPath = SequenceFile.Reader.file(new Path(summarized_data_path.toString() + "/part-r-00000"));
    SequenceFile.Reader reader = new SequenceFile.Reader(new Configuration(), optPath);

    TextArrayWritable key = new TextArrayWritable();
    IntWritable val = new IntWritable();

    summarized_data = new HashMap<String[], Integer>();
    while (reader.next(key, val)) {
        summarized_data.put(key.toStrings(), val.get());
    }

    reader.close();
}

From source file:ml.grafos.okapi.graphs.betweeness.BetweenessComputation.java

License:Apache License

/**
 * Return the current global state
 *
 * @return State that stores the current global state
 */
private State getCurrentGlobalState() {
    IntWritable stateInt = this.getAggregatedValue(BetweenessMasterCompute.STATE_AGG);
    return State.values()[stateInt.get()];
}