List of usage examples for org.apache.hadoop.io.LongWritable.get()
public long get()
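Return the value of this LongWritable. Before the project excerpts below, here is a minimal, self-contained sketch of the set()/get() round trip; the class and variable names are illustrative only and do not come from any of the source files listed:

import org.apache.hadoop.io.LongWritable;

public class LongWritableGetExample {
    public static void main(String[] args) {
        // Wrap a primitive long in Hadoop's mutable Writable box type.
        LongWritable w = new LongWritable(42L);
        long value = w.get();        // get() returns the wrapped primitive long
        System.out.println(value);   // prints 42
        w.set(7L);                   // the wrapper is reusable across records
        System.out.println(w.get()); // prints 7
    }
}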
From source file:edu.uci.ics.pregelix.example.GraphSampleUndirectedVertex.java
License:Apache License
@Override
public void configure(Configuration conf) {
    try {
        globalRate = conf.getFloat(GLOBAL_RATE, 0);
        seedInterval = (int) (1.0 / (globalRate / 100));
        if (getSuperstep() > 1) {
            // Read the global aggregates published by the previous superstep.
            LongWritable totalSelectedVertex = (LongWritable) IterationUtils.readGlobalAggregateValue(conf,
                    BspUtils.getJobId(conf), GlobalSamplingAggregator.class.getName());
            LongWritable totalVertex = (LongWritable) IterationUtils.readGlobalAggregateValue(conf,
                    BspUtils.getJobId(conf), GlobalVertexCountAggregator.class.getName());
            // get() unwraps each aggregate into a primitive long for the ratio.
            fillingRate = (float) totalSelectedVertex.get() / (float) totalVertex.get();
        }
    } catch (Exception e) {
        throw new IllegalStateException(e);
    }
}
From source file:edu.ucsb.cs.hybrid.io.TwoStageLoadbalancing.java
License:Apache License
public static void main(int step, Path inputDir, JobConf job) throws IOException {
    FileSystem hdfs = inputDir.getFileSystem(job);
    if (!hdfs.exists(Collector.partitionSizesPath)) {
        System.out.println("Partition sizes file does not exist!");
        return;
    }
    debugStages = job.getBoolean(Config.DEBUG_STAGES_PROPERTY, Config.DEBUG_STAGES_VALUE);
    MapFile.Reader partitionSizeReader = new MapFile.Reader(hdfs, Collector.partitionSizesPath.getName(),
            new JobConf());
    Text partitionK = new Text();
    LongWritable partSizeV = new LongWritable();
    try {
        while (partitionSizeReader.next(partitionK, partSizeV)) {
            partitionsNames.add(partitionK.toString()); // useless?
            // get() unwraps the LongWritable into the primitive partition size.
            partitionsSizes.put(partitionK.toString(), partSizeV.get());
        }
    } catch (Exception e) {
        // Stop on a read error and proceed with the entries read so far.
    }
    for (int i = 0; i < partitionsNames.size(); i++) {
        System.out.println("Partition " + partitionsNames.get(i) + " has "
                + partitionsSizes.get(partitionsNames.get(i)) + " vectors.");
    }
    if (partitionsNames.size() <= 1)
        return;
    stage0();
    printUndirectedNeighbors("Stage0");
    printPartitionsStat("Stage0");
    printCircularPartitionsWeight("\nCircular");
    calcCWStandardDeviation();
    stage1();
    printDirectedNeighbors("Stage1");
    System.out.println("Stage 1 final weights: ");
    printPartitionsWeights("Stage1");
    if ((step == 2) || (step == 12)) {
        stage2();
        printDirectedNeighbors("Stage2");
        System.out.println("Stage 2 final weights: ");
        printPartitionsWeights("Stage2");
    }
    // stage3(job, hdfs);
    writeComparisonList(job, hdfs);
    // printComparisonList(job, hdfs); // remove
}
From source file:edu.ucsb.cs.hybrid.mappers.SingleS_Runner.java
License:Apache License
public Object buildInvertedIndex(boolean log) throws IOException {
    HashMap<Long, ArrayList<PostingDocWeight>> dynSIndex = new HashMap<Long, ArrayList<PostingDocWeight>>();
    ArrayList<Long> dynamicIdMap = null;
    LongWritable doc = new LongWritable();
    FeatureWeightArrayWritable vector = new FeatureWeightArrayWritable();
    totalTerms = 0;
    int nVectors = 0;
    float threshold = Config.THRESHOLD_VALUE;
    dynamicIdMap = new ArrayList<Long>();
    googleDynSkip = conf.getBoolean(Config.BAYADRO_SKIP_PROPERTY, Config.BAYADRO_SKIP_VALUE);
    if (googleDynSkip)
        dynamicSmaxw = new ArrayList<Float>();
    // if (compareDynamically)
    //     threshold = conf.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE);
    long startTime = System.currentTimeMillis();
    while (!splitLimitReached(nVectors) && input.next(doc, vector)) {
        /* Map doc IDs to serial numbers 0, 1, 2, ..., S;
           get() unwraps the LongWritable key into a primitive long. */
        dynamicIdMap.add(doc.get());
        if (googleDynSkip)
            dynamicSmaxw.add(vector.getMaxWeight());
        int numTerms = vector.getSize();
        totalTerms += numTerms;
        /* Add one document into the S inverted index. */
        for (int i = 0; i < numTerms; i++) {
            ArrayList<PostingDocWeight> posting = getPosting(dynSIndex, vector.getFeature(i));
            PostingDocWeight item = new PostingDocWeight(nVectors, vector.getWeight(i));
            posting.add(item);
        }
        nVectors++;
    }
    S_size += nVectors;
    if (log) {
        long distinctTerms = dynSIndex.size();
        System.out.println("LOG: Build inverted index time in millisec:"
                + (System.currentTimeMillis() - startTime)
                + "\nLOG: Number of distinct features:" + distinctTerms
                + "\nLOG: Reduction in features storage via uniqueness:"
                + ((totalTerms - distinctTerms) / (float) totalTerms * 100)
                + "\nAvg posting length per feature:" + totalTerms / (float) distinctTerms);
    }
    if (S_size == 0)
        return null;
    else
        return convertDynamicStatic(dynSIndex, dynamicIdMap);
}
From source file:edu.ucsb.cs.invertedindex.mappers.InvertedMapper.java
License:Apache License
public void map(LongWritable key, FeatureWeightArrayWritable value,
        OutputCollector<LongWritable, DocWeight> output, Reporter reporter) throws IOException {
    // get() unwraps the document ID from the LongWritable input key.
    post.docId = key.get();
    for (FeatureWeight item : value.vector) {
        post.weight = item.weight;
        feature.set(item.feature);
        output.collect(feature, post);
    }
}
From source file:edu.ucsb.cs.invertedindex.reducers.InvertedReducer.java
License:Apache License
public void reduce(LongWritable key, Iterator<DocWeight> values,
        OutputCollector<LongWritable, DocWeightArrayWritable> output, Reporter reporter) throws IOException {
    feature.set(key.get());
    postings.clear();
    while (values.hasNext()) {
        DocWeight posting = values.next();
        postings.addPair(posting.docId, posting.weight);
    }
    output.collect(feature, postings);
}
From source file:edu.ucsb.cs.knn.core.KnnMapper.java
License:Apache License
public void compareWithOthers(Reader reader) throws IOException {
    int nSongs = SongUsersIndex.keySet().size();
    LongWritable key = new LongWritable();
    PostingUserArrayWritable value = new PostingUserArrayWritable();
    Long[] songIds = new Long[nSongs];
    SongUsersIndex.keySet().toArray(songIds);
    for (int i = 0; i < nSongs - 1; i++) {
        PostingUser[] songiUsers = SongUsersIndex.get(songIds[i]);
        // Read one song and its user postings at a time, comparing only
        // against songs whose IDs are larger than the current song's ID.
        while (reader.next(key, value))
            if (songIds[i] < key.get())
                computeAC(songIds[i], key.get(), songiUsers, value.getPosting(), false);
    }
}
From source file:edu.ucsb.cs.knn.query.QueryMain.java
License:Apache License
public static void main(String[] args) throws IOException {
    JobConf job = new JobConf();
    Path queryPath = new Path("query");
    FileSystem hdfs = queryPath.getFileSystem(job);
    if (!hdfs.exists(queryPath))
        throw new UnsupportedEncodingException("Query is not set");
    FSDataInputStream in = hdfs.open(queryPath);
    String line;
    // Get the songId whose rating should be predicted (s^i).
    BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
    System.out.print("Enter song id of music you want its predicted rate: ");
    String songId = br.readLine();
    // Look up the real neighbourhood of songId.
    MapFile.Reader reader = new MapFile.Reader(hdfs, "knn-output/part-00000", new Configuration());
    LongWritable key = new LongWritable(Integer.parseInt(songId));
    NeighboursArrayWritable neighborhood = new NeighboursArrayWritable();
    reader.get(key, neighborhood);
    System.out.println("Real neighbourhood of " + songId + " is " + neighborhood.toString());
    // Process each user.
    while ((line = in.readLine()) != null) {
        float predictedRateUp = 0f;
        float predictedRateDown = 0f;
        StringTokenizer str = new StringTokenizer(line, " |\t");
        long userId = Long.parseLong(str.nextToken());
        int nRatings = Integer.parseInt(str.nextToken());
        boolean rated = false;
        for (int lineNo = 0; lineNo < nRatings; lineNo++) {
            line = in.readLine(); // <songid rate>
            str = new StringTokenizer(line, " |\t");
            long currentUserSong = Long.parseLong(str.nextToken());
            int currentUserRate = Integer.parseInt(str.nextToken());
            // key.get() unwraps the queried song ID for the comparison.
            if (currentUserSong == key.get()) {
                rated = true;
                System.out.println("User " + userId + " already rated this song to " + currentUserRate);
                while (lineNo < nRatings) {
                    in.readLine();
                    lineNo++;
                }
                break;
            }
            float wij = neighborhood.getWeight(currentUserSong);
            predictedRateUp += currentUserRate * wij;
            predictedRateDown += Math.abs(wij);
        }
        // Predicted rating for this user.
        if (!rated)
            System.out.println("Predicted rating for user " + userId + " is "
                    + (predictedRateUp / predictedRateDown));
    }
}
From source file:edu.ucsb.cs.lsh.statistics.LshStat.java
License:Apache License
public static void lshProjectionStat(String[] args) throws IOException {
    boolean produceMax = false;
    if (args.length == 3)
        produceMax = true;
    else if (args.length != 2)
        printUsage(3);
    Path inputPath = new Path(args[1]);
    Configuration conf = new Configuration();
    FileSystem fs = inputPath.getFileSystem(conf);
    FileStatus[] files = fs.listStatus(inputPath);
    long i = 0, bucketCount = 0, avgBucketSize = 0, maxBucket = 0, minBucket = Long.MAX_VALUE;
    ArrayList<Integer> bucketSizes = new ArrayList<Integer>();
    for (FileStatus file : files) {
        if (fs.isDirectory(file.getPath()) || file.getPath().getName().startsWith("_"))
            continue;
        Reader reader = new SequenceFile.Reader(fs, file.getPath(), conf);
        LongWritable key = new LongWritable();
        FeatureWeightArrayWritable value = new FeatureWeightArrayWritable();
        while (reader.next(key, value)) {
            // A key of 0 marks the start of a new bucket.
            if (key.get() == 0) {
                bucketCount++;
                avgBucketSize += i;
                if (maxBucket < i) {
                    maxBucket = i;
                    maxBucketID = (bucketCount - 1);
                }
                if (i != 0 && minBucket > i)
                    minBucket = i;
                i = 0;
            } else {
                i++;
            }
        }
        avgBucketSize += i;
        bucketSizes.add((int) i);
    }
    System.out.println("Number of buckets:" + bucketCount);
    System.out.println("Max. bucket size:" + maxBucket + " with ID:" + maxBucketID);
    System.out.println("Min. bucket size:" + minBucket);
    System.out.println("Avg. buckets size:" + (avgBucketSize / (float) bucketCount));
    System.out.println("R-std. among bucket sizes:" + getRStd((avgBucketSize / (float) bucketCount), bucketSizes));
    System.out.println("Total comparison done within buckets:" + getSumCombin(bucketSizes));
    if (produceMax)
        produceMaxBucket(args);
    // getRepatedPairs(files, fs, conf);
}
From source file:edu.ucsb.cs.lsh.statistics.LshStat.java
License:Apache License
public static void getRepatedPairs(FileStatus[] files, FileSystem fs, Configuration conf) throws IOException {
    NumByteList bucket = null;
    long i = 0, bucketCount = 0;
    ArrayList<NumByteList> buckets = new ArrayList<NumByteList>();
    for (FileStatus file : files) {
        if (fs.isDirectory(file.getPath()) || file.getPath().getName().startsWith("_"))
            continue;
        Reader reader = new SequenceFile.Reader(fs, file.getPath(), conf);
        LongWritable key = new LongWritable();
        FeatureWeightArrayWritable value = new FeatureWeightArrayWritable();
        while (reader.next(key, value)) {
            if (key.get() == 0) {
                // A key of 0 starts a new bucket; flush the previous one.
                if (bucketCount != 0)
                    buckets.add(bucket);
                bucketCount++;
                bucket = new NumByteList(bucketCount);
                i = 0;
            } else {
                i++;
                bucket.addDoc(key.get());
            }
        }
    }
    System.out.println("Number of repeated docs across buckets: " + getRepetedPairsCount(buckets));
}
From source file:edu.ucsb.cs.lsh.statistics.LshStat.java
License:Apache License
public static void produceMaxBucket(String args[]) throws IOException {
    if (args.length == 3)
        maxBucketID = Integer.parseInt(args[2]);
    else if (args.length != 2)
        printUsage(4);
    Path inputPath = new Path(args[1]);
    Path outPath = new Path("maxBucket");
    Configuration conf = new Configuration();
    FileSystem fs = inputPath.getFileSystem(conf);
    if (fs.exists(outPath))
        fs.delete(outPath);
    FileStatus[] files = fs.listStatus(inputPath);
    SequenceFile.Writer writer = null;
    int bucketCount = 0;
    for (FileStatus file : files) {
        if (fs.isDirectory(file.getPath()) || file.getPath().getName().startsWith("_"))
            continue;
        Reader reader = new SequenceFile.Reader(fs, file.getPath(), conf);
        LongWritable key = new LongWritable();
        FeatureWeightArrayWritable value = new FeatureWeightArrayWritable();
        while (reader.next(key, value))
            if (key.get() == 0) {
                bucketCount++;
                if (bucketCount == maxBucketID) {
                    // Copy records until the next bucket marker (key == 0).
                    writer = SequenceFile.createWriter(fs, conf, outPath, LongWritable.class,
                            FeatureWeightArrayWritable.class, SequenceFile.CompressionType.NONE);
                    while (reader.next(key, value) && (key.get() != 0))
                        writer.append(key, value);
                    writer.close();
                    return;
                }
            }
    }
}