List of usage examples for org.apache.hadoop.io.LongWritable.get()
public long get()
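Return the value of this LongWritable. Before the project excerpts below, here is a minimal, self-contained sketch of the set()/get() round trip; the class and variable names are illustrative only and do not come from any of the source files listed:

import org.apache.hadoop.io.LongWritable;

public class LongWritableGetExample {
    public static void main(String[] args) {
        // Wrap a primitive long in Hadoop's mutable Writable box type.
        LongWritable w = new LongWritable(42L);
        long value = w.get();        // get() returns the wrapped primitive long
        System.out.println(value);   // prints 42
        w.set(7L);                   // the wrapper is reusable across records
        System.out.println(w.get()); // prints 7
    }
}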
From source file:edu.uci.ics.pregelix.example.GraphSampleUndirectedVertex.java
License:Apache License
@Override
public void configure(Configuration conf) {
    try {
        globalRate = conf.getFloat(GLOBAL_RATE, 0);
        seedInterval = (int) (1.0 / (globalRate / 100));
        if (getSuperstep() > 1) {
            // Read the global aggregates published by the previous superstep.
            LongWritable totalSelectedVertex = (LongWritable) IterationUtils.readGlobalAggregateValue(conf,
                    BspUtils.getJobId(conf), GlobalSamplingAggregator.class.getName());
            LongWritable totalVertex = (LongWritable) IterationUtils.readGlobalAggregateValue(conf,
                    BspUtils.getJobId(conf), GlobalVertexCountAggregator.class.getName());
            // get() unwraps each aggregate into a primitive long for the ratio.
            fillingRate = (float) totalSelectedVertex.get() / (float) totalVertex.get();
        }
    } catch (Exception e) {
        throw new IllegalStateException(e);
    }
}
From source file:edu.ucsb.cs.hybrid.io.TwoStageLoadbalancing.java
License:Apache License
public static void main(int step, Path inputDir, JobConf job) throws IOException {
    FileSystem hdfs = inputDir.getFileSystem(job);
    if (!hdfs.exists(Collector.partitionSizesPath)) {
        System.out.println("Partition sizes file does not exist!");
        return;
    }
    debugStages = job.getBoolean(Config.DEBUG_STAGES_PROPERTY, Config.DEBUG_STAGES_VALUE);
    MapFile.Reader partitionSizeReader = new MapFile.Reader(hdfs, Collector.partitionSizesPath.getName(),
            new JobConf());
    Text partitionK = new Text();
    LongWritable partSizeV = new LongWritable();
    try {
        while (partitionSizeReader.next(partitionK, partSizeV)) {
            partitionsNames.add(partitionK.toString()); // useless?
            // get() unwraps the LongWritable into the primitive partition size.
            partitionsSizes.put(partitionK.toString(), partSizeV.get());
        }
    } catch (Exception e) {
        // Stop on a read error and proceed with the entries read so far.
    }
    for (int i = 0; i < partitionsNames.size(); i++) {
        System.out.println("Partition " + partitionsNames.get(i) + " has "
                + partitionsSizes.get(partitionsNames.get(i)) + " vectors.");
    }
    if (partitionsNames.size() <= 1)
        return;
    stage0();
    printUndirectedNeighbors("Stage0");
    printPartitionsStat("Stage0");
    printCircularPartitionsWeight("\nCircular");
    calcCWStandardDeviation();
    stage1();
    printDirectedNeighbors("Stage1");
    System.out.println("Stage 1 final weights: ");
    printPartitionsWeights("Stage1");
    if ((step == 2) || (step == 12)) {
        stage2();
        printDirectedNeighbors("Stage2");
        System.out.println("Stage 2 final weights: ");
        printPartitionsWeights("Stage2");
    }
    // stage3(job, hdfs);
    writeComparisonList(job, hdfs);
    // printComparisonList(job, hdfs); // remove
}
From source file:edu.ucsb.cs.hybrid.mappers.SingleS_Runner.java
License:Apache License
public Object buildInvertedIndex(boolean log) throws IOException {
    HashMap<Long, ArrayList<PostingDocWeight>> dynSIndex = new HashMap<Long, ArrayList<PostingDocWeight>>();
    ArrayList<Long> dynamicIdMap = null;
    LongWritable doc = new LongWritable();
    FeatureWeightArrayWritable vector = new FeatureWeightArrayWritable();
    totalTerms = 0;
    int nVectors = 0;
    float threshold = Config.THRESHOLD_VALUE;
    dynamicIdMap = new ArrayList<Long>();
    googleDynSkip = conf.getBoolean(Config.BAYADRO_SKIP_PROPERTY, Config.BAYADRO_SKIP_VALUE);
    if (googleDynSkip)
        dynamicSmaxw = new ArrayList<Float>();
    // if (compareDynamically)
    //     threshold = conf.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE);
    long startTime = System.currentTimeMillis();
    while (!splitLimitReached(nVectors) && input.next(doc, vector)) {
        /* Map doc IDs to serial numbers 0, 1, 2, ..., S;
           get() unwraps the LongWritable key into a primitive long. */
        dynamicIdMap.add(doc.get());
        if (googleDynSkip)
            dynamicSmaxw.add(vector.getMaxWeight());
        int numTerms = vector.getSize();
        totalTerms += numTerms;
        /* Add one document into the S inverted index. */
        for (int i = 0; i < numTerms; i++) {
            ArrayList<PostingDocWeight> posting = getPosting(dynSIndex, vector.getFeature(i));
            PostingDocWeight item = new PostingDocWeight(nVectors, vector.getWeight(i));
            posting.add(item);
        }
        nVectors++;
    }
    S_size += nVectors;
    if (log) {
        long distinctTerms = dynSIndex.size();
        System.out.println("LOG: Build inverted index time in millisec:"
                + (System.currentTimeMillis() - startTime)
                + "\nLOG: Number of distinct features:" + distinctTerms
                + "\nLOG: Reduction in features storage via uniqueness:"
                + ((totalTerms - distinctTerms) / (float) totalTerms * 100)
                + "\nAvg posting length per feature:" + totalTerms / (float) distinctTerms);
    }
    if (S_size == 0)
        return null;
    else
        return convertDynamicStatic(dynSIndex, dynamicIdMap);
}
From source file:edu.ucsb.cs.invertedindex.mappers.InvertedMapper.java
License:Apache License
public void map(LongWritable key, FeatureWeightArrayWritable value,
        OutputCollector<LongWritable, DocWeight> output, Reporter reporter) throws IOException {
    // get() unwraps the document ID from the LongWritable input key.
    post.docId = key.get();
    for (FeatureWeight item : value.vector) {
        post.weight = item.weight;
        feature.set(item.feature);
        output.collect(feature, post);
    }
}
From source file:edu.ucsb.cs.invertedindex.reducers.InvertedReducer.java
License:Apache License
public void reduce(LongWritable key, Iterator<DocWeight> values,
        OutputCollector<LongWritable, DocWeightArrayWritable> output, Reporter reporter) throws IOException {
    feature.set(key.get());
    postings.clear();
    while (values.hasNext()) {
        DocWeight posting = values.next();
        postings.addPair(posting.docId, posting.weight);
    }
    output.collect(feature, postings);
}
From source file:edu.ucsb.cs.knn.core.KnnMapper.java
License:Apache License
public void compareWithOthers(Reader reader) throws IOException {
    int nSongs = SongUsersIndex.keySet().size();
    LongWritable key = new LongWritable();
    PostingUserArrayWritable value = new PostingUserArrayWritable();
    Long[] songIds = new Long[nSongs];
    SongUsersIndex.keySet().toArray(songIds);
    for (int i = 0; i < nSongs - 1; i++) {
        PostingUser[] songiUsers = SongUsersIndex.get(songIds[i]);
        // Read one song and its user postings at a time, comparing only
        // against songs whose IDs are larger than the current song's ID.
        while (reader.next(key, value))
            if (songIds[i] < key.get())
                computeAC(songIds[i], key.get(), songiUsers, value.getPosting(), false);
    }
}
From source file:edu.ucsb.cs.knn.query.QueryMain.java
License:Apache License
public static void main(String[] args) throws IOException {
    JobConf job = new JobConf();
    Path queryPath = new Path("query");
    FileSystem hdfs = queryPath.getFileSystem(job);
    if (!hdfs.exists(queryPath))
        throw new UnsupportedEncodingException("Query is not set");
    FSDataInputStream in = hdfs.open(queryPath);
    String line;
    // Get the songId whose rating should be predicted (s^i).
    BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
    System.out.print("Enter song id of music you want its predicted rate: ");
    String songId = br.readLine();
    // Look up the real neighbourhood of songId.
    MapFile.Reader reader = new MapFile.Reader(hdfs, "knn-output/part-00000", new Configuration());
    LongWritable key = new LongWritable(Integer.parseInt(songId));
    NeighboursArrayWritable neighborhood = new NeighboursArrayWritable();
    reader.get(key, neighborhood);
    System.out.println("Real neighbourhood of " + songId + " is " + neighborhood.toString());
    // Process each user.
    while ((line = in.readLine()) != null) {
        float predictedRateUp = 0f;
        float predictedRateDown = 0f;
        StringTokenizer str = new StringTokenizer(line, " |\t");
        long userId = Long.parseLong(str.nextToken());
        int nRatings = Integer.parseInt(str.nextToken());
        boolean rated = false;
        for (int lineNo = 0; lineNo < nRatings; lineNo++) {
            line = in.readLine(); // <songid rate>
            str = new StringTokenizer(line, " |\t");
            long currentUserSong = Long.parseLong(str.nextToken());
            int currentUserRate = Integer.parseInt(str.nextToken());
            // key.get() unwraps the queried song ID for the comparison.
            if (currentUserSong == key.get()) {
                rated = true;
                System.out.println("User " + userId + " already rated this song to " + currentUserRate);
                while (lineNo < nRatings) {
                    in.readLine();
                    lineNo++;
                }
                break;
            }
            float wij = neighborhood.getWeight(currentUserSong);
            predictedRateUp += currentUserRate * wij;
            predictedRateDown += Math.abs(wij);
        }
        // Predicted rating for this user.
        if (!rated)
            System.out.println("Predicted rating for user " + userId + " is "
                    + (predictedRateUp / predictedRateDown));
    }
}
From source file:edu.ucsb.cs.lsh.statistics.LshStat.java
License:Apache License
public static void lshProjectionStat(String[] args) throws IOException {
    boolean produceMax = false;
    if (args.length == 3)
        produceMax = true;
    else if (args.length != 2)
        printUsage(3);
    Path inputPath = new Path(args[1]);
    Configuration conf = new Configuration();
    FileSystem fs = inputPath.getFileSystem(conf);
    FileStatus[] files = fs.listStatus(inputPath);
    long i = 0, bucketCount = 0, avgBucketSize = 0, maxBucket = 0, minBucket = Long.MAX_VALUE;
    ArrayList<Integer> bucketSizes = new ArrayList<Integer>();
    for (FileStatus file : files) {
        if (fs.isDirectory(file.getPath()) || file.getPath().getName().startsWith("_"))
            continue;
        Reader reader = new SequenceFile.Reader(fs, file.getPath(), conf);
        LongWritable key = new LongWritable();
        FeatureWeightArrayWritable value = new FeatureWeightArrayWritable();
        while (reader.next(key, value)) {
            // A key of 0 marks the start of a new bucket.
            if (key.get() == 0) {
                bucketCount++;
                avgBucketSize += i;
                if (maxBucket < i) {
                    maxBucket = i;
                    maxBucketID = (bucketCount - 1);
                }
                if (i != 0 && minBucket > i)
                    minBucket = i;
                i = 0;
            } else {
                i++;
            }
        }
        avgBucketSize += i;
        bucketSizes.add((int) i);
    }
    System.out.println("Number of buckets:" + bucketCount);
    System.out.println("Max. bucket size:" + maxBucket + " with ID:" + maxBucketID);
    System.out.println("Min. bucket size:" + minBucket);
    System.out.println("Avg. buckets size:" + (avgBucketSize / (float) bucketCount));
    System.out.println("R-std. among bucket sizes:" + getRStd((avgBucketSize / (float) bucketCount), bucketSizes));
    System.out.println("Total comparison done within buckets:" + getSumCombin(bucketSizes));
    if (produceMax)
        produceMaxBucket(args);
    // getRepatedPairs(files, fs, conf);
}
From source file:edu.ucsb.cs.lsh.statistics.LshStat.java
License:Apache License
public static void getRepatedPairs(FileStatus[] files, FileSystem fs, Configuration conf) throws IOException {
    NumByteList bucket = null;
    long i = 0, bucketCount = 0;
    ArrayList<NumByteList> buckets = new ArrayList<NumByteList>();
    for (FileStatus file : files) {
        if (fs.isDirectory(file.getPath()) || file.getPath().getName().startsWith("_"))
            continue;
        Reader reader = new SequenceFile.Reader(fs, file.getPath(), conf);
        LongWritable key = new LongWritable();
        FeatureWeightArrayWritable value = new FeatureWeightArrayWritable();
        while (reader.next(key, value)) {
            if (key.get() == 0) {
                // A key of 0 starts a new bucket; flush the previous one.
                if (bucketCount != 0)
                    buckets.add(bucket);
                bucketCount++;
                bucket = new NumByteList(bucketCount);
                i = 0;
            } else {
                i++;
                bucket.addDoc(key.get());
            }
        }
    }
    System.out.println("Number of repeated docs across buckets: " + getRepetedPairsCount(buckets));
}
From source file:edu.ucsb.cs.lsh.statistics.LshStat.java
License:Apache License
public static void produceMaxBucket(String args[]) throws IOException {
    if (args.length == 3)
        maxBucketID = Integer.parseInt(args[2]);
    else if (args.length != 2)
        printUsage(4);
    Path inputPath = new Path(args[1]);
    Path outPath = new Path("maxBucket");
    Configuration conf = new Configuration();
    FileSystem fs = inputPath.getFileSystem(conf);
    if (fs.exists(outPath))
        fs.delete(outPath);
    FileStatus[] files = fs.listStatus(inputPath);
    SequenceFile.Writer writer = null;
    int bucketCount = 0;
    for (FileStatus file : files) {
        if (fs.isDirectory(file.getPath()) || file.getPath().getName().startsWith("_"))
            continue;
        Reader reader = new SequenceFile.Reader(fs, file.getPath(), conf);
        LongWritable key = new LongWritable();
        FeatureWeightArrayWritable value = new FeatureWeightArrayWritable();
        while (reader.next(key, value))
            if (key.get() == 0) {
                bucketCount++;
                if (bucketCount == maxBucketID) {
                    // Copy records until the next bucket marker (key == 0).
                    writer = SequenceFile.createWriter(fs, conf, outPath, LongWritable.class,
                            FeatureWeightArrayWritable.class, SequenceFile.CompressionType.NONE);
                    while (reader.next(key, value) && (key.get() != 0))
                        writer.append(key, value);
                    writer.close();
                    return;
                }
            }
    }
}