List of usage examples for org.apache.hadoop.io IntWritable get
public int get()
From source file:edu.umd.cloud9.io.HashMapWritableTest.java
License:Apache License
/**
 * Round-trips a two-entry map through {@code write}/{@code readFields} and verifies that the
 * deserialized copy holds the same entries, including after one of them is removed.
 */
@Test
public void testSerialize1() throws IOException {
  HashMapWritable<Text, IntWritable> origMap = new HashMapWritable<Text, IntWritable>();
  origMap.put(new Text("hi"), new IntWritable(5));
  origMap.put(new Text("there"), new IntWritable(22));

  // Serialize into an in-memory buffer.
  ByteArrayOutputStream bytesOut = new ByteArrayOutputStream();
  DataOutputStream dataOut = new DataOutputStream(bytesOut);
  origMap.write(dataOut);

  // Deserialize into a fresh map.
  HashMapWritable<Text, IntWritable> map = new HashMapWritable<Text, IntWritable>();
  map.readFields(new DataInputStream(new ByteArrayInputStream(bytesOut.toByteArray())));

  Text key;
  IntWritable value;

  // assertEquals takes (expected, actual); keeping that order makes failure messages truthful.
  assertEquals(2, map.size());

  key = new Text("hi");
  value = map.get(key);
  assertTrue(value != null);
  assertEquals(5, value.get());

  value = map.remove(key);
  assertEquals(1, map.size());

  key = new Text("there");
  value = map.get(key);
  assertTrue(value != null);
  assertEquals(22, value.get());
}
From source file:edu.umd.cloud9.pagerank.RangePartitioner.java
License:Apache License
/**
 * Assigns a key to a reducer by scaling the key's position within {@code mNodeCnt} up to the
 * number of reduce tasks.
 *
 * @param key            the record key; only its integer value is used
 * @param value          ignored
 * @param numReduceTasks number of partitions to spread keys over
 * @return the scaled position, truncated to an int and taken modulo {@code numReduceTasks}
 */
public int getPartition(IntWritable key, Writable value, int numReduceTasks) {
  // The ratio is deliberately computed in single precision.
  float fraction = (float) key.get() / (float) mNodeCnt;
  int bucket = (int) (fraction * numReduceTasks);
  // The modulo folds a ratio of exactly 1.0 (or above) back into range.
  return bucket % numReduceTasks;
}
From source file:edu.umd.cloud9.webgraph.data.IndexableAnchorTextForwardIndex.java
License:Apache License
public IndexableAnchorText getDocument(int docno) { int idx = Arrays.binarySearch(docnos, docno); if (idx < 0) idx = -idx - 2;//from w ww. j av a2s. c o m DecimalFormat df = new DecimalFormat("00000"); String file = collectionPath + "/part-" + df.format(filenos[idx]); try { SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(file), conf); IntWritable key = new IntWritable(); ArrayListWritable<AnchorText> value = new ArrayListWritable<AnchorText>(); reader.seek(offsets[idx]); while (reader.next(key)) { if (key.get() == docno) break; } reader.getCurrentValue(value); reader.close(); indexableAnchorText.createHTML(value); return indexableAnchorText; } catch (IOException e) { e.printStackTrace(); } return null; }
From source file:edu.umd.cloud9.webgraph.data.IndexableAnchorTextForwardIndex.java
License:Apache License
public int getLastDocno() { if (mLastDocno != -1) return mLastDocno; // find the last entry, and then see all the way to the end of the // collection int idx = docnos.length - 1; String file = collectionPath + "/part-" + df.format(filenos[idx]); try {//from w ww . j a va 2 s . c o m SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(file), conf); IntWritable key = new IntWritable(); reader.seek(offsets[idx]); while (reader.next(key)) ; mLastDocno = key.get(); } catch (IOException e) { e.printStackTrace(); } return mLastDocno; }
From source file:edu.umd.shrawanraina.ExtractTopPersonalizedPageRankNodes.java
License:Apache License
@SuppressWarnings("deprecation") private void extractTop(String inputPath, String outputPath, String sources, int n) throws IllegalArgumentException, IOException, ClassNotFoundException, InterruptedException, InstantiationException, IllegalAccessException { // TODO Auto-generated method stub /*/* w w w .j av a 2 s. co m*/ Configuration conf = getConf(); conf.setStrings("sources", sources); conf.setInt(LIMIT, n); Job job = Job.getInstance(conf); job.setJobName(ExtractTopPersonalizedPageRankNodes.class.getName() + ":" + inputPath); job.setJarByClass(ExtractTopPersonalizedPageRankNodes.class); job.setNumReduceTasks(1); FileInputFormat.addInputPath(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath + "-" + "Top")); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(FloatWritable.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(FloatWritable.class); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // Delete the output directory if it exists already. 
FileSystem.get(conf).delete(new Path(outputPath + "-" + "Top"), true); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); */ Configuration conf = new Configuration(); Path in = new Path(inputPath); FileSystem fs = FileSystem.get(conf); fs.delete(new Path(in + "/_SUCCESS"), true); List<TopScoredObjects<Integer>> queueList = new ArrayList<TopScoredObjects<Integer>>(); List<String> sourceList = Arrays.asList(sources.split(",")); for (int i = 0; i < sourceList.size(); i++) queueList.add(i, new TopScoredObjects<Integer>(n)); //System.out.println("Source : <<<<<<<"+sourceList.size()); FileStatus[] fss = fs.listStatus(new Path(in + "/")); for (FileStatus status : fss) { Path path = status.getPath(); //System.out.println("Path: <<<<<<< "+ path); SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf); IntWritable key = new IntWritable(); PageRankNodeUpd value = new PageRankNodeUpd(); while (reader.next(key, value)) { for (int i = 0; i < sourceList.size(); i++) { queueList.get(i).add(key.get(), value.getPageRankList().get(i)); //System.out.println(key.get() + " | " + value.getPageRankList().get(i)); } } reader.close(); } //System.out.println("List : <<<<<<<"+queueList.size()); for (int i = 0; i < sourceList.size(); i++) { TopScoredObjects<Integer> queue = queueList.get(i); System.out.println("Source : <<<<<<<" + sourceList.get(i)); for (PairOfObjectFloat<Integer> pair : queue.extractAll()) { int nodeid = ((Integer) pair.getLeftElement()); float pagerank = (float) Math.exp(pair.getRightElement()); System.out.println(String.format("%.5f %d", pagerank, nodeid)); } } }
From source file:edu.umn.cs.spatialHadoop.core.RectangleNN.java
License:Open Source License
public static <S1 extends Shape, S2 extends Shape> int SpatialJoin_planeSweepFilterOnly(final List<S1> R, final List<S2> S, final ResultCollector2<S1, S2> output, Reporter reporter) throws IOException { LOG.debug("Start spatial join plan sweep algorithm !!!"); final RectangleID[] Rmbrs = new RectangleID[R.size()]; for (int i = 0; i < R.size(); i++) { Rmbrs[i] = new RectangleID(i, R.get(i).getMBR()); }/*www . jav a2s.c o m*/ final RectangleID[] Smbrs = new RectangleID[S.size()]; for (int i = 0; i < S.size(); i++) { Smbrs[i] = new RectangleID(i, S.get(i).getMBR()); } final IntWritable count = new IntWritable(); int filterCount = SpatialJoin_rectangles(Rmbrs, Smbrs, new OutputCollector<RectangleID, RectangleID>() { @Override public void collect(RectangleID r1, RectangleID r2) throws IOException { //if (R.get(r1.id).isIntersected(S.get(r2.id))) { if (output != null) output.collect(R.get(r1.id), S.get(r2.id)); count.set(count.get() + 1); //} } }, reporter); LOG.debug("Filtered result size " + filterCount + ", refined result size " + count.get()); return count.get(); }
From source file:edu.umn.cs.spatialHadoop.core.RectangleNN.java
License:Open Source License
/** * The general version of self join algorithm which works with arbitrary * shapes. First, it performs a filter step where it finds shapes with * overlapping MBRs. Second, an optional refine step can be executed to * return only shapes which actually overlap. * @param R - input set of shapes/*from w w w .j ava 2 s . c om*/ * @param refine - Whether or not to run a refine step * @param output - output collector where the results are reported * @return - number of pairs returned by the planesweep algorithm * @throws IOException */ public static <S extends Shape> int SelfJoin_planeSweep(final S[] R, boolean refine, final OutputCollector<S, S> output, Progressable reporter) throws IOException { // Use a two-phase filter and refine approach // 1- Use MBRs as a first filter // 2- Use ConvexHull as a second filter // 3- Use the exact shape for refinement final RectangleID[] mbrs = new RectangleID[R.length]; for (int i = 0; i < R.length; i++) { mbrs[i] = new RectangleID(i, R[i].getMBR()); } if (refine) { final IntWritable count = new IntWritable(); int filterCount = SelfJoin_rectangles(mbrs, new OutputCollector<RectangleID, RectangleID>() { @Override public void collect(RectangleID r1, RectangleID r2) throws IOException { if (R[r1.id].isIntersected(R[r2.id])) { if (output != null) output.collect(R[r1.id], R[r2.id]); count.set(count.get() + 1); } } }, reporter); LOG.debug("Filtered result size " + filterCount + ", refined result size " + count.get()); return count.get(); } else { return SelfJoin_rectangles(mbrs, new OutputCollector<RectangleID, RectangleID>() { @Override public void collect(RectangleID r1, RectangleID r2) throws IOException { if (output != null) output.collect(R[r1.id], R[r2.id]); } }, reporter); } }
From source file:edu.umn.cs.spatialHadoop.operations.KNN.java
License:Open Source License
/** * A MapReduce version of KNN query./*from ww w .j a va 2 s .c o m*/ * @param fs * @param inputPath * @param queryPoint * @param shape * @param output * @return * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ private static <S extends Shape> Job knnMapReduce(Path inputPath, Path userOutputPath, OperationsParams params) throws IOException, ClassNotFoundException, InterruptedException { Job job = new Job(params, "KNN"); job.setJarByClass(KNN.class); FileSystem inFs = inputPath.getFileSystem(params); job.setInputFormatClass(SpatialInputFormat3.class); SpatialInputFormat3.setInputPaths(job, inputPath); job.setMapperClass(KNNMap.class); job.setMapOutputKeyClass(NullWritable.class); job.setMapOutputValueClass(TextWithDistance.class); job.setReducerClass(KNNReduce.class); job.setNumReduceTasks(1); job.getConfiguration().setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class); final Point queryPoint = (Point) params.getShape("point"); final int k = params.getInt("k", 1); final IntWritable additional_blocks_2b_processed = new IntWritable(0); long resultCount; int iterations = 0; Path outputPath = userOutputPath; if (outputPath == null) { do { outputPath = new Path(inputPath.getName() + ".knn_" + (int) (Math.random() * 1000000)); } while (inFs.exists(outputPath)); } job.setOutputFormatClass(TextOutputFormat3.class); TextOutputFormat3.setOutputPath(job, outputPath); GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, inputPath); Configuration templateConf = job.getConfiguration(); FileSystem outFs = outputPath.getFileSystem(params); // Start with the query point to select all partitions overlapping with it Shape range_for_this_iteration = new Point(queryPoint.x, queryPoint.y); do { job = new Job(templateConf); // Delete results of last iteration if not first iteration if (outputPath != null) outFs.delete(outputPath, true); LOG.info("Running iteration: " + (++iterations)); // Set query range for the 
SpatialInputFormat OperationsParams.setShape(job.getConfiguration(), RangeFilter.QueryRange, range_for_this_iteration); // Submit the job if (params.getBoolean("background", false)) { // XXX this is incorrect because if the job needs multiple iterations, // it will run only the first one job.waitForCompletion(false); return job; } job.waitForCompletion(false); // Retrieve answers for this iteration Counters counters = job.getCounters(); Counter resultSizeCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS); resultCount = resultSizeCounter.getValue(); if (globalIndex != null) { Circle range_for_next_iteration; if (resultCount < k) { LOG.info("Found only " + resultCount + " results"); // Did not find enough results in the query space // Increase the distance by doubling the maximum distance among all // partitions that were processed final DoubleWritable maximum_distance = new DoubleWritable(0); int matched_partitions = globalIndex.rangeQuery(range_for_this_iteration, new ResultCollector<Partition>() { @Override public void collect(Partition p) { double distance = p.getMaxDistanceTo(queryPoint.x, queryPoint.y); if (distance > maximum_distance.get()) maximum_distance.set(distance); } }); if (matched_partitions == 0) { // The query point is outside the search space // Set the range to include the closest partition globalIndex.knn(queryPoint.x, queryPoint.y, 1, new ResultCollector2<Partition, Double>() { @Override public void collect(Partition r, Double s) { maximum_distance.set(s); } }); } range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y, maximum_distance.get() * 2); LOG.info("Expanding to " + maximum_distance.get() * 2); } else { // Calculate the new test range which is a circle centered at the // query point and distance to the k^{th} neighbor // Get distance to the kth neighbor final DoubleWritable distance_to_kth_neighbor = new DoubleWritable(); FileStatus[] results = outFs.listStatus(outputPath); for (FileStatus result_file : results) 
{ if (result_file.getLen() > 0 && result_file.getPath().getName().startsWith("part-")) { // Read the last line (kth neighbor) Tail.tail(outFs, result_file.getPath(), 1, new TextWithDistance(), new ResultCollector<TextWithDistance>() { @Override public void collect(TextWithDistance r) { distance_to_kth_neighbor.set(r.distance); } }); } } range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y, distance_to_kth_neighbor.get()); LOG.info("Expanding to kth neighbor: " + distance_to_kth_neighbor); } // Calculate the number of blocks to be processed to check the // terminating condition; additional_blocks_2b_processed.set(0); final Shape temp = range_for_this_iteration; globalIndex.rangeQuery(range_for_next_iteration, new ResultCollector<Partition>() { @Override public void collect(Partition p) { if (!(p.isIntersected(temp))) { additional_blocks_2b_processed.set(additional_blocks_2b_processed.get() + 1); } } }); range_for_this_iteration = range_for_next_iteration; } } while (additional_blocks_2b_processed.get() > 0); // If output file is not set by user, delete it if (userOutputPath == null) outFs.delete(outputPath, true); TotalIterations.addAndGet(iterations); return job; }
From source file:edu.umn.cs.spatialHadoop.operations.Union.java
License:Open Source License
private static <S extends OGCJTSShape> void unionLocal(Path inPath, Path outPath, final OperationsParams params) throws IOException, InterruptedException, ClassNotFoundException { // 1- Split the input path/file to get splits that can be processed independently final SpatialInputFormat3<Rectangle, S> inputFormat = new SpatialInputFormat3<Rectangle, S>(); Job job = Job.getInstance(params);/*from w w w .jav a 2 s. co m*/ SpatialInputFormat3.setInputPaths(job, inPath); final List<InputSplit> splits = inputFormat.getSplits(job); int parallelism = params.getInt("parallel", Runtime.getRuntime().availableProcessors()); // 2- Process splits in parallel final List<Float> progresses = new Vector<Float>(); final IntWritable overallProgress = new IntWritable(0); List<List<Geometry>> results = Parallel.forEach(splits.size(), new RunnableRange<List<Geometry>>() { @Override public List<Geometry> run(final int i1, final int i2) { final int pi; final IntWritable splitsProgress = new IntWritable(); synchronized (progresses) { pi = progresses.size(); progresses.add(0f); } final float progressRatio = (i2 - i1) / (float) splits.size(); Progressable progress = new Progressable.NullProgressable() { @Override public void progress(float p) { progresses.set(pi, p * ((splitsProgress.get() - i1) / (float) (i2 - i1)) * progressRatio); float sum = 0; for (float f : progresses) sum += f; int newProgress = (int) (sum * 100); if (newProgress > overallProgress.get()) { overallProgress.set(newProgress); LOG.info("Local union progress " + newProgress + "%"); } } }; final List<Geometry> localUnion = new ArrayList<Geometry>(); ResultCollector<Geometry> output = new ResultCollector<Geometry>() { @Override public void collect(Geometry r) { localUnion.add(r); } }; final int MaxBatchSize = 100000; Geometry[] batch = new Geometry[MaxBatchSize]; int batchSize = 0; for (int i = i1; i < i2; i++) { splitsProgress.set(i); try { FileSplit fsplit = (FileSplit) splits.get(i); final RecordReader<Rectangle, 
Iterable<S>> reader = inputFormat.createRecordReader(fsplit, null); if (reader instanceof SpatialRecordReader3) { ((SpatialRecordReader3) reader).initialize(fsplit, params); } else if (reader instanceof RTreeRecordReader3) { ((RTreeRecordReader3) reader).initialize(fsplit, params); } else if (reader instanceof HDFRecordReader) { ((HDFRecordReader) reader).initialize(fsplit, params); } else { throw new RuntimeException("Unknown record reader"); } while (reader.nextKeyValue()) { Iterable<S> shapes = reader.getCurrentValue(); for (S s : shapes) { if (s.geom == null) continue; batch[batchSize++] = s.geom; if (batchSize >= MaxBatchSize) { SpatialAlgorithms.multiUnion(batch, progress, output); batchSize = 0; } } } reader.close(); } catch (IOException e) { LOG.error("Error processing split " + splits.get(i), e); } catch (InterruptedException e) { LOG.error("Error processing split " + splits.get(i), e); } } // Union all remaining geometries try { Geometry[] finalBatch = new Geometry[batchSize]; System.arraycopy(batch, 0, finalBatch, 0, batchSize); SpatialAlgorithms.multiUnion(finalBatch, progress, output); return localUnion; } catch (IOException e) { // Should never happen as the context is passed as null throw new RuntimeException("Error in local union", e); } } }, parallelism); // Write result to output LOG.info("Merge the results of all splits"); int totalNumGeometries = 0; for (List<Geometry> result : results) totalNumGeometries += result.size(); List<Geometry> allInOne = new ArrayList<Geometry>(totalNumGeometries); for (List<Geometry> result : results) allInOne.addAll(result); final S outShape = (S) params.getShape("shape"); final PrintStream out; if (outPath == null || !params.getBoolean("output", true)) { // Skip writing the output out = new PrintStream(new NullOutputStream()); } else { FileSystem outFS = outPath.getFileSystem(params); out = new PrintStream(outFS.create(outPath)); } SpatialAlgorithms.multiUnion(allInOne.toArray(new Geometry[allInOne.size()]), new 
Progressable.NullProgressable() { int lastProgress = 0; public void progress(float p) { int newProgresss = (int) (p * 100); if (newProgresss > lastProgress) { LOG.info("Global union progress " + (lastProgress = newProgresss) + "%"); } } }, new ResultCollector<Geometry>() { Text line = new Text2(); @Override public void collect(Geometry r) { outShape.geom = r; outShape.toText(line); out.println(line); } }); out.close(); }
From source file:edu.umn.cs.sthadoop.operations.HSPKNNQ.java
License:Open Source License
/** * A MapReduce version of KNN query./*from w w w. j a v a 2 s. c o m*/ * @param fs * @param inputPath * @param queryPoint * @param shape * @param output * @return * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ private static <S extends Shape> Job knnMapReduce(Path inputPath, Path userOutputPath, OperationsParams params) throws IOException, ClassNotFoundException, InterruptedException { Job job = new Job(params, "PKNN"); job.setJarByClass(HSPKNNQ.class); FileSystem inFs = inputPath.getFileSystem(params); job.setInputFormatClass(SpatialInputFormat3.class); SpatialInputFormat3.setInputPaths(job, inputPath); job.setMapperClass(KNNMap.class); job.setMapOutputKeyClass(NullWritable.class); job.setMapOutputValueClass(TextWithDistance.class); job.setReducerClass(KNNReduce.class); job.setNumReduceTasks(1); job.getConfiguration().setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class); final Point queryPoint = (Point) params.getShape("point"); final int k = params.getInt("k", 1); final IntWritable additional_blocks_2b_processed = new IntWritable(0); long resultCount; int iterations = 0; Path outputPath = userOutputPath; if (outputPath == null) { do { outputPath = new Path(inputPath.getName() + ".knn_" + (int) (Math.random() * 1000000)); } while (inFs.exists(outputPath)); } job.setOutputFormatClass(TextOutputFormat3.class); TextOutputFormat3.setOutputPath(job, outputPath); GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, inputPath); Configuration templateConf = job.getConfiguration(); FileSystem outFs = outputPath.getFileSystem(params); // Start with the query point to select all partitions overlapping with it Shape range_for_this_iteration = new Point(queryPoint.x, queryPoint.y); do { job = new Job(templateConf); // Delete results of last iteration if not first iteration if (outputPath != null) outFs.delete(outputPath, true); LOG.info("Running iteration: " + (++iterations)); // Set query range 
for the SpatialInputFormat OperationsParams.setShape(job.getConfiguration(), RangeFilter.QueryRange, range_for_this_iteration); // Submit the job if (params.getBoolean("background", false)) { // XXX this is incorrect because if the job needs multiple iterations, // it will run only the first one job.waitForCompletion(false); return job; } job.waitForCompletion(false); // Retrieve answers for this iteration Counters counters = job.getCounters(); Counter resultSizeCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS); resultCount = resultSizeCounter.getValue(); if (globalIndex != null) { Circle range_for_next_iteration; if (resultCount < k) { LOG.info("Found only " + resultCount + " results"); // Did not find enough results in the query space // Increase the distance by doubling the maximum distance among all // partitions that were processed final DoubleWritable maximum_distance = new DoubleWritable(0); int matched_partitions = globalIndex.rangeQuery(range_for_this_iteration, new ResultCollector<Partition>() { @Override public void collect(Partition p) { double distance = p.getMaxDistanceTo(queryPoint.x, queryPoint.y); if (distance > maximum_distance.get()) maximum_distance.set(distance); } }); if (matched_partitions == 0) { // The query point is outside the search space // Set the range to include the closest partition globalIndex.knn(queryPoint.x, queryPoint.y, 1, new ResultCollector2<Partition, Double>() { @Override public void collect(Partition r, Double s) { maximum_distance.set(s); } }); } range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y, maximum_distance.get() * 2); LOG.info("Expanding to " + maximum_distance.get() * 2); } else { // Calculate the new test range which is a circle centered at the // query point and distance to the k^{th} neighbor // Get distance to the kth neighbor final DoubleWritable distance_to_kth_neighbor = new DoubleWritable(); FileStatus[] results = outFs.listStatus(outputPath); for (FileStatus result_file : 
results) { if (result_file.getLen() > 0 && result_file.getPath().getName().startsWith("part-")) { // Read the last line (kth neighbor) Tail.tail(outFs, result_file.getPath(), 1, new TextWithDistance(), new ResultCollector<TextWithDistance>() { @Override public void collect(TextWithDistance r) { distance_to_kth_neighbor.set(r.distance); } }); } } range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y, distance_to_kth_neighbor.get()); LOG.info("Expanding to kth neighbor: " + distance_to_kth_neighbor); } // Calculate the number of blocks to be processed to check the // terminating condition; additional_blocks_2b_processed.set(0); final Shape temp = range_for_this_iteration; globalIndex.rangeQuery(range_for_next_iteration, new ResultCollector<Partition>() { @Override public void collect(Partition p) { if (!(p.isIntersected(temp))) { additional_blocks_2b_processed.set(additional_blocks_2b_processed.get() + 1); } } }); range_for_this_iteration = range_for_next_iteration; } } while (additional_blocks_2b_processed.get() > 0); // If output file is not set by user, delete it if (userOutputPath == null) outFs.delete(outputPath, true); TotalIterations.addAndGet(iterations); return job; }