Usage examples for org.apache.hadoop.io.IntWritable.set(int).
public void set(int value)
From source file:edu.umn.cs.spatialHadoop.operations.KNN.java
License:Open Source License
/**
 * A MapReduce version of a k-nearest-neighbor (KNN) query. The job is run
 * iteratively: each round filters the input to a range around the query point;
 * if fewer than k results are produced, the range is expanded (using the
 * global index) and the job is re-submitted, until no additional partitions
 * would be touched by a larger range.
 *
 * @param inputPath path to the (possibly spatially indexed) input file
 * @param userOutputPath output path; when null a temporary path is generated
 *        and deleted once the query finishes
 * @param params operation parameters; reads "point" (query point), "k"
 *        (number of neighbors, default 1) and "background" (submit first
 *        iteration and return immediately)
 * @return the last MapReduce job that was run
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static <S extends Shape> Job knnMapReduce(Path inputPath, Path userOutputPath, OperationsParams params)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(params, "KNN");
    job.setJarByClass(KNN.class);
    FileSystem inFs = inputPath.getFileSystem(params);
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inputPath);
    job.setMapperClass(KNNMap.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(TextWithDistance.class);
    job.setReducerClass(KNNReduce.class);
    // A single reducer so the k nearest results can be selected globally
    job.setNumReduceTasks(1);
    job.getConfiguration().setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
    final Point queryPoint = (Point) params.getShape("point");
    final int k = params.getInt("k", 1);
    // Shared counter: number of partitions a wider range would add (loop condition)
    final IntWritable additional_blocks_2b_processed = new IntWritable(0);
    long resultCount;
    int iterations = 0;
    Path outputPath = userOutputPath;
    if (outputPath == null) {
        // Pick a random non-existing temporary output path
        do {
            outputPath = new Path(inputPath.getName() + ".knn_" + (int) (Math.random() * 1000000));
        } while (inFs.exists(outputPath));
    }
    job.setOutputFormatClass(TextOutputFormat3.class);
    TextOutputFormat3.setOutputPath(job, outputPath);
    GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, inputPath);
    Configuration templateConf = job.getConfiguration();
    FileSystem outFs = outputPath.getFileSystem(params);
    // Start with the query point to select all partitions overlapping with it
    Shape range_for_this_iteration = new Point(queryPoint.x, queryPoint.y);
    do {
        job = new Job(templateConf);
        // Delete results of last iteration if not first iteration
        // NOTE(review): outputPath can no longer be null here, so this check is
        // always true; on the first iteration this deletes a non-existing path.
        if (outputPath != null)
            outFs.delete(outputPath, true);
        LOG.info("Running iteration: " + (++iterations));
        // Set query range for the SpatialInputFormat
        OperationsParams.setShape(job.getConfiguration(), RangeFilter.QueryRange, range_for_this_iteration);
        // Submit the job
        if (params.getBoolean("background", false)) {
            // XXX this is incorrect because if the job needs multiple iterations,
            // it will run only the first one
            job.waitForCompletion(false);
            return job;
        }
        job.waitForCompletion(false);
        // Retrieve answers for this iteration
        Counters counters = job.getCounters();
        Counter resultSizeCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS);
        resultCount = resultSizeCounter.getValue();
        if (globalIndex != null) {
            Circle range_for_next_iteration;
            if (resultCount < k) {
                LOG.info("Found only " + resultCount + " results");
                // Did not find enough results in the query space
                // Increase the distance by doubling the maximum distance among all
                // partitions that were processed
                final DoubleWritable maximum_distance = new DoubleWritable(0);
                int matched_partitions = globalIndex.rangeQuery(range_for_this_iteration,
                        new ResultCollector<Partition>() {
                            @Override
                            public void collect(Partition p) {
                                double distance = p.getMaxDistanceTo(queryPoint.x, queryPoint.y);
                                if (distance > maximum_distance.get())
                                    maximum_distance.set(distance);
                            }
                        });
                if (matched_partitions == 0) {
                    // The query point is outside the search space
                    // Set the range to include the closest partition
                    globalIndex.knn(queryPoint.x, queryPoint.y, 1, new ResultCollector2<Partition, Double>() {
                        @Override
                        public void collect(Partition r, Double s) {
                            maximum_distance.set(s);
                        }
                    });
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y, maximum_distance.get() * 2);
                LOG.info("Expanding to " + maximum_distance.get() * 2);
            } else {
                // Enough results found. The new test range is a circle centered at
                // the query point with radius = distance to the k-th neighbor.
                // Get distance to the kth neighbor (last line of the sorted output)
                final DoubleWritable distance_to_kth_neighbor = new DoubleWritable();
                FileStatus[] results = outFs.listStatus(outputPath);
                for (FileStatus result_file : results) {
                    if (result_file.getLen() > 0 && result_file.getPath().getName().startsWith("part-")) {
                        // Read the last line (kth neighbor)
                        Tail.tail(outFs, result_file.getPath(), 1, new TextWithDistance(),
                                new ResultCollector<TextWithDistance>() {
                                    @Override
                                    public void collect(TextWithDistance r) {
                                        distance_to_kth_neighbor.set(r.distance);
                                    }
                                });
                    }
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y, distance_to_kth_neighbor.get());
                LOG.info("Expanding to kth neighbor: " + distance_to_kth_neighbor);
            }
            // Calculate the number of blocks to be processed to check the
            // terminating condition: partitions covered by the next range that the
            // current range did not intersect
            additional_blocks_2b_processed.set(0);
            final Shape temp = range_for_this_iteration;
            globalIndex.rangeQuery(range_for_next_iteration, new ResultCollector<Partition>() {
                @Override
                public void collect(Partition p) {
                    if (!(p.isIntersected(temp))) {
                        additional_blocks_2b_processed.set(additional_blocks_2b_processed.get() + 1);
                    }
                }
            });
            range_for_this_iteration = range_for_next_iteration;
        }
    } while (additional_blocks_2b_processed.get() > 0);
    // If output file is not set by user, delete it
    if (userOutputPath == null)
        outFs.delete(outputPath, true);
    TotalIterations.addAndGet(iterations);
    return job;
}
From source file:edu.umn.cs.spatialHadoop.operations.Union.java
License:Open Source License
/**
 * Computes the union of all geometries in the input file locally (no
 * MapReduce). The input is split, splits are processed in parallel threads
 * (each computing partial unions in batches of up to 100,000 geometries), and
 * a final global union merges all partial results before writing them to the
 * output path as text.
 *
 * @param inPath input file path
 * @param outPath output path; when null, or when the "output" parameter is
 *        false, the result is discarded
 * @param params operation parameters; reads "parallel" (thread count,
 *        defaults to available processors), "shape" and "output"
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static <S extends OGCJTSShape> void unionLocal(Path inPath, Path outPath, final OperationsParams params)
        throws IOException, InterruptedException, ClassNotFoundException {
    // 1- Split the input path/file to get splits that can be processed independently
    final SpatialInputFormat3<Rectangle, S> inputFormat = new SpatialInputFormat3<Rectangle, S>();
    Job job = Job.getInstance(params);
    SpatialInputFormat3.setInputPaths(job, inPath);
    final List<InputSplit> splits = inputFormat.getSplits(job);
    int parallelism = params.getInt("parallel", Runtime.getRuntime().availableProcessors());
    // 2- Process splits in parallel
    // Per-thread progress fractions; Vector because threads append/update concurrently
    final List<Float> progresses = new Vector<Float>();
    final IntWritable overallProgress = new IntWritable(0);
    List<List<Geometry>> results = Parallel.forEach(splits.size(), new RunnableRange<List<Geometry>>() {
        @Override
        public List<Geometry> run(final int i1, final int i2) {
            // Index of this thread's slot in the shared progresses list
            final int pi;
            final IntWritable splitsProgress = new IntWritable();
            synchronized (progresses) {
                pi = progresses.size();
                progresses.add(0f);
            }
            final float progressRatio = (i2 - i1) / (float) splits.size();
            Progressable progress = new Progressable.NullProgressable() {
                @Override
                public void progress(float p) {
                    // Combine within-split progress with the split counter to get
                    // this thread's overall fraction, then log whole-percent gains
                    progresses.set(pi, p * ((splitsProgress.get() - i1) / (float) (i2 - i1)) * progressRatio);
                    float sum = 0;
                    for (float f : progresses)
                        sum += f;
                    int newProgress = (int) (sum * 100);
                    if (newProgress > overallProgress.get()) {
                        overallProgress.set(newProgress);
                        LOG.info("Local union progress " + newProgress + "%");
                    }
                }
            };
            final List<Geometry> localUnion = new ArrayList<Geometry>();
            ResultCollector<Geometry> output = new ResultCollector<Geometry>() {
                @Override
                public void collect(Geometry r) {
                    localUnion.add(r);
                }
            };
            // Union geometries in fixed-size batches to bound memory usage
            final int MaxBatchSize = 100000;
            Geometry[] batch = new Geometry[MaxBatchSize];
            int batchSize = 0;
            for (int i = i1; i < i2; i++) {
                splitsProgress.set(i);
                try {
                    FileSplit fsplit = (FileSplit) splits.get(i);
                    final RecordReader<Rectangle, Iterable<S>> reader = inputFormat.createRecordReader(fsplit,
                            null);
                    if (reader instanceof SpatialRecordReader3) {
                        ((SpatialRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof RTreeRecordReader3) {
                        ((RTreeRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof HDFRecordReader) {
                        ((HDFRecordReader) reader).initialize(fsplit, params);
                    } else {
                        throw new RuntimeException("Unknown record reader");
                    }
                    while (reader.nextKeyValue()) {
                        Iterable<S> shapes = reader.getCurrentValue();
                        for (S s : shapes) {
                            if (s.geom == null)
                                continue;
                            batch[batchSize++] = s.geom;
                            if (batchSize >= MaxBatchSize) {
                                SpatialAlgorithms.multiUnion(batch, progress, output);
                                batchSize = 0;
                            }
                        }
                    }
                    // NOTE(review): reader is not closed in a finally block; an
                    // exception while reading leaks the reader.
                    reader.close();
                } catch (IOException e) {
                    // Best-effort: log and continue with the remaining splits
                    LOG.error("Error processing split " + splits.get(i), e);
                } catch (InterruptedException e) {
                    LOG.error("Error processing split " + splits.get(i), e);
                }
            }
            // Union all remaining geometries
            try {
                Geometry[] finalBatch = new Geometry[batchSize];
                System.arraycopy(batch, 0, finalBatch, 0, batchSize);
                SpatialAlgorithms.multiUnion(finalBatch, progress, output);
                return localUnion;
            } catch (IOException e) {
                // Should never happen as the context is passed as null
                throw new RuntimeException("Error in local union", e);
            }
        }
    }, parallelism);
    // Write result to output
    LOG.info("Merge the results of all splits");
    int totalNumGeometries = 0;
    for (List<Geometry> result : results)
        totalNumGeometries += result.size();
    List<Geometry> allInOne = new ArrayList<Geometry>(totalNumGeometries);
    for (List<Geometry> result : results)
        allInOne.addAll(result);
    final S outShape = (S) params.getShape("shape");
    final PrintStream out;
    if (outPath == null || !params.getBoolean("output", true)) {
        // Skip writing the output
        out = new PrintStream(new NullOutputStream());
    } else {
        FileSystem outFS = outPath.getFileSystem(params);
        out = new PrintStream(outFS.create(outPath));
    }
    // 3- Global union of all per-thread partial results
    SpatialAlgorithms.multiUnion(allInOne.toArray(new Geometry[allInOne.size()]),
            new Progressable.NullProgressable() {
                int lastProgress = 0;

                public void progress(float p) {
                    int newProgresss = (int) (p * 100);
                    if (newProgresss > lastProgress) {
                        LOG.info("Global union progress " + (lastProgress = newProgresss) + "%");
                    }
                }
            }, new ResultCollector<Geometry>() {
                Text line = new Text2();

                @Override
                public void collect(Geometry r) {
                    outShape.geom = r;
                    outShape.toText(line);
                    out.println(line);
                }
            });
    out.close();
}
From source file:edu.umn.cs.sthadoop.operations.HSPKNNQ.java
License:Open Source License
/**
 * A MapReduce version of a k-nearest-neighbor (KNN) query ("PKNN"). This is
 * essentially the same iterative driver as the KNN operation in SpatialHadoop:
 * each round filters the input to a range around the query point; if fewer
 * than k results are found, the range is expanded via the global index and the
 * job is re-run until no additional partitions would be touched.
 *
 * @param inputPath path to the (possibly spatially indexed) input file
 * @param userOutputPath output path; when null a temporary path is generated
 *        and deleted once the query finishes
 * @param params operation parameters; reads "point", "k" (default 1) and
 *        "background"
 * @return the last MapReduce job that was run
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static <S extends Shape> Job knnMapReduce(Path inputPath, Path userOutputPath, OperationsParams params)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(params, "PKNN");
    job.setJarByClass(HSPKNNQ.class);
    FileSystem inFs = inputPath.getFileSystem(params);
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inputPath);
    job.setMapperClass(KNNMap.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(TextWithDistance.class);
    job.setReducerClass(KNNReduce.class);
    // Single reducer so the k nearest results can be selected globally
    job.setNumReduceTasks(1);
    job.getConfiguration().setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
    final Point queryPoint = (Point) params.getShape("point");
    final int k = params.getInt("k", 1);
    // Number of partitions a wider range would add (drives the loop condition)
    final IntWritable additional_blocks_2b_processed = new IntWritable(0);
    long resultCount;
    int iterations = 0;
    Path outputPath = userOutputPath;
    if (outputPath == null) {
        // Pick a random non-existing temporary output path
        do {
            outputPath = new Path(inputPath.getName() + ".knn_" + (int) (Math.random() * 1000000));
        } while (inFs.exists(outputPath));
    }
    job.setOutputFormatClass(TextOutputFormat3.class);
    TextOutputFormat3.setOutputPath(job, outputPath);
    GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, inputPath);
    Configuration templateConf = job.getConfiguration();
    FileSystem outFs = outputPath.getFileSystem(params);
    // Start with the query point to select all partitions overlapping with it
    Shape range_for_this_iteration = new Point(queryPoint.x, queryPoint.y);
    do {
        job = new Job(templateConf);
        // Delete results of last iteration if not first iteration
        // NOTE(review): outputPath can no longer be null here; this check is
        // always true.
        if (outputPath != null)
            outFs.delete(outputPath, true);
        LOG.info("Running iteration: " + (++iterations));
        // Set query range for the SpatialInputFormat
        OperationsParams.setShape(job.getConfiguration(), RangeFilter.QueryRange, range_for_this_iteration);
        // Submit the job
        if (params.getBoolean("background", false)) {
            // XXX this is incorrect because if the job needs multiple iterations,
            // it will run only the first one
            job.waitForCompletion(false);
            return job;
        }
        job.waitForCompletion(false);
        // Retrieve answers for this iteration
        Counters counters = job.getCounters();
        Counter resultSizeCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS);
        resultCount = resultSizeCounter.getValue();
        if (globalIndex != null) {
            Circle range_for_next_iteration;
            if (resultCount < k) {
                LOG.info("Found only " + resultCount + " results");
                // Did not find enough results in the query space
                // Increase the distance by doubling the maximum distance among all
                // partitions that were processed
                final DoubleWritable maximum_distance = new DoubleWritable(0);
                int matched_partitions = globalIndex.rangeQuery(range_for_this_iteration,
                        new ResultCollector<Partition>() {
                            @Override
                            public void collect(Partition p) {
                                double distance = p.getMaxDistanceTo(queryPoint.x, queryPoint.y);
                                if (distance > maximum_distance.get())
                                    maximum_distance.set(distance);
                            }
                        });
                if (matched_partitions == 0) {
                    // The query point is outside the search space
                    // Set the range to include the closest partition
                    globalIndex.knn(queryPoint.x, queryPoint.y, 1, new ResultCollector2<Partition, Double>() {
                        @Override
                        public void collect(Partition r, Double s) {
                            maximum_distance.set(s);
                        }
                    });
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y, maximum_distance.get() * 2);
                LOG.info("Expanding to " + maximum_distance.get() * 2);
            } else {
                // Enough results found. The new test range is a circle centered at
                // the query point with radius = distance to the k-th neighbor.
                final DoubleWritable distance_to_kth_neighbor = new DoubleWritable();
                FileStatus[] results = outFs.listStatus(outputPath);
                for (FileStatus result_file : results) {
                    if (result_file.getLen() > 0 && result_file.getPath().getName().startsWith("part-")) {
                        // Read the last line (kth neighbor)
                        Tail.tail(outFs, result_file.getPath(), 1, new TextWithDistance(),
                                new ResultCollector<TextWithDistance>() {
                                    @Override
                                    public void collect(TextWithDistance r) {
                                        distance_to_kth_neighbor.set(r.distance);
                                    }
                                });
                    }
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y, distance_to_kth_neighbor.get());
                LOG.info("Expanding to kth neighbor: " + distance_to_kth_neighbor);
            }
            // Calculate the number of blocks to be processed to check the
            // terminating condition: partitions in the next range not already
            // intersected by the current range
            additional_blocks_2b_processed.set(0);
            final Shape temp = range_for_this_iteration;
            globalIndex.rangeQuery(range_for_next_iteration, new ResultCollector<Partition>() {
                @Override
                public void collect(Partition p) {
                    if (!(p.isIntersected(temp))) {
                        additional_blocks_2b_processed.set(additional_blocks_2b_processed.get() + 1);
                    }
                }
            });
            range_for_this_iteration = range_for_next_iteration;
        }
    } while (additional_blocks_2b_processed.get() > 0);
    // If output file is not set by user, delete it
    if (userOutputPath == null)
        outFs.delete(outputPath, true);
    TotalIterations.addAndGet(iterations);
    return job;
}
From source file:edu.umn.cs.sthadoop.trajectory.KNNDTW.java
License:Open Source License
/**
 * A MapReduce version of a k-nearest-neighbor (KNN) query for trajectories
 * (DTW variant). Same iterative driver as the SpatialHadoop KNN operation:
 * each round filters the input to a range around the query point; if fewer
 * than k results are found, the range is expanded via the global index and the
 * job is re-run until no additional partitions would be touched.
 *
 * @param inputPath path to the (possibly spatially indexed) input file
 * @param userOutputPath output path; when null a temporary path is generated
 *        and deleted once the query finishes
 * @param params operation parameters; reads "point", "k" (default 1) and
 *        "background"
 * @return the last MapReduce job that was run
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static <S extends Shape> Job knnMapReduce(Path inputPath, Path userOutputPath, OperationsParams params)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(params, "KNN");
    job.setJarByClass(KNNDTW.class);
    FileSystem inFs = inputPath.getFileSystem(params);
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inputPath);
    job.setMapperClass(KNNMap.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(TextWithDistance.class);
    job.setReducerClass(KNNReduce.class);
    // Single reducer so the k nearest results can be selected globally
    job.setNumReduceTasks(1);
    job.getConfiguration().setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
    final Point queryPoint = (Point) params.getShape("point");
    final int k = params.getInt("k", 1);
    // Number of partitions a wider range would add (drives the loop condition)
    final IntWritable additional_blocks_2b_processed = new IntWritable(0);
    long resultCount;
    int iterations = 0;
    Path outputPath = userOutputPath;
    if (outputPath == null) {
        // Pick a random non-existing temporary output path
        do {
            outputPath = new Path(inputPath.getName() + ".knn_" + (int) (Math.random() * 1000000));
        } while (inFs.exists(outputPath));
    }
    job.setOutputFormatClass(TextOutputFormat3.class);
    TextOutputFormat3.setOutputPath(job, outputPath);
    GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, inputPath);
    Configuration templateConf = job.getConfiguration();
    FileSystem outFs = outputPath.getFileSystem(params);
    // Start with the query point to select all partitions overlapping with
    // it
    Shape range_for_this_iteration = new Point(queryPoint.x, queryPoint.y);
    do {
        job = new Job(templateConf);
        // Delete results of last iteration if not first iteration
        // NOTE(review): outputPath can no longer be null here; this check is
        // always true.
        if (outputPath != null)
            outFs.delete(outputPath, true);
        LOG.info("Running iteration: " + (++iterations));
        // Set query range for the SpatialInputFormat
        OperationsParams.setShape(job.getConfiguration(), RangeFilter.QueryRange, range_for_this_iteration);
        // Submit the job
        if (params.getBoolean("background", false)) {
            // XXX this is incorrect because if the job needs multiple
            // iterations,
            // it will run only the first one
            job.waitForCompletion(false);
            return job;
        }
        job.waitForCompletion(false);
        // Retrieve answers for this iteration
        Counters counters = job.getCounters();
        Counter resultSizeCounter = counters.findCounter(Task.Counter.REDUCE_OUTPUT_RECORDS);
        resultCount = resultSizeCounter.getValue();
        if (globalIndex != null) {
            Circle range_for_next_iteration;
            if (resultCount < k) {
                LOG.info("Found only " + resultCount + " results");
                // Did not find enough results in the query space
                // Increase the distance by doubling the maximum distance
                // among all
                // partitions that were processed
                final DoubleWritable maximum_distance = new DoubleWritable(0);
                int matched_partitions = globalIndex.rangeQuery(range_for_this_iteration,
                        new ResultCollector<Partition>() {
                            @Override
                            public void collect(Partition p) {
                                double distance = p.getMaxDistanceTo(queryPoint.x, queryPoint.y);
                                if (distance > maximum_distance.get())
                                    maximum_distance.set(distance);
                            }
                        });
                if (matched_partitions == 0) {
                    // The query point is outside the search space
                    // Set the range to include the closest partition
                    globalIndex.knn(queryPoint.x, queryPoint.y, 1, new ResultCollector2<Partition, Double>() {
                        @Override
                        public void collect(Partition r, Double s) {
                            maximum_distance.set(s);
                        }
                    });
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y, maximum_distance.get() * 2);
                LOG.info("Expanding to " + maximum_distance.get() * 2);
            } else {
                // Calculate the new test range which is a circle centered
                // at the
                // query point and distance to the k^{th} neighbor
                // Get distance to the kth neighbor
                final DoubleWritable distance_to_kth_neighbor = new DoubleWritable();
                FileStatus[] results = outFs.listStatus(outputPath);
                for (FileStatus result_file : results) {
                    if (result_file.getLen() > 0 && result_file.getPath().getName().startsWith("part-")) {
                        // Read the last line (kth neighbor)
                        Tail.tail(outFs, result_file.getPath(), 1, new TextWithDistance(),
                                new ResultCollector<TextWithDistance>() {
                                    @Override
                                    public void collect(TextWithDistance r) {
                                        distance_to_kth_neighbor.set(r.distance);
                                    }
                                });
                    }
                }
                range_for_next_iteration = new Circle(queryPoint.x, queryPoint.y, distance_to_kth_neighbor.get());
                LOG.info("Expanding to kth neighbor: " + distance_to_kth_neighbor);
            }
            // Calculate the number of blocks to be processed to check the
            // terminating condition: partitions in the next range not already
            // intersected by the current range
            additional_blocks_2b_processed.set(0);
            final Shape temp = range_for_this_iteration;
            globalIndex.rangeQuery(range_for_next_iteration, new ResultCollector<Partition>() {
                @Override
                public void collect(Partition p) {
                    if (!(p.isIntersected(temp))) {
                        additional_blocks_2b_processed.set(additional_blocks_2b_processed.get() + 1);
                    }
                }
            });
            range_for_this_iteration = range_for_next_iteration;
        }
    } while (additional_blocks_2b_processed.get() > 0);
    // If output file is not set by user, delete it
    if (userOutputPath == null)
        outFs.delete(outputPath, true);
    TotalIterations.addAndGet(iterations);
    return job;
}
From source file:eu.stratosphere.addons.parquet.SequenceFileSourceTest.java
License:Apache License
/**
 * Fills the given local file with a Hadoop {@code SequenceFile} whose records
 * are (index, line) pairs: the key is the position of the line in
 * {@code content} and the value is the line itself.
 *
 * @param f local file to (over)write as a SequenceFile
 * @param content lines to append, one record per element
 * @throws IOException if the file system or writer cannot be created
 */
private void populateSeqFile(File f, String[] content) throws IOException {
    URI fileUri = f.toURI();
    Configuration conf = new JobConf();
    FileSystem fs = FileSystem.get(fileUri, conf);
    Path seqPath = new Path(fileUri);
    IntWritable recordKey = new IntWritable();
    Text recordValue = new Text();
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, seqPath, recordKey.getClass(), recordValue.getClass());
        int index = 0;
        for (String line : content) {
            recordKey.set(index++);
            recordValue.set(line);
            writer.append(recordKey, recordValue);
        }
    } finally {
        // closeStream tolerates a null writer and swallows close errors
        IOUtils.closeStream(writer);
    }
}
From source file:hadoop.examples.hdfs.SequenceFileWriteDemo.java
License:Open Source License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String uri = "hdfs://exoplatform:9000/user/haint/temp.file"; Path path = new Path(uri); FileSystem fs = FileSystem.get(URI.create(uri), conf); ///*from ww w. j a v a 2 s.c om*/ IntWritable key = new IntWritable(); Text value = new Text(); SequenceFile.Writer writer = null; try { writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass()); for (int i = 0; i < 100; i++) { key.set(100 - i); value.set(DATA[i % DATA.length]); System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value); writer.append(key, value); } } finally { IOUtils.closeStream(writer); } }
From source file:hivemall.classifier.KernelExpansionPassiveAggressiveUDTF.java
License:Apache License
/**
 * Emits the learned model as rows of the shape
 * (h, w0, w1, w2, hk, w3), reusing a single row array and mutating the
 * writable cells between {@code forward} calls:
 * first one row carrying only w0, then one row per _w1/_w2 entry (h, w1, w2),
 * then one row per _w3 entry (hk, w3). The maps are released as they are
 * consumed to free memory.
 */
@Override
public void close() throws HiveException {
    final IntWritable h = new IntWritable(0); // row[0]
    final FloatWritable w0 = new FloatWritable(_w0); // row[1]
    final FloatWritable w1 = new FloatWritable(); // row[2]
    final FloatWritable w2 = new FloatWritable(); // row[3]
    final IntWritable hk = new IntWritable(0); // row[4]
    final FloatWritable w3 = new FloatWritable(); // row[5]
    final Object[] row = new Object[] { h, w0, null, null, null, null };
    forward(row); // 0(f), w0
    // Switch the row shape: drop w0, expose w1/w2 for the per-feature rows
    row[1] = null;
    row[2] = w1;
    row[3] = w2;
    final Int2FloatMap w2map = _w2;
    for (Int2FloatMap.Entry e : Fastutil.fastIterable(_w1)) {
        int k = e.getIntKey();
        Preconditions.checkArgument(k > 0, HiveException.class);
        h.set(k);
        w1.set(e.getFloatValue());
        w2.set(w2map.get(k)); // paired weight looked up by the same key
        forward(row); // h(f), w1, w2
    }
    // Release consumed maps so they can be garbage-collected
    this._w1 = null;
    this._w2 = null;
    // Switch the row shape again: only hk/w3 are populated from here on
    row[0] = null;
    row[2] = null;
    row[3] = null;
    row[4] = hk;
    row[5] = w3;
    // NOTE(review): the returned entry set is discarded; presumably intended to
    // pre-build fastutil's entry-set view before iterating — confirm, looks like
    // a no-op otherwise.
    _w3.int2FloatEntrySet();
    for (Int2FloatMap.Entry e : Fastutil.fastIterable(_w3)) {
        int k = e.getIntKey();
        Preconditions.checkArgument(k > 0, HiveException.class);
        hk.set(k);
        w3.set(e.getFloatValue());
        forward(row); // hk(f), w3
    }
    this._w3 = null;
}
From source file:hivemall.fm.FactorizationMachineUDTF.java
License:Apache License
private void forwardAsIntFeature(@Nonnull final FactorizationMachineModel model, final int factors) throws HiveException { final IntWritable f_idx = new IntWritable(0); final FloatWritable f_Wi = new FloatWritable(0.f); final FloatWritable[] f_Vi = HiveUtils.newFloatArray(factors, 0.f); final Object[] forwardObjs = new Object[3]; forwardObjs[0] = f_idx;//w w w . ja v a 2s . c o m forwardObjs[1] = f_Wi; forwardObjs[2] = null; // W0 f_idx.set(0); f_Wi.set(model.getW0()); // V0 is null forward(forwardObjs); // Wi, Vif (i starts from 1..P) forwardObjs[2] = Arrays.asList(f_Vi); for (int i = model.getMinIndex(), maxIdx = model.getMaxIndex(); i <= maxIdx; i++) { final float[] vi = model.getV(i); if (vi == null) { continue; } f_idx.set(i); // set Wi final float w = model.getW(i); f_Wi.set(w); // set Vif for (int f = 0; f < factors; f++) { float v = vi[f]; f_Vi[f].set(v); } forward(forwardObjs); } }
From source file:hivemall.ftvec.conv.QuantifyColumnsUDTF.java
License:Apache License
@Override public void process(Object[] args) throws HiveException { boolean outputRow = boolOI.get(args[0]); if (outputRow) { final Object[] forwardObjs = this.forwardObjs; for (int i = 0, outputSize = args.length - 1; i < outputSize; i++) { Object arg = args[i + 1]; Identifier<String> identifier = identifiers[i]; if (identifier == null) { forwardObjs[i] = arg;/*from w ww. ja v a2 s. co m*/ } else { if (arg == null) { forwardObjs[i] = null; } else { String k = arg.toString(); int id = identifier.valueOf(k); IntWritable o = forwardIntObjs[i]; o.set(id); forwardObjs[i] = o; } } } forward(forwardObjs); } else {// load only for (int i = 0, outputSize = args.length - 1; i < outputSize; i++) { Identifier<String> identifier = identifiers[i]; if (identifier != null) { Object arg = args[i + 1]; if (arg != null) { String k = arg.toString(); identifier.valueOf(k); } } } } }
From source file:hivemall.knn.similarity.DIMSUMMapperUDTF.java
License:Apache License
/**
 * DIMSUM mapper step for one input row with integer feature indices. Each
 * feature value is first scaled by min(sqrtGamma, column norm); then every
 * pair (j, k) of scaled features is sampled independently with the
 * per-column probabilities and, when both sides survive, the product
 * b_jk = jVal * kVal is forwarded. Writable cells are reused across
 * {@code forward} calls. The order/short-circuiting of the
 * {@code rnd.nextDouble()} calls is part of the sampling scheme.
 *
 * @param row features of one input row
 * @throws HiveException propagated from {@code forward}
 */
private void forwardAsIntFeature(@Nonnull Feature[] row) throws HiveException {
    final int length = row.length;
    Feature[] rowScaled = new Feature[length];
    for (int i = 0; i < length; i++) {
        int j = row[i].getFeatureIndex();
        // Columns absent from colNorms get norm 0
        double norm = Primitives.doubleValue(colNorms.get(j), 0.d);
        if (norm == 0.d) {
            // avoid zero-division
            norm = 1.d;
        }
        double scaled = row[i].getValue() / Math.min(sqrtGamma, norm);
        rowScaled[i] = new IntFeature(j, scaled);
    }
    final IntWritable jWritable = new IntWritable();
    final IntWritable kWritable = new IntWritable();
    final DoubleWritable bWritable = new DoubleWritable();
    final Object[] forwardObjs = new Object[3];
    forwardObjs[0] = jWritable;
    forwardObjs[1] = kWritable;
    forwardObjs[2] = bWritable;
    for (int ij = 0; ij < length; ij++) {
        int j = rowScaled[ij].getFeatureIndex();
        double jVal = rowScaled[ij].getValue();
        double jProb = Primitives.doubleValue(colProbs.get(j), 0.d);
        // Sample column j; zero values are never emitted
        if (jVal != 0.d && rnd.nextDouble() < jProb) {
            for (int ik = ij + 1; ik < length; ik++) {
                int k = rowScaled[ik].getFeatureIndex();
                double kVal = rowScaled[ik].getValue();
                double kProb = Primitives.doubleValue(colProbs.get(k), 0.d);
                // Independently sample column k (short-circuit skips the RNG
                // draw for zero values)
                if (kVal != 0.d && rnd.nextDouble() < kProb) {
                    // compute b_jk
                    bWritable.set(jVal * kVal);
                    if (symmetricOutput) {
                        // (j, k); similarity matrix is symmetric
                        jWritable.set(j);
                        kWritable.set(k);
                        forward(forwardObjs);
                        // (k, j)
                        jWritable.set(k);
                        kWritable.set(j);
                        forward(forwardObjs);
                    } else {
                        // Emit each unordered pair once, smaller index first
                        if (j < k) {
                            jWritable.set(j);
                            kWritable.set(k);
                        } else {
                            jWritable.set(k);
                            kWritable.set(j);
                        }
                        forward(forwardObjs);
                    }
                }
            }
        }
    }
}