List of usage examples for org.apache.hadoop.io IntWritable toString
@Override
public String toString()
From source file:ml.shifu.shifu.core.posttrain.PostTrainReducer.java
License:Apache License
/**
 * Merges the bin statistics of every value received for {@code key} and emits the
 * integer average score of each bin as one comma-separated string.
 *
 * <p>Bin sums and bin counts are added up position by position; the average of a bin
 * is {@code binSum / binCnt} truncated to {@code int}, or {@code 0} when the bin count
 * is zero.
 *
 * @param key     key being reduced; logged and written out unchanged
 * @param values  partial bin statistics to merge
 * @param context context the (key, averages) pair is written to
 * @throws IOException          if writing the output fails
 * @throws InterruptedException if writing the output is interrupted
 */
@Override
protected void reduce(IntWritable key, Iterable<FeatureStatsWritable> values, Context context)
        throws IOException, InterruptedException {
    List<BinStats> binStats = null;
    for (FeatureStatsWritable fsw : values) {
        if (binStats == null) {
            // The first value's list doubles as the accumulator for all later values.
            // NOTE(review): Hadoop may reuse the value instance between iterations; this is
            // only safe if getBinStats() hands out a fresh list per record - confirm.
            binStats = fsw.getBinStats();
        } else {
            List<BinStats> current = fsw.getBinStats();
            for (int i = 0; i < binStats.size(); i++) {
                BinStats rbs = binStats.get(i);
                BinStats bs = current.get(i);
                rbs.setBinSum(rbs.getBinSum() + bs.getBinSum());
                rbs.setBinCnt(rbs.getBinCnt() + bs.getBinCnt());
            }
        }
    }

    if (binStats == null) {
        // No values at all: there is nothing to average, and dereferencing the
        // accumulator below would throw a NullPointerException.
        LOG.info(key.toString() + " has no bin stats, skipped");
        return;
    }

    StringBuilder sb = new StringBuilder(150);
    for (int i = 0; i < binStats.size(); i++) {
        BinStats bs = binStats.get(i);
        int avgScore = 0;
        if (bs.getBinCnt() != 0L) {
            avgScore = (int) (bs.getBinSum() / bs.getBinCnt());
        }
        if (i > 0) {
            sb.append(',');
        }
        sb.append(avgScore);
    }

    String averages = sb.toString();
    LOG.info(key.toString() + " " + averages);
    this.outputValue.set(averages);
    context.write(key, this.outputValue);
}
From source file:mlbench.kmeans.KmeansInit.java
License:Apache License
/** * get the input values and choose the K clusters' centers * * @param dataPath//w w w .j a v a2 s .c om * @throws MPI_D_Exception * @throws IOException * @throws MPIException */ @SuppressWarnings("deprecation") private static void init(String args[], String dataPath, int kCluster, HashMap<String, String> conf) throws MPI_D_Exception, IOException, MPIException { MPI_D.Init(args, MPI_D.Mode.Common, conf); if (MPI_D.COMM_BIPARTITE_O != null) { rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O); size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O); FileSplit[] inputs = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O, config, dataPath, rank); // for record the initialized state for (FileSplit path : inputs) { SequenceFileInputFormat f = new SequenceFileInputFormat(); JobConf jobConf = new JobConf(confPath); Reporter r = new KmeansUtils.EmptyReport(); RecordReader<LongWritable, VectorWritable> reader = f.getRecordReader(path, jobConf, r); Random random = new Random(1000); LongWritable k = reader.createKey(); VectorWritable v = reader.createValue(); IntWritable cluster = new IntWritable(); while (reader.next(k, v)) { cluster.set(random.nextInt(kCluster)); MPI_D.Send(cluster, v); } reader.close(); } } else { IntWritable key = null, newKey = null; VectorWritable point = null, newPoint = null; double sum[] = null; int count = 0; Object[] vals = MPI_D.Recv(); while (vals != null) { newKey = (IntWritable) vals[0]; newPoint = (VectorWritable) vals[1]; if (key == null && point == null) { sum = new double[newPoint.get().size()]; } else if (!key.equals(newKey)) { double[] centerVals = new double[sum.length]; for (int i = 0; i < centerVals.length; i++) { centerVals[i] = sum[i] / count; } PointVector oneCenter = new PointVector(Integer.valueOf(key.toString()), centerVals); centers.add(oneCenter); sum = new double[point.get().size()]; count = 0; } key = newKey; point = newPoint; KmeansUtils.accumulate(sum, newPoint.get()); count++; vals = MPI_D.Recv(); } if (newKey != 
null && newPoint != null) { double[] centerVals = new double[sum.length]; for (int i = 0; i < centerVals.length; i++) { centerVals[i] = sum[i] / count; } PointVector oneCenter = new PointVector(key.get(), centerVals); centers.add(oneCenter); } transfer = new KmeansUtils.CenterTransfer(config, rank, size); transfer.gatherCentersByP2P(centers); if (rank == 0) { OutputStream resOut = KmeansUtils.getOutputStream(outPath, config); DataOutput os = new DataOutputStream(resOut); for (PointVector centerPoint : centers) { os.write((centerPoint.toString() + "\n").getBytes()); } resOut.flush(); resOut.close(); } System.out.println("rank " + rank + " finish"); } MPI_D.Finalize(); }
From source file:mlbench.kmeans.KmeansIter.java
License:Apache License
/** * Calculate the new center iteratively/*from w w w. j ava 2s . c o m*/ * * @return true: finish; false: continue * @throws MPI_D_Exception * @throws MPIException * @throws IOException */ @SuppressWarnings("deprecation") private static void iterBody(String args[], HashMap<String, String> conf) throws MPI_D_Exception, MPIException, IOException { MPI_D.Init(args, MPI_D.Mode.Common, conf); if (MPI_D.COMM_BIPARTITE_O != null) { rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O); size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O); if (rank == 0) { System.out.println(centerPath); DataInputStream in = KmeansUtils.readFromHDFSF(new Path(centerPath), config); String lineVal; try { while ((lineVal = in.readLine()) != null) { String lineSeq[] = lineVal.split(":"); PointVector p = new PointVector(Integer.valueOf(lineSeq[0]), format(lineSeq[1])); centers.add(p); } } catch (IOException e) { e.printStackTrace(); } finally { try { in.close(); } catch (IOException e) { e.printStackTrace(); } } } MPI_D.COMM_BIPARTITE_O.Barrier(); KmeansUtils.CenterTransfer transfer = new KmeansUtils.CenterTransfer(config, rank, size); transfer.broadcastCenters(centers); FileSplit[] inputs = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O, config, dataPath, rank); double centerSum[][] = new double[kCluster][]; long centerPNum[] = new long[kCluster]; // for record the initialized state for (FileSplit path : inputs) { SequenceFileInputFormat f = new SequenceFileInputFormat(); JobConf jobConf = new JobConf(confPath); Reporter r = new KmeansUtils.EmptyReport(); RecordReader<LongWritable, VectorWritable> reader = f.getRecordReader(path, jobConf, r); LongWritable k = reader.createKey(); VectorWritable v = reader.createValue(); while (reader.next(k, v)) { int centerBelong = (int) getBelongPoint(v); // int i = (int) p.getStrClusterClass(); // double[] vals = p.getDoubleValue(); int len = v.get().size(); if (centerSum[centerBelong] == null) { centerSum[centerBelong] = new double[len]; } for 
(int j = 0; j < len; j++) { centerSum[centerBelong][j] += v.get().get(j); } centerPNum[centerBelong]++; } reader.close(); } for (int i = 0; i < centerPNum.length; i++) { if (centerSum[i] == null && centerPNum[i] == 0) { continue; } MPI_D.Send(new IntWritable(i), new KmeansCenters(centerPNum[i], centerSum[i])); } } else { centers.clear(); IntWritable key = null, newKey = null; KmeansCenters value = null, newValue = null; double sum[] = null; long count = 0; Object[] vals = MPI_D.Recv(); while (vals != null) { newKey = (IntWritable) vals[0]; newValue = (KmeansCenters) vals[1]; if (key == null && value == null) { sum = new double[newValue.getVector().length]; } else if (!key.equals(newKey)) { double[] centerVals = new double[sum.length]; for (int i = 0; i < centerVals.length; i++) { centerVals[i] = (double) sum[i] / count; } PointVector oneCenter = new PointVector(Integer.valueOf(key.toString()), centerVals); centers.add(oneCenter); sum = new double[value.getVector().length]; count = 0; } key = newKey; value = newValue; KmeansUtils.accumulate(sum, newValue.getVector()); count += Long.valueOf(newValue.getPointSize()); vals = MPI_D.Recv(); } if (newKey != null && newValue != null) { double[] centerVals = new double[sum.length]; for (int i = 0; i < centerVals.length; i++) { centerVals[i] = sum[i] / count; } PointVector oneCenter = new PointVector(key.get(), centerVals); centers.add(oneCenter); } KmeansUtils.CenterTransfer transfer = new KmeansUtils.CenterTransfer(config, rank, size); transfer.gatherCentersByP2P(centers); if (rank == 0) { OutputStream resOut = KmeansUtils.getOutputStream(outPath, config); DataOutput os = new DataOutputStream(resOut); for (PointVector centerPoint : centers) { os.write((centerPoint.toString() + "\n").getBytes()); } resOut.flush(); resOut.close(); } } MPI_D.Finalize(); }
From source file:org.ankus.mapreduce.algorithms.statistics.numericstats.NumericStats1MRReducer.java
License:Apache License
@Override protected void reduce(IntWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException { Iterator<Text> iterator = values.iterator(); int cnt = 0;//from w ww . j a v a 2s . c om double sum = 0; double avg = 0; double avgGeometric = 0; double avgHarmonic = 0; double variance = 0; double stdDeviation = 0; double maxData = 0; double minData = 0; double middleData_Value = 0; double harmonicSum = 0; double geometricSum = 0; double squareSum = 0; boolean allPositive = true; while (iterator.hasNext()) { double value = Double.parseDouble(iterator.next().toString()); cnt++; if (cnt == 1) { maxData = value; minData = value; } else { if (maxData < value) maxData = value; if (minData > value) minData = value; } if (value <= 0) allPositive = false; sum += value; if (allPositive) { harmonicSum += 1 / value; geometricSum += Math.log10(value); } squareSum += Math.pow(value, 2) / 10000; } avg = sum / (double) cnt; if (allPositive) { avgHarmonic = (double) cnt / harmonicSum; avgGeometric = Math.pow(10, geometricSum / (double) cnt); } else { avgHarmonic = 0; avgGeometric = 0; } variance = (squareSum * 10000 / (double) cnt) - Math.pow(avg, 2); stdDeviation = Math.sqrt(variance); middleData_Value = (maxData + minData) / 2; String writeVal = sum + delimiter + avg + delimiter + avgHarmonic + delimiter + avgGeometric + delimiter + variance + delimiter + stdDeviation + delimiter + maxData + delimiter + minData + delimiter + middleData_Value; context.write(NullWritable.get(), new Text(key.toString() + delimiter + writeVal)); }
From source file:org.apache.hama.examples.util.WritableUtil.java
License:Apache License
/** * Method used to test RandomMatrixGenerator. Reads input matrix from * specified path and prints to System.out *///from ww w . j a v a 2s . co m public static void readMatrix(String pathString) throws IOException { HamaConfiguration conf = new HamaConfiguration(); Path dir = new Path(pathString); FileSystem fs = FileSystem.get(conf); FileStatus[] stats = fs.listStatus(dir); for (FileStatus stat : stats) { String filePath = stat.getPath().toUri().getPath(); // gives directory // name try { SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(filePath), conf); IntWritable key = new IntWritable(); SparseVectorWritable value = new SparseVectorWritable(); while (reader.next(key, value)) { System.out.println(key.toString()); System.out.println(value.toString()); } } catch (IOException e) { throw new RuntimeException(e); } } }
From source file:org.apache.mahout.knn.tools.BasicKMeans.java
License:Apache License
public static void main(String args[]) throws Exception { // The number of clusters to be formed. int numClusters = 2; List<Vector> vectors = getPoints(points); File testData = new File("testdata"); if (!testData.exists()) { testData.mkdir();/*ww w . ja va 2 s. c o m*/ } testData = new File("testdata/points"); if (!testData.exists()) { testData.mkdir(); } Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); // Wrote the generated vectors to a sequence file. writePointsToFile(vectors, "testdata/points/file1", fs, conf); Path path = new Path("testdata/clusters/part-00000"); SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class, Kluster.class); // Prepare the initial set of centroids. for (int i = 0; i < numClusters; ++i) { Vector vec = vectors.get(i); Cluster cluster = new Kluster(vec, i, new EuclideanDistanceMeasure()); writer.append(new Text(cluster.asFormatString(null)), cluster); } writer.close(); // Run the KMeans algorithm. KMeansDriver.run(conf, new Path("testdata/points/"), new Path("testdata/clusters"), new Path("output/"), new EuclideanDistanceMeasure(), 0.0001, // convergenceDelta 10, // maxIterations true, // runClustering 0.01, // clusterClassificationThreshold false // runSequential (if false, runs as MapReduce ); // Print out final results. SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path("output/" + Cluster.CLUSTERED_POINTS_DIR + "/part-m-00000"), conf); IntWritable key = new IntWritable(); WeightedVectorWritable value = new WeightedVectorWritable(); while (reader.next(key, value)) { System.out.println(value.toString() + " belongs to cluster " + key.toString()); } reader.close(); }
From source file:org.apache.nutch.scoring.depth.DepthScoringFilter.java
License:Apache License
/**
 * Copies the {@code DEPTH_KEY_W} and {@code MAX_DEPTH_KEY_W} entries of the datum's
 * metadata, when present, into the content metadata under {@code DEPTH_KEY} and
 * {@code MAX_DEPTH_KEY}, as their string form.
 *
 * @param url     not used by this filter
 * @param datum   crawl datum whose metadata is read
 * @param content content whose metadata receives the copied values
 * @throws ScoringFilterException declared by the overridden method; not thrown here
 */
@Override
public void passScoreBeforeParsing(Text url, CrawlDatum datum, Content content)
        throws ScoringFilterException {
    final IntWritable currentDepth = (IntWritable) datum.getMetaData().get(DEPTH_KEY_W);
    if (null != currentDepth) {
        final String depthText = currentDepth.toString();
        content.getMetadata().set(DEPTH_KEY, depthText);
    }

    final IntWritable depthLimit = (IntWritable) datum.getMetaData().get(MAX_DEPTH_KEY_W);
    if (null != depthLimit) {
        final String limitText = depthLimit.toString();
        content.getMetadata().set(MAX_DEPTH_KEY, limitText);
    }
}
From source file:org.apache.ranger.authorization.hive.udf.RangerUdfMask.java
License:Apache License
/**
 * Resolves the constant argument at {@code index} to an {@code int}.
 *
 * <p>Int, long and short constants yield their numeric value (a long is narrowed with an
 * {@code (int)} cast). Any other constant yields the code of the first character of its
 * string form. In every other case - no such argument, a non-constant inspector, a null
 * constant or an empty string - {@code defaultValue} is returned.
 *
 * @param arguments    inspectors of the call arguments; may be null
 * @param index        position of the argument to resolve
 * @param defaultValue value returned when nothing can be resolved
 * @return the resolved value, or {@code defaultValue}
 */
int getCharArg(ObjectInspector[] arguments, int index, int defaultValue) {
    if (arguments == null || arguments.length <= index) {
        return defaultValue;
    }

    final ObjectInspector inspector = arguments[index];
    if (inspector == null) {
        return defaultValue;
    }

    // The specific writable inspectors must be tested before the generic constant one.
    if (inspector instanceof WritableConstantIntObjectInspector) {
        final IntWritable intConst = ((WritableConstantIntObjectInspector) inspector).getWritableConstantValue();
        return intConst == null ? defaultValue : intConst.get();
    }

    if (inspector instanceof WritableConstantLongObjectInspector) {
        final LongWritable longConst = ((WritableConstantLongObjectInspector) inspector).getWritableConstantValue();
        return longConst == null ? defaultValue : (int) longConst.get();
    }

    if (inspector instanceof WritableConstantShortObjectInspector) {
        final ShortWritable shortConst = ((WritableConstantShortObjectInspector) inspector).getWritableConstantValue();
        return shortConst == null ? defaultValue : shortConst.get();
    }

    if (inspector instanceof ConstantObjectInspector) {
        final Object constant = ((ConstantObjectInspector) inspector).getWritableConstantValue();
        if (constant != null) {
            final String text = constant.toString();
            if (text != null && !text.isEmpty()) {
                return text.charAt(0);
            }
        }
    }

    return defaultValue;
}
From source file:org.apache.ranger.authorization.hive.udf.RangerUdfMask.java
License:Apache License
/**
 * Resolves the constant argument at {@code index} to an {@code int}.
 *
 * <p>Int, long and short constants yield their numeric value (a long is narrowed with an
 * {@code (int)} cast). Any other constant is converted to a string and parsed with
 * {@link Integer#parseInt(String)}. In every other case - no such argument, a
 * non-constant inspector, a null constant or an empty string - {@code defaultValue} is
 * returned.
 *
 * @param arguments    inspectors of the call arguments; may be null
 * @param index        position of the argument to resolve
 * @param defaultValue value returned when nothing can be resolved
 * @return the resolved value, or {@code defaultValue}
 * @throws NumberFormatException if a non-empty generic constant is not a parsable integer
 */
int getIntArg(ObjectInspector[] arguments, int index, int defaultValue) {
    if (arguments == null || arguments.length <= index) {
        return defaultValue;
    }

    final ObjectInspector inspector = arguments[index];
    if (inspector == null) {
        return defaultValue;
    }

    // The specific writable inspectors must be tested before the generic constant one.
    if (inspector instanceof WritableConstantIntObjectInspector) {
        final IntWritable intConst = ((WritableConstantIntObjectInspector) inspector).getWritableConstantValue();
        return intConst == null ? defaultValue : intConst.get();
    }

    if (inspector instanceof WritableConstantLongObjectInspector) {
        final LongWritable longConst = ((WritableConstantLongObjectInspector) inspector).getWritableConstantValue();
        return longConst == null ? defaultValue : (int) longConst.get();
    }

    if (inspector instanceof WritableConstantShortObjectInspector) {
        final ShortWritable shortConst = ((WritableConstantShortObjectInspector) inspector).getWritableConstantValue();
        return shortConst == null ? defaultValue : shortConst.get();
    }

    if (inspector instanceof ConstantObjectInspector) {
        final Object constant = ((ConstantObjectInspector) inspector).getWritableConstantValue();
        if (constant != null) {
            final String text = constant.toString();
            if (text != null && !text.isEmpty()) {
                return Integer.parseInt(text);
            }
        }
    }

    return defaultValue;
}
From source file:org.commoncrawl.mapred.pipelineV3.domainmeta.rank.RankedDumper.java
License:Open Source License
/**
 * Dumps a sequence file of ({@code IntWritable}, {@code TextBytes}) records to standard
 * output, one {@code key<TAB>value} line per record.
 *
 * @param args {@code args[0]} is the path of the sequence file to dump
 * @throws IOException              if the file cannot be opened or read
 * @throws IllegalArgumentException if no path argument is given
 */
public static void main(String[] args) throws IOException {
    if (args.length < 1) {
        // Fail with an explanation instead of a bare ArrayIndexOutOfBoundsException.
        throw new IllegalArgumentException("Missing argument: path of the sequence file to dump");
    }

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path(args[0]);

    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
    try {
        IntWritable key = new IntWritable();
        TextBytes value = new TextBytes();
        while (reader.next(key, value)) {
            System.out.println(key.toString() + "\t" + value.toString());
        }
    } finally {
        // Close the reader even when reading a record fails.
        reader.close();
    }
}