Example usage for org.apache.hadoop.io.IntWritable.toString()


Introduction

On this page you can find usage examples for org.apache.hadoop.io.IntWritable.toString().

Prototype

@Override
public String toString()

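Before the individual examples, here is a minimal self-contained sketch of what the method does (the class name IntWritableToStringDemo is our own, for illustration): IntWritable.toString() returns Integer.toString() of the wrapped value, so it yields the plain decimal form of the int.

import org.apache.hadoop.io.IntWritable;

public class IntWritableToStringDemo {
    public static void main(String[] args) {
        IntWritable value = new IntWritable(42);
        // toString() returns the decimal string of the wrapped int: "42"
        System.out.println(value.toString());

        value.set(-7);
        // string concatenation invokes toString() implicitly: "value = -7"
        System.out.println("value = " + value);
    }
}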

Usage

From source file: ml.shifu.shifu.core.posttrain.PostTrainReducer.java

License: Apache License

@Override
protected void reduce(IntWritable key, Iterable<FeatureStatsWritable> values, Context context)
        throws IOException, InterruptedException {
    List<BinStats> binStats = null;
    for (FeatureStatsWritable fsw : values) {
        if (binStats == null) {
            binStats = fsw.getBinStats();
        } else {
            for (int i = 0; i < binStats.size(); i++) {
                BinStats rbs = binStats.get(i);
                BinStats bs = fsw.getBinStats().get(i);
                rbs.setBinSum(rbs.getBinSum() + bs.getBinSum());
                rbs.setBinCnt(rbs.getBinCnt() + bs.getBinCnt());
            }
        }
    }

    StringBuilder sb = new StringBuilder(150);
    for (int i = 0; i < binStats.size(); i++) {
        BinStats bs = binStats.get(i);
        int avgScore = 0;
        if (bs.getBinCnt() != 0L) {
            avgScore = (int) (bs.getBinSum() / bs.getBinCnt());
        }
        if (i == binStats.size() - 1) {
            sb.append(avgScore);
        } else {
            sb.append(avgScore).append(',');
        }
    }
    LOG.info(key.toString() + " " + sb.toString());
    this.outputValue.set(sb.toString());
    context.write(key, this.outputValue);
}
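
Here key.toString() is used only to render the reduce key for the log message; the IntWritable itself is written unchanged as the output key.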

From source file: mlbench.kmeans.KmeansInit.java

License: Apache License

/**
 * Get the input values and choose the K clusters' centers.
 *
 * @param dataPath
 * @throws MPI_D_Exception
 * @throws IOException
 * @throws MPIException
 */
@SuppressWarnings("deprecation")
private static void init(String args[], String dataPath, int kCluster, HashMap<String, String> conf)
        throws MPI_D_Exception, IOException, MPIException {
    MPI_D.Init(args, MPI_D.Mode.Common, conf);
    if (MPI_D.COMM_BIPARTITE_O != null) {
        rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);
        size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O);
        FileSplit[] inputs = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O, config,
                dataPath, rank);

        // read each input split and assign each point to a random cluster
        for (FileSplit path : inputs) {
            SequenceFileInputFormat f = new SequenceFileInputFormat();
            JobConf jobConf = new JobConf(confPath);
            Reporter r = new KmeansUtils.EmptyReport();
            RecordReader<LongWritable, VectorWritable> reader = f.getRecordReader(path, jobConf, r);

            Random random = new Random(1000);
            LongWritable k = reader.createKey();
            VectorWritable v = reader.createValue();

            IntWritable cluster = new IntWritable();
            while (reader.next(k, v)) {
                cluster.set(random.nextInt(kCluster));
                MPI_D.Send(cluster, v);
            }
            reader.close();
        }
    } else {
        IntWritable key = null, newKey = null;
        VectorWritable point = null, newPoint = null;
        double sum[] = null;
        int count = 0;
        Object[] vals = MPI_D.Recv();
        while (vals != null) {
            newKey = (IntWritable) vals[0];
            newPoint = (VectorWritable) vals[1];
            if (key == null && point == null) {
                sum = new double[newPoint.get().size()];
            } else if (!key.equals(newKey)) {
                double[] centerVals = new double[sum.length];
                for (int i = 0; i < centerVals.length; i++) {
                    centerVals[i] = sum[i] / count;
                }
                PointVector oneCenter = new PointVector(Integer.valueOf(key.toString()), centerVals);
                centers.add(oneCenter);
                sum = new double[point.get().size()];
                count = 0;
            }
            key = newKey;
            point = newPoint;
            KmeansUtils.accumulate(sum, newPoint.get());
            count++;
            vals = MPI_D.Recv();
        }
        if (newKey != null && newPoint != null) {
            double[] centerVals = new double[sum.length];
            for (int i = 0; i < centerVals.length; i++) {
                centerVals[i] = sum[i] / count;
            }
            PointVector oneCenter = new PointVector(key.get(), centerVals);
            centers.add(oneCenter);
        }

        transfer = new KmeansUtils.CenterTransfer(config, rank, size);
        transfer.gatherCentersByP2P(centers);

        if (rank == 0) {
            OutputStream resOut = KmeansUtils.getOutputStream(outPath, config);
            DataOutput os = new DataOutputStream(resOut);

            for (PointVector centerPoint : centers) {
                os.write((centerPoint.toString() + "\n").getBytes());
            }
            resOut.flush();
            resOut.close();
        }

        System.out.println("rank " + rank + " finish");
    }
    MPI_D.Finalize();
}
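
Note the two conversions above: inside the receive loop each PointVector is built with Integer.valueOf(key.toString()), parsing the decimal string that toString() produces, while the final flush uses key.get() to read the same int directly.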

From source file: mlbench.kmeans.KmeansIter.java

License: Apache License

/**
 * Calculate the new centers iteratively.
 *
 * @throws MPI_D_Exception
 * @throws MPIException
 * @throws IOException
 */
@SuppressWarnings("deprecation")
private static void iterBody(String args[], HashMap<String, String> conf)
        throws MPI_D_Exception, MPIException, IOException {
    MPI_D.Init(args, MPI_D.Mode.Common, conf);

    if (MPI_D.COMM_BIPARTITE_O != null) {
        rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);
        size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O);

        if (rank == 0) {
            System.out.println(centerPath);
            DataInputStream in = KmeansUtils.readFromHDFSF(new Path(centerPath), config);

            String lineVal;
            try {
                while ((lineVal = in.readLine()) != null) {
                    String lineSeq[] = lineVal.split(":");
                    PointVector p = new PointVector(Integer.valueOf(lineSeq[0]), format(lineSeq[1]));
                    centers.add(p);
                }
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                try {
                    in.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        MPI_D.COMM_BIPARTITE_O.Barrier();

        KmeansUtils.CenterTransfer transfer = new KmeansUtils.CenterTransfer(config, rank, size);
        transfer.broadcastCenters(centers);

        FileSplit[] inputs = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O, config,
                dataPath, rank);
        double centerSum[][] = new double[kCluster][];
        long centerPNum[] = new long[kCluster];

        // assign each point to its nearest center and accumulate per-cluster sums and counts
        for (FileSplit path : inputs) {
            SequenceFileInputFormat f = new SequenceFileInputFormat();
            JobConf jobConf = new JobConf(confPath);
            Reporter r = new KmeansUtils.EmptyReport();
            RecordReader<LongWritable, VectorWritable> reader = f.getRecordReader(path, jobConf, r);
            LongWritable k = reader.createKey();
            VectorWritable v = reader.createValue();

            while (reader.next(k, v)) {
                int centerBelong = (int) getBelongPoint(v);
                int len = v.get().size();
                if (centerSum[centerBelong] == null) {
                    centerSum[centerBelong] = new double[len];
                }
                for (int j = 0; j < len; j++) {
                    centerSum[centerBelong][j] += v.get().get(j);
                }
                centerPNum[centerBelong]++;
            }
            reader.close();
        }

        for (int i = 0; i < centerPNum.length; i++) {
            if (centerSum[i] == null && centerPNum[i] == 0) {
                continue;
            }
            MPI_D.Send(new IntWritable(i), new KmeansCenters(centerPNum[i], centerSum[i]));
        }
    } else {
        centers.clear();
        IntWritable key = null, newKey = null;
        KmeansCenters value = null, newValue = null;
        double sum[] = null;
        long count = 0;
        Object[] vals = MPI_D.Recv();
        while (vals != null) {
            newKey = (IntWritable) vals[0];
            newValue = (KmeansCenters) vals[1];
            if (key == null && value == null) {
                sum = new double[newValue.getVector().length];
            } else if (!key.equals(newKey)) {
                double[] centerVals = new double[sum.length];
                for (int i = 0; i < centerVals.length; i++) {
                    centerVals[i] = sum[i] / count;
                }
                PointVector oneCenter = new PointVector(Integer.valueOf(key.toString()), centerVals);
                centers.add(oneCenter);
                sum = new double[value.getVector().length];
                count = 0;
            }
            key = newKey;
            value = newValue;
            KmeansUtils.accumulate(sum, newValue.getVector());
            count += Long.valueOf(newValue.getPointSize());
            vals = MPI_D.Recv();
        }
        if (newKey != null && newValue != null) {
            double[] centerVals = new double[sum.length];
            for (int i = 0; i < centerVals.length; i++) {
                centerVals[i] = sum[i] / count;
            }
            PointVector oneCenter = new PointVector(key.get(), centerVals);
            centers.add(oneCenter);
        }

        KmeansUtils.CenterTransfer transfer = new KmeansUtils.CenterTransfer(config, rank, size);
        transfer.gatherCentersByP2P(centers);

        if (rank == 0) {
            OutputStream resOut = KmeansUtils.getOutputStream(outPath, config);
            DataOutput os = new DataOutputStream(resOut);

            for (PointVector centerPoint : centers) {
                os.write((centerPoint.toString() + "\n").getBytes());
            }
            resOut.flush();
            resOut.close();
        }
    }
    MPI_D.Finalize();
}

From source file: org.ankus.mapreduce.algorithms.statistics.numericstats.NumericStats1MRReducer.java

License: Apache License

@Override
protected void reduce(IntWritable key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
    Iterator<Text> iterator = values.iterator();

    int cnt = 0;
    double sum = 0;
    double avg = 0;
    double avgGeometric = 0;
    double avgHarmonic = 0;
    double variance = 0;
    double stdDeviation = 0;
    double maxData = 0;
    double minData = 0;
    double middleData_Value = 0;
    double harmonicSum = 0;
    double geometricSum = 0;
    double squareSum = 0;
    boolean allPositive = true;

    while (iterator.hasNext()) {
        double value = Double.parseDouble(iterator.next().toString());
        cnt++;

        if (cnt == 1) {
            maxData = value;
            minData = value;
        } else {
            if (maxData < value)
                maxData = value;
            if (minData > value)
                minData = value;
        }

        if (value <= 0)
            allPositive = false;
        sum += value;
        if (allPositive) {
            harmonicSum += 1 / value;
            geometricSum += Math.log10(value);
        }
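        // scale squares down by 10000 to reduce overflow risk; the factor is restored in the variance computation below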
        squareSum += Math.pow(value, 2) / 10000;
    }

    avg = sum / (double) cnt;
    if (allPositive) {
        avgHarmonic = (double) cnt / harmonicSum;
        avgGeometric = Math.pow(10, geometricSum / (double) cnt);
    } else {
        avgHarmonic = 0;
        avgGeometric = 0;
    }

    variance = (squareSum * 10000 / (double) cnt) - Math.pow(avg, 2);
    stdDeviation = Math.sqrt(variance);
    middleData_Value = (maxData + minData) / 2;

    String writeVal = sum + delimiter + avg + delimiter + avgHarmonic + delimiter + avgGeometric + delimiter
            + variance + delimiter + stdDeviation + delimiter + maxData + delimiter + minData + delimiter
            + middleData_Value;
    context.write(NullWritable.get(), new Text(key.toString() + delimiter + writeVal));
}
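
Since the record is written under NullWritable, key.toString() embeds the reduce key as the first field of the delimited output value.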

From source file: org.apache.hama.examples.util.WritableUtil.java

License: Apache License

/**
 * Method used to test RandomMatrixGenerator. Reads the input matrix from
 * the specified path and prints it to System.out.
 */
public static void readMatrix(String pathString) throws IOException {
    HamaConfiguration conf = new HamaConfiguration();
    Path dir = new Path(pathString);
    FileSystem fs = FileSystem.get(conf);
    FileStatus[] stats = fs.listStatus(dir);
    for (FileStatus stat : stats) {
        String filePath = stat.getPath().toUri().getPath(); // path of this file in the matrix directory
        SequenceFile.Reader reader = null;
        try {
            reader = new SequenceFile.Reader(fs, new Path(filePath), conf);
            IntWritable key = new IntWritable();
            SparseVectorWritable value = new SparseVectorWritable();
            while (reader.next(key, value)) {
                System.out.println(key.toString());
                System.out.println(value.toString());
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        } finally {
            if (reader != null) {
                reader.close(); // avoid leaking the file handle
            }
        }
    }

}

From source file: org.apache.mahout.knn.tools.BasicKMeans.java

License: Apache License

public static void main(String args[]) throws Exception {
    // The number of clusters to be formed.
    int numClusters = 2;
    List<Vector> vectors = getPoints(points);

    File testData = new File("testdata");
    if (!testData.exists()) {
        testData.mkdir();
    }
    testData = new File("testdata/points");
    if (!testData.exists()) {
        testData.mkdir();
    }

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    // Write the generated vectors to a sequence file.
    writePointsToFile(vectors, "testdata/points/file1", fs, conf);

    Path path = new Path("testdata/clusters/part-00000");
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class, Kluster.class);
    // Prepare the initial set of centroids.
    for (int i = 0; i < numClusters; ++i) {
        Vector vec = vectors.get(i);
        Cluster cluster = new Kluster(vec, i, new EuclideanDistanceMeasure());
        writer.append(new Text(cluster.asFormatString(null)), cluster);
    }
    writer.close();

    // Run the KMeans algorithm.
    KMeansDriver.run(conf, new Path("testdata/points/"), new Path("testdata/clusters"), new Path("output/"),
            new EuclideanDistanceMeasure(), 0.0001, // convergenceDelta
            10, // maxIterations
            true, // runClustering
            0.01, // clusterClassificationThreshold
            false // runSequential (if false, runs as MapReduce)
    );

    // Print out final results.
    SequenceFile.Reader reader = new SequenceFile.Reader(fs,
            new Path("output/" + Cluster.CLUSTERED_POINTS_DIR + "/part-m-00000"), conf);
    IntWritable key = new IntWritable();
    WeightedVectorWritable value = new WeightedVectorWritable();
    while (reader.next(key, value)) {
        System.out.println(value.toString() + " belongs to cluster " + key.toString());
    }
    reader.close();
}

From source file: org.apache.nutch.scoring.depth.DepthScoringFilter.java

License: Apache License

@Override
public void passScoreBeforeParsing(Text url, CrawlDatum datum, Content content) throws ScoringFilterException {
    IntWritable depth = (IntWritable) datum.getMetaData().get(DEPTH_KEY_W);
    if (depth != null) {
        content.getMetadata().set(DEPTH_KEY, depth.toString());
    }
    IntWritable maxdepth = (IntWritable) datum.getMetaData().get(MAX_DEPTH_KEY_W);
    if (maxdepth != null) {
        content.getMetadata().set(MAX_DEPTH_KEY, maxdepth.toString());
    }
}
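
Since Nutch stores content metadata as strings, depth.toString() and maxdepth.toString() serialize the integer depth values into the metadata map before parsing.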

From source file: org.apache.ranger.authorization.hive.udf.RangerUdfMask.java

License: Apache License

int getCharArg(ObjectInspector[] arguments, int index, int defaultValue) {
    int ret = defaultValue;

    ObjectInspector arg = (arguments != null && arguments.length > index) ? arguments[index] : null;

    if (arg != null) {
        if (arg instanceof WritableConstantIntObjectInspector) {
            IntWritable value = ((WritableConstantIntObjectInspector) arg).getWritableConstantValue();

            if (value != null) {
                ret = value.get();
            }
        } else if (arg instanceof WritableConstantLongObjectInspector) {
            LongWritable value = ((WritableConstantLongObjectInspector) arg).getWritableConstantValue();

            if (value != null) {
                ret = (int) value.get();
            }
        } else if (arg instanceof WritableConstantShortObjectInspector) {
            ShortWritable value = ((WritableConstantShortObjectInspector) arg).getWritableConstantValue();

            if (value != null) {
                ret = value.get();
            }
        } else if (arg instanceof ConstantObjectInspector) {
            Object value = ((ConstantObjectInspector) arg).getWritableConstantValue();

            if (value != null) {
                String strValue = value.toString();

                if (strValue != null && strValue.length() > 0) {
                    ret = strValue.charAt(0);
                }
            }
        }
    }

    return ret;
}

From source file: org.apache.ranger.authorization.hive.udf.RangerUdfMask.java

License: Apache License

int getIntArg(ObjectInspector[] arguments, int index, int defaultValue) {
    int ret = defaultValue;

    ObjectInspector arg = (arguments != null && arguments.length > index) ? arguments[index] : null;

    if (arg != null) {
        if (arg instanceof WritableConstantIntObjectInspector) {
            IntWritable value = ((WritableConstantIntObjectInspector) arg).getWritableConstantValue();

            if (value != null) {
                ret = value.get();
            }
        } else if (arg instanceof WritableConstantLongObjectInspector) {
            LongWritable value = ((WritableConstantLongObjectInspector) arg).getWritableConstantValue();

            if (value != null) {
                ret = (int) value.get();
            }
        } else if (arg instanceof WritableConstantShortObjectInspector) {
            ShortWritable value = ((WritableConstantShortObjectInspector) arg).getWritableConstantValue();

            if (value != null) {
                ret = value.get();
            }
        } else if (arg instanceof ConstantObjectInspector) {
            Object value = ((ConstantObjectInspector) arg).getWritableConstantValue();

            if (value != null) {
                String strValue = value.toString();

                if (strValue != null && strValue.length() > 0) {
                    ret = Integer.parseInt(strValue);
                }
            }
        }
    }

    return ret;
}

From source file: org.commoncrawl.mapred.pipelineV3.domainmeta.rank.RankedDumper.java

License: Open Source License

public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path(args[0]);

    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
    IntWritable key = new IntWritable();
    TextBytes value = new TextBytes();
    while (reader.next(key, value)) {
        System.out.println(key.toString() + "\t" + value.toString());
    }
    reader.close();
}