Example usage for org.apache.hadoop.io IntWritable set

Introduction

On this page you can find example usage for org.apache.hadoop.io IntWritable set.

Prototype

public void set(int value) 

Document

Set the value of this IntWritable.
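
Below is a minimal, self-contained sketch of the pattern shared by the examples on this page (the class name IntWritableSetExample is illustrative, not taken from any of the sources below): a single IntWritable is allocated once, and set(int) updates its value before each use, so one object can be reused across many records.

import org.apache.hadoop.io.IntWritable;

public class IntWritableSetExample {
    public static void main(String[] args) {
        IntWritable key = new IntWritable(); // allocate a single reusable instance
        for (int i = 0; i < 3; i++) {
            key.set(i);                      // update the wrapped int in place
            System.out.println(key.get());   // prints 0, then 1, then 2
        }
    }
}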

Usage

From source file: org.apache.mahout.common.mapreduce.TransposeMapper.java

License: Apache License

@Override
protected void map(IntWritable r, VectorWritable v, Context ctx) throws IOException, InterruptedException {
    int row = r.get();
    for (Vector.Element e : v.get().nonZeroes()) {
        RandomAccessSparseVector tmp = new RandomAccessSparseVector(newNumCols, 1);
        tmp.setQuick(row, e.get());
        r.set(e.index());
        ctx.write(r, new VectorWritable(tmp));
    }
}

From source file: org.apache.mahout.math.DistributedRowMatrixWriter.java

License: Apache License

public static void write(Path outputDir, Configuration conf, Iterable<MatrixSlice> matrix) throws IOException {
    FileSystem fs = outputDir.getFileSystem(conf);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, outputDir, IntWritable.class,
            VectorWritable.class);
    IntWritable topic = new IntWritable();
    VectorWritable vector = new VectorWritable();
    for (MatrixSlice slice : matrix) {
        topic.set(slice.index());
        vector.set(slice.vector());
        writer.append(topic, vector);
    }
    writer.close();

}

From source file: org.apache.mahout.math.hadoop.decomposer.DistributedLanczosSolver.java

License: Apache License

/**
 * @param state The final LanczosState to be serialized
 * @param outputPath The path (relative to the current Configuration's FileSystem) to save the output to.
 */
public void serializeOutput(LanczosState state, Path outputPath) throws IOException {
    int numEigenVectors = state.getIterationNumber();
    log.info("Persisting {} eigenVectors and eigenValues to: {}", numEigenVectors, outputPath);
    Configuration conf = getConf() != null ? getConf() : new Configuration();
    FileSystem fs = FileSystem.get(outputPath.toUri(), conf);
    SequenceFile.Writer seqWriter = new SequenceFile.Writer(fs, conf, outputPath, IntWritable.class,
            VectorWritable.class);
    try {
        IntWritable iw = new IntWritable();
        for (int i = 0; i < numEigenVectors; i++) {
            // Persist eigenvectors sorted by eigenvalues in descending order
            NamedVector v = new NamedVector(state.getRightSingularVector(numEigenVectors - 1 - i),
                    "eigenVector" + i + ", eigenvalue = " + state.getSingularValue(numEigenVectors - 1 - i));
            Writable vw = new VectorWritable(v);
            iw.set(i);
            seqWriter.append(iw, vw);
        }
    } finally {
        Closeables.close(seqWriter, false);
    }
}

From source file: org.apache.mahout.math.hadoop.decomposer.EigenVerificationJob.java

License: Apache License

private void saveCleanEigens(Configuration conf,
        Collection<Map.Entry<MatrixSlice, EigenStatus>> prunedEigenMeta) throws IOException {
    Path path = new Path(outPath, CLEAN_EIGENVECTORS);
    FileSystem fs = FileSystem.get(path.toUri(), conf);
    SequenceFile.Writer seqWriter = new SequenceFile.Writer(fs, conf, path, IntWritable.class,
            VectorWritable.class);
    try {
        IntWritable iw = new IntWritable();
        int numEigensWritten = 0;
        int index = 0;
        for (Map.Entry<MatrixSlice, EigenStatus> pruneSlice : prunedEigenMeta) {
            MatrixSlice s = pruneSlice.getKey();
            EigenStatus meta = pruneSlice.getValue();
            EigenVector ev = new EigenVector(s.vector(), meta.getEigenValue(), Math.abs(1 - meta.getCosAngle()),
                    s.index());
            // log.info("appending {} to {}", ev, path);
            Writable vw = new VectorWritable(ev);
            iw.set(index++);
            seqWriter.append(iw, vw);

            // increment the number of eigenvectors written and see if we've
            // reached our specified limit, or if we wish to write all eigenvectors
            // (the latter is built in, since numEigensWritten will always be > 0)
            numEigensWritten++;
            if (numEigensWritten == maxEigensToKeep) {
                log.info("{} of the {} total eigens have been written", maxEigensToKeep,
                        prunedEigenMeta.size());
                break;
            }
        }
    } finally {
        Closeables.close(seqWriter, false);
    }
    cleanedEigensPath = path;
}

From source file: org.apache.mahout.math.hadoop.stochasticsvd.LocalSSVDPCADenseTest.java

License: Apache License

public void runSSVDSolver(int q) throws IOException {

    Configuration conf = new Configuration();
    conf.set("mapred.job.tracker", "local");
    conf.set("fs.default.name", "file:///");

    // conf.set("mapred.job.tracker","localhost:11011");
    // conf.set("fs.default.name","hdfs://localhost:11010/");

    Deque<Closeable> closeables = new LinkedList<Closeable>();
    Random rnd = RandomUtils.getRandom();

    File tmpDir = getTestTempDir("svdtmp");
    conf.set("hadoop.tmp.dir", tmpDir.getAbsolutePath());

    Path aLocPath = new Path(getTestTempDirPath("svdtmp/A"), "A.seq");

    // create distributed row matrix-like struct
    SequenceFile.Writer w = SequenceFile.createWriter(FileSystem.getLocal(conf), conf, aLocPath,
            IntWritable.class, VectorWritable.class, CompressionType.BLOCK, new DefaultCodec());
    closeables.addFirst(w);

    int n = 100;
    int m = 2000;
    double percent = 5;

    VectorWritable vw = new VectorWritable();
    IntWritable roww = new IntWritable();

    Vector xi = new DenseVector(n);

    double muAmplitude = 50.0;
    for (int i = 0; i < m; i++) {
        Vector dv = new SequentialAccessSparseVector(n);
        for (int j = 0; j < n * percent / 100; j++) {
            dv.setQuick(rnd.nextInt(n), muAmplitude * (rnd.nextDouble() - 0.25));
        }
        roww.set(i);
        vw.set(dv);
        w.append(roww, vw);
        xi.assign(dv, Functions.PLUS);
    }
    closeables.remove(w);
    Closeables.close(w, true);

    xi.assign(Functions.mult(1.0 / m)); // floating-point division; 1 / m would truncate to 0

    FileSystem fs = FileSystem.get(conf);

    Path tempDirPath = getTestTempDirPath("svd-proc");
    Path aPath = new Path(tempDirPath, "A/A.seq");
    fs.copyFromLocalFile(aLocPath, aPath);
    Path xiPath = new Path(tempDirPath, "xi/xi.seq");
    SSVDHelper.saveVector(xi, xiPath, conf);

    Path svdOutPath = new Path(tempDirPath, "SSVD-out");

    // make sure we wipe out previous test results, just a convenience
    fs.delete(svdOutPath, true);

    // Solver starts here:
    System.out.println("Input prepared, starting solver...");

    int ablockRows = 867;
    int p = 60;
    int k = 40;
    SSVDSolver ssvd = new SSVDSolver(conf, new Path[] { aPath }, svdOutPath, ablockRows, k, p, 3);
    ssvd.setOuterBlockHeight(500);
    ssvd.setAbtBlockHeight(251);
    ssvd.setPcaMeanPath(xiPath);

    /*
     * Remove the U and V jobs from this test to reduce running time; they are
     * still exercised in the dense test.
     */
    ssvd.setComputeU(false);
    ssvd.setComputeV(false);

    ssvd.setOverwrite(true);
    ssvd.setQ(q);
    ssvd.setBroadcast(true);
    ssvd.run();

    Vector stochasticSValues = ssvd.getSingularValues();
    System.out.println("--SSVD solver singular values:");
    LocalSSVDSolverSparseSequentialTest.dumpSv(stochasticSValues);
    System.out.println("--Colt SVD solver singular values:");

    // try to run the same thing without stochastic algo
    double[][] a = SSVDHelper.loadDistributedRowMatrix(fs, aPath, conf);

    // subtract pseudo pca mean
    for (int i = 0; i < m; i++)
        for (int j = 0; j < n; j++)
            a[i][j] -= xi.getQuick(j);

    SingularValueDecomposition svd2 = new SingularValueDecomposition(new DenseMatrix(a));

    Vector svalues2 = new DenseVector(svd2.getSingularValues());
    LocalSSVDSolverSparseSequentialTest.dumpSv(svalues2);

    for (int i = 0; i < k + p; i++) {
        assertTrue(Math.abs(svalues2.getQuick(i) - stochasticSValues.getQuick(i)) <= s_epsilon);
    }

    double[][] mQ = SSVDHelper.loadDistributedRowMatrix(fs,
            new Path(svdOutPath, "Bt-job/" + BtJob.OUTPUT_Q + "-*"), conf);

    SSVDCommonTest.assertOrthonormality(new DenseMatrix(mQ), false, s_epsilon);

}

From source file: org.apache.mahout.math.hadoop.stochasticsvd.LocalSSVDSolverSparseSequentialTest.java

License: Apache License

public void runSSVDSolver(int q) throws IOException {

    Configuration conf = getConfiguration();
    conf.set("mapred.job.tracker", "local");
    conf.set("fs.default.name", "file:///");

    // conf.set("mapred.job.tracker","localhost:11011");
    // conf.set("fs.default.name","hdfs://localhost:11010/");

    Deque<Closeable> closeables = Lists.newLinkedList();
    Random rnd = RandomUtils.getRandom();

    File tmpDir = getTestTempDir("svdtmp");
    conf.set("hadoop.tmp.dir", tmpDir.getAbsolutePath());

    Path aLocPath = new Path(getTestTempDirPath("svdtmp/A"), "A.seq");

    // create distributed row matrix-like struct
    SequenceFile.Writer w = SequenceFile.createWriter(FileSystem.getLocal(conf), conf, aLocPath,
            IntWritable.class, VectorWritable.class, CompressionType.BLOCK, new DefaultCodec());
    closeables.addFirst(w);

    int n = 100;
    int m = 2000;
    double percent = 5;

    VectorWritable vw = new VectorWritable();
    IntWritable roww = new IntWritable();

    double muAmplitude = 50.0;
    for (int i = 0; i < m; i++) {
        Vector dv = new SequentialAccessSparseVector(n);
        for (int j = 0; j < n * percent / 100; j++) {
            dv.setQuick(rnd.nextInt(n), muAmplitude * (rnd.nextDouble() - 0.5));
        }
        roww.set(i);
        vw.set(dv);
        w.append(roww, vw);
    }
    closeables.remove(w);
    Closeables.close(w, false);

    FileSystem fs = FileSystem.get(aLocPath.toUri(), conf);

    Path tempDirPath = getTestTempDirPath("svd-proc");
    Path aPath = new Path(tempDirPath, "A/A.seq");
    fs.copyFromLocalFile(aLocPath, aPath);

    Path svdOutPath = new Path(tempDirPath, "SSVD-out");

    // make sure we wipe out previous test results, just a convenience
    fs.delete(svdOutPath, true);

    // Solver starts here:
    System.out.println("Input prepared, starting solver...");

    int ablockRows = 867;
    int p = 60;
    int k = 40;
    SSVDSolver ssvd = new SSVDSolver(conf, new Path[] { aPath }, svdOutPath, ablockRows, k, p, 3);
    ssvd.setOuterBlockHeight(500);
    ssvd.setAbtBlockHeight(251);

    /*
     * Remove the U and V jobs from this test to reduce running time; they are
     * still exercised in the dense test.
     */
    ssvd.setComputeU(false);
    ssvd.setComputeV(false);

    ssvd.setOverwrite(true);
    ssvd.setQ(q);
    ssvd.setBroadcast(true);
    ssvd.run();

    Vector stochasticSValues = ssvd.getSingularValues();
    System.out.println("--SSVD solver singular values:");
    dumpSv(stochasticSValues);
    System.out.println("--Colt SVD solver singular values:");

    // try to run the same thing without stochastic algo
    DenseMatrix a = SSVDHelper.drmLoadAsDense(fs, aPath, conf);

    // SingularValueDecompositionImpl svd=new SingularValueDecompositionImpl(new
    // Array2DRowRealMatrix(a));
    SingularValueDecomposition svd2 = new SingularValueDecomposition(a);

    Vector svalues2 = new DenseVector(svd2.getSingularValues());
    dumpSv(svalues2);

    for (int i = 0; i < k + p; i++) {
        assertTrue(Math.abs(svalues2.getQuick(i) - stochasticSValues.getQuick(i)) <= s_epsilon);
    }

    DenseMatrix mQ = SSVDHelper.drmLoadAsDense(fs, new Path(svdOutPath, "Bt-job/" + BtJob.OUTPUT_Q + "-*"),
            conf);

    SSVDCommonTest.assertOrthonormality(mQ, false, s_epsilon);

    IOUtils.close(closeables);
}

From source file: org.apache.mahout.math.hadoop.stochasticsvd.SSVDTestsHelper.java

License: Apache License

/**
 * Generate some random but meaningful input with singular value ratios of
 * n, n-1, ..., 1.
 * 
 * @param outputPath
 */
static void generateDenseInput(Path outputPath, FileSystem dfs, Vector svalues, int m, int n, int startRowKey)
        throws IOException {

    Random rnd = RandomUtils.getRandom();

    int svCnt = svalues.size();
    Matrix v = generateDenseOrthonormalRandom(n, svCnt, rnd);
    Matrix u = generateDenseOrthonormalRandom(m, svCnt, rnd);

    // apply singular values
    Matrix mx = m > n ? v : u;
    for (int i = 0; i < svCnt; i++) {
        mx.assignColumn(i, mx.viewColumn(i).times(svalues.getQuick(i)));
    }

    SequenceFile.Writer w = SequenceFile.createWriter(dfs, dfs.getConf(), outputPath, IntWritable.class,
            VectorWritable.class);
    try {

        Vector outV = new DenseVector(n);
        Writable vw = new VectorWritable(outV);
        IntWritable iw = new IntWritable();

        for (int i = 0; i < m; i++) {
            iw.set(startRowKey + i);
            for (int j = 0; j < n; j++) {
                outV.setQuick(j, u.viewRow(i).dot(v.viewRow(j)));
            }
            w.append(iw, vw);
        }

    } finally {
        w.close();
    }

}

From source file: org.apache.mahout.math.hadoop.stochasticsvd.SSVDTestsHelper.java

License: Apache License

public static void main(String[] args) throws Exception {
    // create 1Gb input for distributed tests.
    MahoutTestCase ca = new MahoutTestCase();
    Configuration conf = ca.getConfiguration();
    FileSystem dfs = FileSystem.getLocal(conf);
    Path outputDir = new Path("/tmp/DRM");
    dfs.mkdirs(outputDir);
    //    for ( int i = 1; i <= 10; i++ ) {
    //      generateDenseInput(new Path(outputDir,String.format("part-%05d",i)),dfs,
    //                         new DenseVector ( new double[] {
    //                             15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0.8,0.3,0.1,0.01
    //                         }),1200,10000,(i-1)*1200);
    //    }

    /*
     * Create a 2 GB sparse 4.5M x 4.5M input (similar to the Wikipedia graph).
     *
     * To reach roughly 2 GB we need to generate ~40 non-zero items per row on
     * average.
     */

    outputDir = new Path("/tmp/DRM-sparse");
    Random rnd = RandomUtils.getRandom();

    SequenceFile.Writer w = SequenceFile.createWriter(dfs, dfs.getConf(), new Path(outputDir, "sparse.seq"),
            IntWritable.class, VectorWritable.class);

    try {

        IntWritable iw = new IntWritable();
        VectorWritable vw = new VectorWritable();
        int avgNZero = 40;
        int n = 4500000;
        for (int i = 1; i < n; i++) {
            Vector vector = new RandomAccessSparseVector(n);
            double nz = Math.round(avgNZero * (rnd.nextGaussian() + 1));
            if (nz < 0) {
                nz = 0;
            }
            for (int j = 1; j < nz; j++) {
                vector.set(rnd.nextInt(n), rnd.nextGaussian() * 25 + 3);
            }
            iw.set(i);
            vw.set(vector);
            w.append(iw, vw);
        }
    } finally {
        w.close();
    }

}

From source file: org.apache.mahout.math.MatrixUtils.java

License: Apache License

public static void write(Path outputDir, Configuration conf, VectorIterable matrix) throws IOException {
    FileSystem fs = outputDir.getFileSystem(conf);
    fs.delete(outputDir, true);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, outputDir, IntWritable.class,
            VectorWritable.class);
    IntWritable topic = new IntWritable();
    VectorWritable vector = new VectorWritable();
    for (MatrixSlice slice : matrix) {
        topic.set(slice.index());
        vector.set(slice.vector());
        writer.append(topic, vector);
    }
    writer.close();
}

From source file: org.apache.mahout.utils.SplitInputTest.java

License: Apache License

/**
 * Create a SequenceFile for testing, consisting of IntWritable
 * keys and VectorWritable values.
 * @param path path for test SequenceFile
 * @param testPoints number of records in test SequenceFile
 */
private void writeVectorSequenceFile(Path path, int testPoints) throws IOException {
    Path tempSequenceFile = new Path(path, "part-00000");
    Configuration conf = getConfiguration();
    IntWritable key = new IntWritable();
    VectorWritable value = new VectorWritable();
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, tempSequenceFile, IntWritable.class, VectorWritable.class);
        for (int i = 0; i < testPoints; i++) {
            key.set(i);
            Vector v = new SequentialAccessSparseVector(4);
            v.assign(i);
            value.set(v);
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
    }
}