Example usage for org.apache.hadoop.io.compress.DefaultCodec DefaultCodec()

Introduction

On this page you can find example usage of org.apache.hadoop.io.compress.DefaultCodec, specifically its no-argument constructor DefaultCodec().

Prototype

DefaultCodec()

Usage

From source file:org.apache.jena.hadoop.rdf.io.input.compressed.rdfxml.DeflatedRdfXmlInputTest.java

License:Apache License

/**
 * Creates new tests
 */
public DeflatedRdfXmlInputTest() {
    super(".rdf.deflate", new DefaultCodec());
}

From source file:org.apache.jena.hadoop.rdf.io.input.compressed.thrift.DeflatedThriftQuadInputTest.java

License:Apache License

/**
 * Creates new tests
 */
public DeflatedThriftQuadInputTest() {
    super(".trdf.deflate", new DefaultCodec());
}

From source file:org.apache.jena.hadoop.rdf.io.input.compressed.thrift.DeflatedThriftTripleInputTest.java

License:Apache License

/**
 * Creates new tests
 */
public DeflatedThriftTripleInputTest() {
    super(".trdf.deflate", new DefaultCodec());
}

From source file:org.apache.jena.hadoop.rdf.io.input.compressed.trig.DeflatedTriGInputTest.java

License:Apache License

/**
 * Creates new tests
 */
public DeflatedTriGInputTest() {
    super(".trig.deflate", new DefaultCodec());
}

From source file:org.apache.jena.hadoop.rdf.io.input.compressed.trix.DeflatedTriXInputTest.java

License:Apache License

/**
 * Creates new tests
 */
public DeflatedTriXInputTest() {
    super(".trix.deflate", new DefaultCodec());
}

From source file:org.apache.jena.hadoop.rdf.io.input.compressed.turtle.DeflatedTurtleInputTest.java

License:Apache License

/**
 * Creates new tests
 */
public DeflatedTurtleInputTest() {
    super(".nt.deflate", new DefaultCodec());
}

From source file:org.apache.mahout.math.hadoop.stochasticsvd.LocalSSVDPCADenseTest.java

License:Apache License

public void runSSVDSolver(int q) throws IOException {

    Configuration conf = new Configuration();
    conf.set("mapred.job.tracker", "local");
    conf.set("fs.default.name", "file:///");

    // conf.set("mapred.job.tracker","localhost:11011");
    // conf.set("fs.default.name","hdfs://localhost:11010/");

    Deque<Closeable> closeables = new LinkedList<Closeable>();
    Random rnd = RandomUtils.getRandom();

    File tmpDir = getTestTempDir("svdtmp");
    conf.set("hadoop.tmp.dir", tmpDir.getAbsolutePath());

    Path aLocPath = new Path(getTestTempDirPath("svdtmp/A"), "A.seq");

    // create distributed row matrix-like struct
    SequenceFile.Writer w = SequenceFile.createWriter(FileSystem.getLocal(conf), conf, aLocPath,
            IntWritable.class, VectorWritable.class, CompressionType.BLOCK, new DefaultCodec());
    closeables.addFirst(w);

    int n = 100;
    int m = 2000;
    double percent = 5;

    VectorWritable vw = new VectorWritable();
    IntWritable roww = new IntWritable();

    Vector xi = new DenseVector(n);

    double muAmplitude = 50.0;
    for (int i = 0; i < m; i++) {
        Vector dv = new SequentialAccessSparseVector(n);
        for (int j = 0; j < n * percent / 100; j++) {
            dv.setQuick(rnd.nextInt(n), muAmplitude * (rnd.nextDouble() - 0.25));
        }
        roww.set(i);
        vw.set(dv);
        w.append(roww, vw);
        xi.assign(dv, Functions.PLUS);
    }
    closeables.remove(w);
    Closeables.close(w, true);

    xi.assign(Functions.mult(1.0 / m)); // use floating-point division; 1 / m would truncate the mean to zero

    FileSystem fs = FileSystem.get(conf);

    Path tempDirPath = getTestTempDirPath("svd-proc");
    Path aPath = new Path(tempDirPath, "A/A.seq");
    fs.copyFromLocalFile(aLocPath, aPath);
    Path xiPath = new Path(tempDirPath, "xi/xi.seq");
    SSVDHelper.saveVector(xi, xiPath, conf);

    Path svdOutPath = new Path(tempDirPath, "SSVD-out");

    // make sure we wipe out previous test results, just a convenience
    fs.delete(svdOutPath, true);

    // Solver starts here:
    System.out.println("Input prepared, starting solver...");

    int ablockRows = 867;
    int p = 60;
    int k = 40;
    SSVDSolver ssvd = new SSVDSolver(conf, new Path[] { aPath }, svdOutPath, ablockRows, k, p, 3);
    ssvd.setOuterBlockHeight(500);
    ssvd.setAbtBlockHeight(251);
    ssvd.setPcaMeanPath(xiPath);

    /*
     * Removing V,U jobs from this test to reduce running time; they are kept
     * in the dense test, though.
     */
    ssvd.setComputeU(false);
    ssvd.setComputeV(false);

    ssvd.setOverwrite(true);
    ssvd.setQ(q);
    ssvd.setBroadcast(true);
    ssvd.run();

    Vector stochasticSValues = ssvd.getSingularValues();
    System.out.println("--SSVD solver singular values:");
    LocalSSVDSolverSparseSequentialTest.dumpSv(stochasticSValues);
    System.out.println("--Colt SVD solver singular values:");

    // try to run the same thing without stochastic algo
    double[][] a = SSVDHelper.loadDistributedRowMatrix(fs, aPath, conf);

    // subtract pseudo pca mean
    for (int i = 0; i < m; i++)
        for (int j = 0; j < n; j++)
            a[i][j] -= xi.getQuick(j);

    SingularValueDecomposition svd2 = new SingularValueDecomposition(new DenseMatrix(a));

    Vector svalues2 = new DenseVector(svd2.getSingularValues());
    LocalSSVDSolverSparseSequentialTest.dumpSv(svalues2);

    for (int i = 0; i < k + p; i++) {
        assertTrue(Math.abs(svalues2.getQuick(i) - stochasticSValues.getQuick(i)) <= s_epsilon);
    }

    double[][] mQ = SSVDHelper.loadDistributedRowMatrix(fs,
            new Path(svdOutPath, "Bt-job/" + BtJob.OUTPUT_Q + "-*"), conf);

    SSVDCommonTest.assertOrthonormality(new DenseMatrix(mQ), false, s_epsilon);

}

From source file:org.apache.mahout.math.hadoop.stochasticsvd.LocalSSVDPCASparseTest.java

License:Apache License

public void runSSVDSolver(int q) throws IOException {

    Configuration conf = new Configuration();
    conf.set("mapred.job.tracker", "local");
    conf.set("fs.default.name", "file:///");

    // conf.set("mapred.job.tracker","localhost:11011");
    // conf.set("fs.default.name","hdfs://localhost:11010/");

    Deque<Closeable> closeables = Lists.newLinkedList();
    try {
        Random rnd = RandomUtils.getRandom();

        File tmpDir = getTestTempDir("svdtmp");
        conf.set("hadoop.tmp.dir", tmpDir.getAbsolutePath());

        Path aLocPath = new Path(getTestTempDirPath("svdtmp/A"), "A.seq");

        // create distributed row matrix-like struct
        SequenceFile.Writer w = SequenceFile.createWriter(FileSystem.getLocal(conf), conf, aLocPath, Text.class,
                VectorWritable.class, CompressionType.BLOCK, new DefaultCodec());
        closeables.addFirst(w);

        int n = 100;
        int m = 2000;
        double percent = 5;

        VectorWritable vw = new VectorWritable();
        Text rkey = new Text();

        Vector xi = new DenseVector(n);

        double muAmplitude = 50.0;
        for (int i = 0; i < m; i++) {
            Vector dv = new SequentialAccessSparseVector(n);
            String rowname = "row-" + i;
            NamedVector namedRow = new NamedVector(dv, rowname);
            for (int j = 0; j < n * percent / 100; j++) {
                dv.setQuick(rnd.nextInt(n), muAmplitude * (rnd.nextDouble() - 0.25));
            }
            rkey.set("row-i" + i);
            vw.set(namedRow);
            w.append(rkey, vw);
            xi.assign(dv, Functions.PLUS);
        }
        closeables.remove(w);
        Closeables.close(w, false);

        xi.assign(Functions.mult(1.0 / m));

        FileSystem fs = FileSystem.get(conf);

        Path tempDirPath = getTestTempDirPath("svd-proc");
        Path aPath = new Path(tempDirPath, "A/A.seq");
        fs.copyFromLocalFile(aLocPath, aPath);
        Path xiPath = new Path(tempDirPath, "xi/xi.seq");
        SSVDHelper.saveVector(xi, xiPath, conf);

        Path svdOutPath = new Path(tempDirPath, "SSVD-out");

        // make sure we wipe out previous test results, just a convenience
        fs.delete(svdOutPath, true);

        // Solver starts here:
        System.out.println("Input prepared, starting solver...");

        int ablockRows = 867;
        int p = 60;
        int k = 40;
        SSVDSolver ssvd = new SSVDSolver(conf, new Path[] { aPath }, svdOutPath, ablockRows, k, p, 3);
        ssvd.setOuterBlockHeight(500);
        ssvd.setAbtBlockHeight(251);
        ssvd.setPcaMeanPath(xiPath);

        /*
         * Removing V,U jobs from this test to reduce running time; they are kept
         * in the dense test, though.
         *
         * For the PCA test, we also want to request U*Sigma output and check it for
         * named vector propagation.
         */
        ssvd.setComputeU(false);
        ssvd.setComputeV(false);
        ssvd.setcUSigma(true);

        ssvd.setOverwrite(true);
        ssvd.setQ(q);
        ssvd.setBroadcast(true);
        ssvd.run();

        Vector stochasticSValues = ssvd.getSingularValues();

        // try to run the same thing without stochastic algo
        Matrix a = SSVDHelper.drmLoadAsDense(fs, aPath, conf);

        verifyInternals(svdOutPath, a, new Omega(ssvd.getOmegaSeed(), k + p), k + p, q);

        // subtract pseudo pca mean
        for (int i = 0; i < m; i++) {
            a.viewRow(i).assign(xi, Functions.MINUS);
        }

        SingularValueDecomposition svd2 = new SingularValueDecomposition(a);

        Vector svalues2 = new DenseVector(svd2.getSingularValues());

        System.out.println("--SSVD solver singular values:");
        LocalSSVDSolverSparseSequentialTest.dumpSv(stochasticSValues);
        System.out.println("--SVD solver singular values:");
        LocalSSVDSolverSparseSequentialTest.dumpSv(svalues2);

        for (int i = 0; i < k + p; i++) {
            assertTrue(Math.abs(svalues2.getQuick(i) - stochasticSValues.getQuick(i)) <= s_epsilon);
        }

        DenseMatrix mQ = SSVDHelper.drmLoadAsDense(fs, new Path(svdOutPath, "Bt-job/" + BtJob.OUTPUT_Q + "-*"),
                conf);

        SSVDCommonTest.assertOrthonormality(mQ, false, s_epsilon);

        // assert name propagation
        for (Iterator<Pair<Writable, Vector>> iter = SSVDHelper.drmIterator(fs,
                new Path(ssvd.getuSigmaPath() + "/*"), conf, closeables); iter.hasNext();) {
            Pair<Writable, Vector> pair = iter.next();
            Writable key = pair.getFirst();
            Vector v = pair.getSecond();

            assertTrue(v instanceof NamedVector);
            assertTrue(key instanceof Text);
        }

    } finally {
        IOUtils.close(closeables);
    }
}

From source file:org.apache.mahout.math.hadoop.stochasticsvd.LocalSSVDSolverSparseSequentialTest.java

License:Apache License

public void runSSVDSolver(int q) throws IOException {

    Configuration conf = getConfiguration();
    conf.set("mapred.job.tracker", "local");
    conf.set("fs.default.name", "file:///");

    // conf.set("mapred.job.tracker","localhost:11011");
    // conf.set("fs.default.name","hdfs://localhost:11010/");

    Deque<Closeable> closeables = Lists.newLinkedList();
    Random rnd = RandomUtils.getRandom();

    File tmpDir = getTestTempDir("svdtmp");
    conf.set("hadoop.tmp.dir", tmpDir.getAbsolutePath());

    Path aLocPath = new Path(getTestTempDirPath("svdtmp/A"), "A.seq");

    // create distributed row matrix-like struct
    SequenceFile.Writer w = SequenceFile.createWriter(FileSystem.getLocal(conf), conf, aLocPath,
            IntWritable.class, VectorWritable.class, CompressionType.BLOCK, new DefaultCodec());
    closeables.addFirst(w);

    int n = 100;
    int m = 2000;
    double percent = 5;

    VectorWritable vw = new VectorWritable();
    IntWritable roww = new IntWritable();

    double muAmplitude = 50.0;
    for (int i = 0; i < m; i++) {
        Vector dv = new SequentialAccessSparseVector(n);
        for (int j = 0; j < n * percent / 100; j++) {
            dv.setQuick(rnd.nextInt(n), muAmplitude * (rnd.nextDouble() - 0.5));
        }
        roww.set(i);
        vw.set(dv);
        w.append(roww, vw);
    }
    closeables.remove(w);
    Closeables.close(w, false);

    FileSystem fs = FileSystem.get(aLocPath.toUri(), conf);

    Path tempDirPath = getTestTempDirPath("svd-proc");
    Path aPath = new Path(tempDirPath, "A/A.seq");
    fs.copyFromLocalFile(aLocPath, aPath);

    Path svdOutPath = new Path(tempDirPath, "SSVD-out");

    // make sure we wipe out previous test results, just a convenience
    fs.delete(svdOutPath, true);

    // Solver starts here:
    System.out.println("Input prepared, starting solver...");

    int ablockRows = 867;
    int p = 60;
    int k = 40;
    SSVDSolver ssvd = new SSVDSolver(conf, new Path[] { aPath }, svdOutPath, ablockRows, k, p, 3);
    ssvd.setOuterBlockHeight(500);
    ssvd.setAbtBlockHeight(251);

    /*
     * Removing V,U jobs from this test to reduce running time; they are kept
     * in the dense test, though.
     */
    ssvd.setComputeU(false);
    ssvd.setComputeV(false);

    ssvd.setOverwrite(true);
    ssvd.setQ(q);
    ssvd.setBroadcast(true);
    ssvd.run();

    Vector stochasticSValues = ssvd.getSingularValues();
    System.out.println("--SSVD solver singular values:");
    dumpSv(stochasticSValues);
    System.out.println("--Colt SVD solver singular values:");

    // try to run the same thing without stochastic algo
    DenseMatrix a = SSVDHelper.drmLoadAsDense(fs, aPath, conf);

    // SingularValueDecompositionImpl svd=new SingularValueDecompositionImpl(new
    // Array2DRowRealMatrix(a));
    SingularValueDecomposition svd2 = new SingularValueDecomposition(a);

    Vector svalues2 = new DenseVector(svd2.getSingularValues());
    dumpSv(svalues2);

    for (int i = 0; i < k + p; i++) {
        assertTrue(Math.abs(svalues2.getQuick(i) - stochasticSValues.getQuick(i)) <= s_epsilon);
    }

    DenseMatrix mQ = SSVDHelper.drmLoadAsDense(fs, new Path(svdOutPath, "Bt-job/" + BtJob.OUTPUT_Q + "-*"),
            conf);

    SSVDCommonTest.assertOrthonormality(mQ, false, s_epsilon);

    IOUtils.close(closeables);
}

From source file:org.apache.pig.piggybank.test.storage.TestHiveColumnarLoader.java

License:Apache License

private static void produceDatePartitionedData() throws IOException {
    datePartitionedRowCount = 0;
    datePartitionedDir = new File("testhiveColumnarLoader-dateDir-" + System.currentTimeMillis());
    datePartitionedDir.mkdir();
    datePartitionedDir.deleteOnExit();

    int dates = 4;
    calendar = Calendar.getInstance();

    calendar.set(Calendar.DAY_OF_MONTH, Calendar.MONDAY);
    calendar.set(Calendar.MONTH, Calendar.JANUARY);

    startingDate = dateFormat.format(calendar.getTime());

    datePartitionedRCFiles = new ArrayList<String>();
    datePartitionedDirs = new ArrayList<String>();

    for (int i = 0; i < dates; i++) {

        File file = new File(datePartitionedDir, "daydate=" + dateFormat.format(calendar.getTime()));
        calendar.add(Calendar.DAY_OF_MONTH, 1);

        file.mkdir();
        file.deleteOnExit();

        // for each daydate write 5 partitions
        for (int pi = 0; pi < 5; pi++) {
            Path path = new Path(new Path(file.getAbsolutePath()), "partition" + pi);

            datePartitionedRowCount += writeRCFileTest(fs, simpleRowCount, path, columnCount,
                    new DefaultCodec(), columnCount);

            new File(path.toString()).deleteOnExit();
            datePartitionedRCFiles.add(path.toString());
            datePartitionedDirs.add(file.toString());

        }

    }

    endingDate = dateFormat.format(calendar.getTime());
}