List of usage examples for the org.apache.hadoop.io.compress.DefaultCodec constructor DefaultCodec()
DefaultCodec
From source file:org.apache.jena.hadoop.rdf.io.input.compressed.rdfxml.DeflatedRdfXmlInputTest.java
License:Apache License
/**
 * Builds the test case, handing the superclass the {@code ".rdf.deflate"}
 * file extension together with a freshly created {@link DefaultCodec}.
 */
public DeflatedRdfXmlInputTest() {
    super(
        ".rdf.deflate",
        new DefaultCodec());
}
From source file:org.apache.jena.hadoop.rdf.io.input.compressed.thrift.DeflatedThriftQuadInputTest.java
License:Apache License
/**
 * Builds the test case, handing the superclass the {@code ".trdf.deflate"}
 * file extension together with a freshly created {@link DefaultCodec}.
 */
public DeflatedThriftQuadInputTest() {
    super(
        ".trdf.deflate",
        new DefaultCodec());
}
From source file:org.apache.jena.hadoop.rdf.io.input.compressed.thrift.DeflatedThriftTripleInputTest.java
License:Apache License
/**
 * Builds the test case, handing the superclass the {@code ".trdf.deflate"}
 * file extension together with a freshly created {@link DefaultCodec}.
 */
public DeflatedThriftTripleInputTest() {
    super(
        ".trdf.deflate",
        new DefaultCodec());
}
From source file:org.apache.jena.hadoop.rdf.io.input.compressed.trig.DeflatedTriGInputTest.java
License:Apache License
/**
 * Builds the test case, handing the superclass the {@code ".trig.deflate"}
 * file extension together with a freshly created {@link DefaultCodec}.
 */
public DeflatedTriGInputTest() {
    super(
        ".trig.deflate",
        new DefaultCodec());
}
From source file:org.apache.jena.hadoop.rdf.io.input.compressed.trix.DeflatedTriXInputTest.java
License:Apache License
/**
 * Builds the test case, handing the superclass the {@code ".trix.deflate"}
 * file extension together with a freshly created {@link DefaultCodec}.
 */
public DeflatedTriXInputTest() {
    super(
        ".trix.deflate",
        new DefaultCodec());
}
From source file:org.apache.jena.hadoop.rdf.io.input.compressed.turtle.DeflatedTurtleInputTest.java
License:Apache License
/**
 * Builds the test case, handing the superclass the {@code ".nt.deflate"}
 * file extension together with a freshly created {@link DefaultCodec}.
 */
public DeflatedTurtleInputTest() {
    // NOTE(review): the extension is ".nt.deflate" although this is the Turtle
    // test -- confirm against the superclass that this is intentional.
    super(
        ".nt.deflate",
        new DefaultCodec());
}
From source file:org.apache.mahout.math.hadoop.stochasticsvd.LocalSSVDPCADenseTest.java
License:Apache License
public void runSSVDSolver(int q) throws IOException { Configuration conf = new Configuration(); conf.set("mapred.job.tracker", "local"); conf.set("fs.default.name", "file:///"); // conf.set("mapred.job.tracker","localhost:11011"); // conf.set("fs.default.name","hdfs://localhost:11010/"); Deque<Closeable> closeables = new LinkedList<Closeable>(); Random rnd = RandomUtils.getRandom(); File tmpDir = getTestTempDir("svdtmp"); conf.set("hadoop.tmp.dir", tmpDir.getAbsolutePath()); Path aLocPath = new Path(getTestTempDirPath("svdtmp/A"), "A.seq"); // create distributed row matrix-like struct SequenceFile.Writer w = SequenceFile.createWriter(FileSystem.getLocal(conf), conf, aLocPath, IntWritable.class, VectorWritable.class, CompressionType.BLOCK, new DefaultCodec()); closeables.addFirst(w);// w w w.j ava 2 s . c o m int n = 100; int m = 2000; double percent = 5; VectorWritable vw = new VectorWritable(); IntWritable roww = new IntWritable(); Vector xi = new DenseVector(n); double muAmplitude = 50.0; for (int i = 0; i < m; i++) { Vector dv = new SequentialAccessSparseVector(n); for (int j = 0; j < n * percent / 100; j++) { dv.setQuick(rnd.nextInt(n), muAmplitude * (rnd.nextDouble() - 0.25)); } roww.set(i); vw.set(dv); w.append(roww, vw); xi.assign(dv, Functions.PLUS); } closeables.remove(w); Closeables.close(w, true); xi.assign(Functions.mult(1 / m)); FileSystem fs = FileSystem.get(conf); Path tempDirPath = getTestTempDirPath("svd-proc"); Path aPath = new Path(tempDirPath, "A/A.seq"); fs.copyFromLocalFile(aLocPath, aPath); Path xiPath = new Path(tempDirPath, "xi/xi.seq"); SSVDHelper.saveVector(xi, xiPath, conf); Path svdOutPath = new Path(tempDirPath, "SSVD-out"); // make sure we wipe out previous test results, just a convenience fs.delete(svdOutPath, true); // Solver starts here: System.out.println("Input prepared, starting solver..."); int ablockRows = 867; int p = 60; int k = 40; SSVDSolver ssvd = new SSVDSolver(conf, new Path[] { aPath }, svdOutPath, ablockRows, k, p, 
3); ssvd.setOuterBlockHeight(500); ssvd.setAbtBlockHeight(251); ssvd.setPcaMeanPath(xiPath); /* * removing V,U jobs from this test to reduce running time. i will keep them * put in the dense test though. */ ssvd.setComputeU(false); ssvd.setComputeV(false); ssvd.setOverwrite(true); ssvd.setQ(q); ssvd.setBroadcast(true); ssvd.run(); Vector stochasticSValues = ssvd.getSingularValues(); System.out.println("--SSVD solver singular values:"); LocalSSVDSolverSparseSequentialTest.dumpSv(stochasticSValues); System.out.println("--Colt SVD solver singular values:"); // try to run the same thing without stochastic algo double[][] a = SSVDHelper.loadDistributedRowMatrix(fs, aPath, conf); // subtract pseudo pca mean for (int i = 0; i < m; i++) for (int j = 0; j < n; j++) a[i][j] -= xi.getQuick(j); SingularValueDecomposition svd2 = new SingularValueDecomposition(new DenseMatrix(a)); Vector svalues2 = new DenseVector(svd2.getSingularValues()); LocalSSVDSolverSparseSequentialTest.dumpSv(svalues2); for (int i = 0; i < k + p; i++) { assertTrue(Math.abs(svalues2.getQuick(i) - stochasticSValues.getQuick(i)) <= s_epsilon); } double[][] mQ = SSVDHelper.loadDistributedRowMatrix(fs, new Path(svdOutPath, "Bt-job/" + BtJob.OUTPUT_Q + "-*"), conf); SSVDCommonTest.assertOrthonormality(new DenseMatrix(mQ), false, s_epsilon); }
From source file:org.apache.mahout.math.hadoop.stochasticsvd.LocalSSVDPCASparseTest.java
License:Apache License
public void runSSVDSolver(int q) throws IOException { Configuration conf = new Configuration(); conf.set("mapred.job.tracker", "local"); conf.set("fs.default.name", "file:///"); // conf.set("mapred.job.tracker","localhost:11011"); // conf.set("fs.default.name","hdfs://localhost:11010/"); Deque<Closeable> closeables = Lists.newLinkedList(); try {/*from w w w. j a v a 2s. c o m*/ Random rnd = RandomUtils.getRandom(); File tmpDir = getTestTempDir("svdtmp"); conf.set("hadoop.tmp.dir", tmpDir.getAbsolutePath()); Path aLocPath = new Path(getTestTempDirPath("svdtmp/A"), "A.seq"); // create distributed row matrix-like struct SequenceFile.Writer w = SequenceFile.createWriter(FileSystem.getLocal(conf), conf, aLocPath, Text.class, VectorWritable.class, CompressionType.BLOCK, new DefaultCodec()); closeables.addFirst(w); int n = 100; int m = 2000; double percent = 5; VectorWritable vw = new VectorWritable(); Text rkey = new Text(); Vector xi = new DenseVector(n); double muAmplitude = 50.0; for (int i = 0; i < m; i++) { Vector dv = new SequentialAccessSparseVector(n); String rowname = "row-" + i; NamedVector namedRow = new NamedVector(dv, rowname); for (int j = 0; j < n * percent / 100; j++) { dv.setQuick(rnd.nextInt(n), muAmplitude * (rnd.nextDouble() - 0.25)); } rkey.set("row-i" + i); vw.set(namedRow); w.append(rkey, vw); xi.assign(dv, Functions.PLUS); } closeables.remove(w); Closeables.close(w, false); xi.assign(Functions.mult(1.0 / m)); FileSystem fs = FileSystem.get(conf); Path tempDirPath = getTestTempDirPath("svd-proc"); Path aPath = new Path(tempDirPath, "A/A.seq"); fs.copyFromLocalFile(aLocPath, aPath); Path xiPath = new Path(tempDirPath, "xi/xi.seq"); SSVDHelper.saveVector(xi, xiPath, conf); Path svdOutPath = new Path(tempDirPath, "SSVD-out"); // make sure we wipe out previous test results, just a convenience fs.delete(svdOutPath, true); // Solver starts here: System.out.println("Input prepared, starting solver..."); int ablockRows = 867; int p = 60; int k = 40; 
SSVDSolver ssvd = new SSVDSolver(conf, new Path[] { aPath }, svdOutPath, ablockRows, k, p, 3); ssvd.setOuterBlockHeight(500); ssvd.setAbtBlockHeight(251); ssvd.setPcaMeanPath(xiPath); /* * Removing V,U jobs from this test to reduce running time. i will keep them * put in the dense test though. * * For PCA test, we also want to request U*Sigma output and check it for named * vector propagation. */ ssvd.setComputeU(false); ssvd.setComputeV(false); ssvd.setcUSigma(true); ssvd.setOverwrite(true); ssvd.setQ(q); ssvd.setBroadcast(true); ssvd.run(); Vector stochasticSValues = ssvd.getSingularValues(); // try to run the same thing without stochastic algo Matrix a = SSVDHelper.drmLoadAsDense(fs, aPath, conf); verifyInternals(svdOutPath, a, new Omega(ssvd.getOmegaSeed(), k + p), k + p, q); // subtract pseudo pca mean for (int i = 0; i < m; i++) { a.viewRow(i).assign(xi, Functions.MINUS); } SingularValueDecomposition svd2 = new SingularValueDecomposition(a); Vector svalues2 = new DenseVector(svd2.getSingularValues()); System.out.println("--SSVD solver singular values:"); LocalSSVDSolverSparseSequentialTest.dumpSv(stochasticSValues); System.out.println("--SVD solver singular values:"); LocalSSVDSolverSparseSequentialTest.dumpSv(svalues2); for (int i = 0; i < k + p; i++) { assertTrue(Math.abs(svalues2.getQuick(i) - stochasticSValues.getQuick(i)) <= s_epsilon); } DenseMatrix mQ = SSVDHelper.drmLoadAsDense(fs, new Path(svdOutPath, "Bt-job/" + BtJob.OUTPUT_Q + "-*"), conf); SSVDCommonTest.assertOrthonormality(mQ, false, s_epsilon); // assert name propagation for (Iterator<Pair<Writable, Vector>> iter = SSVDHelper.drmIterator(fs, new Path(ssvd.getuSigmaPath() + "/*"), conf, closeables); iter.hasNext();) { Pair<Writable, Vector> pair = iter.next(); Writable key = pair.getFirst(); Vector v = pair.getSecond(); assertTrue(v instanceof NamedVector); assertTrue(key instanceof Text); } } finally { IOUtils.close(closeables); } }
From source file:org.apache.mahout.math.hadoop.stochasticsvd.LocalSSVDSolverSparseSequentialTest.java
License:Apache License
public void runSSVDSolver(int q) throws IOException { Configuration conf = getConfiguration(); conf.set("mapred.job.tracker", "local"); conf.set("fs.default.name", "file:///"); // conf.set("mapred.job.tracker","localhost:11011"); // conf.set("fs.default.name","hdfs://localhost:11010/"); Deque<Closeable> closeables = Lists.newLinkedList(); ;//from ww w . jav a2s .c o m Random rnd = RandomUtils.getRandom(); File tmpDir = getTestTempDir("svdtmp"); conf.set("hadoop.tmp.dir", tmpDir.getAbsolutePath()); Path aLocPath = new Path(getTestTempDirPath("svdtmp/A"), "A.seq"); // create distributed row matrix-like struct SequenceFile.Writer w = SequenceFile.createWriter(FileSystem.getLocal(conf), conf, aLocPath, IntWritable.class, VectorWritable.class, CompressionType.BLOCK, new DefaultCodec()); closeables.addFirst(w); int n = 100; int m = 2000; double percent = 5; VectorWritable vw = new VectorWritable(); IntWritable roww = new IntWritable(); double muAmplitude = 50.0; for (int i = 0; i < m; i++) { Vector dv = new SequentialAccessSparseVector(n); for (int j = 0; j < n * percent / 100; j++) { dv.setQuick(rnd.nextInt(n), muAmplitude * (rnd.nextDouble() - 0.5)); } roww.set(i); vw.set(dv); w.append(roww, vw); } closeables.remove(w); Closeables.close(w, false); FileSystem fs = FileSystem.get(aLocPath.toUri(), conf); Path tempDirPath = getTestTempDirPath("svd-proc"); Path aPath = new Path(tempDirPath, "A/A.seq"); fs.copyFromLocalFile(aLocPath, aPath); Path svdOutPath = new Path(tempDirPath, "SSVD-out"); // make sure we wipe out previous test results, just a convenience fs.delete(svdOutPath, true); // Solver starts here: System.out.println("Input prepared, starting solver..."); int ablockRows = 867; int p = 60; int k = 40; SSVDSolver ssvd = new SSVDSolver(conf, new Path[] { aPath }, svdOutPath, ablockRows, k, p, 3); ssvd.setOuterBlockHeight(500); ssvd.setAbtBlockHeight(251); /* * removing V,U jobs from this test to reduce running time. i will keep them * put in the dense test though. 
*/ ssvd.setComputeU(false); ssvd.setComputeV(false); ssvd.setOverwrite(true); ssvd.setQ(q); ssvd.setBroadcast(true); ssvd.run(); Vector stochasticSValues = ssvd.getSingularValues(); System.out.println("--SSVD solver singular values:"); dumpSv(stochasticSValues); System.out.println("--Colt SVD solver singular values:"); // try to run the same thing without stochastic algo DenseMatrix a = SSVDHelper.drmLoadAsDense(fs, aPath, conf); // SingularValueDecompositionImpl svd=new SingularValueDecompositionImpl(new // Array2DRowRealMatrix(a)); SingularValueDecomposition svd2 = new SingularValueDecomposition(a); Vector svalues2 = new DenseVector(svd2.getSingularValues()); dumpSv(svalues2); for (int i = 0; i < k + p; i++) { assertTrue(Math.abs(svalues2.getQuick(i) - stochasticSValues.getQuick(i)) <= s_epsilon); } DenseMatrix mQ = SSVDHelper.drmLoadAsDense(fs, new Path(svdOutPath, "Bt-job/" + BtJob.OUTPUT_Q + "-*"), conf); SSVDCommonTest.assertOrthonormality(mQ, false, s_epsilon); IOUtils.close(closeables); }
From source file:org.apache.pig.piggybank.test.storage.TestHiveColumnarLoader.java
License:Apache License
private static void produceDatePartitionedData() throws IOException { datePartitionedRowCount = 0;// ww w. j a va2s . co m datePartitionedDir = new File("testhiveColumnarLoader-dateDir-" + System.currentTimeMillis()); datePartitionedDir.mkdir(); datePartitionedDir.deleteOnExit(); int dates = 4; calendar = Calendar.getInstance(); calendar.set(Calendar.DAY_OF_MONTH, Calendar.MONDAY); calendar.set(Calendar.MONTH, Calendar.JANUARY); startingDate = dateFormat.format(calendar.getTime()); datePartitionedRCFiles = new ArrayList<String>(); datePartitionedDirs = new ArrayList<String>(); for (int i = 0; i < dates; i++) { File file = new File(datePartitionedDir, "daydate=" + dateFormat.format(calendar.getTime())); calendar.add(Calendar.DAY_OF_MONTH, 1); file.mkdir(); file.deleteOnExit(); // for each daydate write 5 partitions for (int pi = 0; pi < 5; pi++) { Path path = new Path(new Path(file.getAbsolutePath()), "parition" + pi); datePartitionedRowCount += writeRCFileTest(fs, simpleRowCount, path, columnCount, new DefaultCodec(), columnCount); new File(path.toString()).deleteOnExit(); datePartitionedRCFiles.add(path.toString()); datePartitionedDirs.add(file.toString()); } } endingDate = dateFormat.format(calendar.getTime()); }