Usage examples for org.apache.hadoop.fs.FileSystem#copyFromLocalFile
public void copyFromLocalFile(Path src, Path dst) throws IOException
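Before the collected examples, a minimal self-contained sketch of the call pattern they all share: obtain a FileSystem from a Configuration, build source and destination Paths, and invoke copyFromLocalFile. The paths shown are hypothetical placeholders, not taken from any of the sources below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Hypothetical placeholder paths.
        Path src = new Path("/tmp/local-data.txt");  // file on the local file system
        Path dst = new Path("/user/hadoop/data");    // destination on the target file system

        // Copies src to dst; the local source file is kept.
        fs.copyFromLocalFile(src, dst);
    }
}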
From source file:org.apache.hive.service.TestDFSErrorHandling.java
License:Apache License
@Test
public void testAccessDenied() throws Exception {
    assertTrue("Test setup failed. MiniHS2 is not initialized", miniHS2 != null && miniHS2.isStarted());
    Class.forName(MiniHS2.getJdbcDriverName());
    Path scratchDir = new Path(HiveConf.getVar(hiveConf, HiveConf.ConfVars.SCRATCHDIR));
    MiniDFSShim dfs = miniHS2.getDfs();
    FileSystem fs = dfs.getFileSystem();

    Path stickyBitDir = new Path(scratchDir, "stickyBitDir");
    fs.mkdirs(stickyBitDir);
    String dataFileDir = hiveConf.get("test.data.files").replace('\\', '/')
        .replace("c:", "").replace("C:", "").replace("D:", "").replace("d:", "");
    Path dataFilePath = new Path(dataFileDir, "kv1.txt");
    fs.copyFromLocalFile(dataFilePath, stickyBitDir);

    FsPermission fsPermission = new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL, true);
    // Sets the sticky bit on stickyBitDir - now removing file kv1.txt from stickyBitDir by
    // unprivileged user will result in a DFS error.
    fs.setPermission(stickyBitDir, fsPermission);
    FileStatus[] files = fs.listStatus(stickyBitDir);

    // Connecting to HS2 as foo.
    Connection hs2Conn = DriverManager.getConnection(miniHS2.getJdbcURL(), "foo", "bar");
    Statement stmt = hs2Conn.createStatement();
    String tableName = "stickyBitTable";

    stmt.execute("drop table if exists " + tableName);
    stmt.execute("create table " + tableName + " (foo int, bar string)");

    try {
        // This statement will attempt to move kv1.txt out of stickyBitDir as user foo. HS2 is
        // expected to return 20009.
        stmt.execute("LOAD DATA INPATH '" + stickyBitDir.toUri().getPath() + "/kv1.txt' "
            + "OVERWRITE INTO TABLE " + tableName);
    } catch (Exception e) {
        if (e instanceof SQLException) {
            SQLException se = (SQLException) e;
            Assert.assertEquals("Unexpected error code", 20009, se.getErrorCode());
            System.out.println(String.format("Error Message: %s", se.getMessage()));
        } else {
            throw e;
        }
    }

    stmt.execute("drop table if exists " + tableName);
    stmt.close();
    hs2Conn.close();
}
From source file:org.apache.ignite.internal.processors.hadoop.GridHadoopPopularWordsTest.java
License:Apache License
/**
 * Prepare the job's data: clean up result directories that might have been left over
 * after previous runs, and copy input files from the local file system into DFS.
 *
 * @param fs Distributed file system to use in job.
 * @throws IOException If failed.
 */
private void prepareDirectories(FileSystem fs) throws IOException {
    X.println(">>> Cleaning up DFS result directory: " + RESULT_DFS_DIR);
    fs.delete(RESULT_DFS_DIR, true);

    X.println(">>> Cleaning up DFS input directory: " + BOOKS_DFS_DIR);
    fs.delete(BOOKS_DFS_DIR, true);

    X.println(">>> Copy local files into DFS input directory: " + BOOKS_DFS_DIR);
    fs.copyFromLocalFile(BOOKS_LOCAL_DIR, BOOKS_DFS_DIR);
}
From source file:org.apache.lens.regression.util.HadoopUtil.java
License:Apache License
public static void uploadJars(String sourcePath, String hdfsDestinationPath) throws IOException {
    Configuration conf = HadoopUtil.getHadoopConfiguration();
    FileSystem fs = FileSystem.get(conf);
    Path localFilePath = new Path(sourcePath);
    Path hdfsFilePath = new Path(hdfsDestinationPath);
    log.info("Copying " + sourcePath + " to " + hdfsDestinationPath);
    fs.copyFromLocalFile(localFilePath, hdfsFilePath);
    log.info("Copied Successfully " + sourcePath + " to " + hdfsDestinationPath);
}
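For reference, FileSystem also exposes overloads of copyFromLocalFile that take explicit delSrc and overwrite flags, for callers that want to delete the local source after the copy or control overwrite behavior. A minimal sketch, reusing the variables from uploadJars above:

// delSrc = false keeps the local copy; overwrite = true replaces an
// existing destination file.
fs.copyFromLocalFile(false, true, localFilePath, hdfsFilePath);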
From source file:org.apache.mahout.classifier.bayes.BayesClassifierSelfTest.java
License:Apache License
@Override
protected void setUp() throws Exception {
    super.setUp();
    File tempInputFile = getTestTempFile("bayesinput");
    BufferedWriter writer = new BufferedWriter(
        new OutputStreamWriter(new FileOutputStream(tempInputFile), Charset.forName("UTF-8")));
    for (String[] entry : ClassifierData.DATA) {
        writer.write(entry[0] + '\t' + entry[1] + '\n');
    }
    writer.close();
    Path input = getTestTempFilePath("bayesinput");
    Configuration conf = new Configuration();
    FileSystem fs = input.getFileSystem(conf);
    fs.copyFromLocalFile(new Path(tempInputFile.getAbsolutePath()), input);
}
From source file:org.apache.mahout.math.hadoop.stochasticsvd.LocalSSVDPCADenseTest.java
License:Apache License
public void runSSVDSolver(int q) throws IOException {
    Configuration conf = new Configuration();
    conf.set("mapred.job.tracker", "local");
    conf.set("fs.default.name", "file:///");

    // conf.set("mapred.job.tracker", "localhost:11011");
    // conf.set("fs.default.name", "hdfs://localhost:11010/");

    Deque<Closeable> closeables = new LinkedList<Closeable>();
    Random rnd = RandomUtils.getRandom();

    File tmpDir = getTestTempDir("svdtmp");
    conf.set("hadoop.tmp.dir", tmpDir.getAbsolutePath());

    Path aLocPath = new Path(getTestTempDirPath("svdtmp/A"), "A.seq");

    // create distributed row matrix-like struct
    SequenceFile.Writer w = SequenceFile.createWriter(FileSystem.getLocal(conf), conf, aLocPath,
        IntWritable.class, VectorWritable.class, CompressionType.BLOCK, new DefaultCodec());
    closeables.addFirst(w);

    int n = 100;
    int m = 2000;
    double percent = 5;

    VectorWritable vw = new VectorWritable();
    IntWritable roww = new IntWritable();

    Vector xi = new DenseVector(n);

    double muAmplitude = 50.0;
    for (int i = 0; i < m; i++) {
        Vector dv = new SequentialAccessSparseVector(n);
        for (int j = 0; j < n * percent / 100; j++) {
            dv.setQuick(rnd.nextInt(n), muAmplitude * (rnd.nextDouble() - 0.25));
        }
        roww.set(i);
        vw.set(dv);
        w.append(roww, vw);
        xi.assign(dv, Functions.PLUS);
    }
    closeables.remove(w);
    Closeables.close(w, true);

    // 1.0 / m, not 1 / m: with int m, the original integer division evaluates
    // to 0 and would zero out the mean vector.
    xi.assign(Functions.mult(1.0 / m));

    FileSystem fs = FileSystem.get(conf);

    Path tempDirPath = getTestTempDirPath("svd-proc");
    Path aPath = new Path(tempDirPath, "A/A.seq");
    fs.copyFromLocalFile(aLocPath, aPath);
    Path xiPath = new Path(tempDirPath, "xi/xi.seq");
    SSVDHelper.saveVector(xi, xiPath, conf);

    Path svdOutPath = new Path(tempDirPath, "SSVD-out");

    // make sure we wipe out previous test results, just a convenience
    fs.delete(svdOutPath, true);

    // Solver starts here:
    System.out.println("Input prepared, starting solver...");

    int ablockRows = 867;
    int p = 60;
    int k = 40;
    SSVDSolver ssvd = new SSVDSolver(conf, new Path[] { aPath }, svdOutPath, ablockRows, k, p, 3);
    ssvd.setOuterBlockHeight(500);
    ssvd.setAbtBlockHeight(251);
    ssvd.setPcaMeanPath(xiPath);

    /*
     * Removing V, U jobs from this test to reduce running time; they are kept
     * in the dense test.
     */
    ssvd.setComputeU(false);
    ssvd.setComputeV(false);

    ssvd.setOverwrite(true);
    ssvd.setQ(q);
    ssvd.setBroadcast(true);
    ssvd.run();

    Vector stochasticSValues = ssvd.getSingularValues();
    System.out.println("--SSVD solver singular values:");
    LocalSSVDSolverSparseSequentialTest.dumpSv(stochasticSValues);
    System.out.println("--Colt SVD solver singular values:");

    // try to run the same thing without the stochastic algorithm
    double[][] a = SSVDHelper.loadDistributedRowMatrix(fs, aPath, conf);

    // subtract pseudo-PCA mean
    for (int i = 0; i < m; i++) {
        for (int j = 0; j < n; j++) {
            a[i][j] -= xi.getQuick(j);
        }
    }

    SingularValueDecomposition svd2 = new SingularValueDecomposition(new DenseMatrix(a));
    Vector svalues2 = new DenseVector(svd2.getSingularValues());
    LocalSSVDSolverSparseSequentialTest.dumpSv(svalues2);

    for (int i = 0; i < k + p; i++) {
        assertTrue(Math.abs(svalues2.getQuick(i) - stochasticSValues.getQuick(i)) <= s_epsilon);
    }

    double[][] mQ = SSVDHelper.loadDistributedRowMatrix(fs,
        new Path(svdOutPath, "Bt-job/" + BtJob.OUTPUT_Q + "-*"), conf);
    SSVDCommonTest.assertOrthonormality(new DenseMatrix(mQ), false, s_epsilon);
}
From source file:org.apache.mahout.math.hadoop.stochasticsvd.LocalSSVDPCASparseTest.java
License:Apache License
public void runSSVDSolver(int q) throws IOException {
    Configuration conf = new Configuration();
    conf.set("mapred.job.tracker", "local");
    conf.set("fs.default.name", "file:///");

    // conf.set("mapred.job.tracker", "localhost:11011");
    // conf.set("fs.default.name", "hdfs://localhost:11010/");

    Deque<Closeable> closeables = Lists.newLinkedList();
    try {
        Random rnd = RandomUtils.getRandom();

        File tmpDir = getTestTempDir("svdtmp");
        conf.set("hadoop.tmp.dir", tmpDir.getAbsolutePath());

        Path aLocPath = new Path(getTestTempDirPath("svdtmp/A"), "A.seq");

        // create distributed row matrix-like struct
        SequenceFile.Writer w = SequenceFile.createWriter(FileSystem.getLocal(conf), conf, aLocPath,
            Text.class, VectorWritable.class, CompressionType.BLOCK, new DefaultCodec());
        closeables.addFirst(w);

        int n = 100;
        int m = 2000;
        double percent = 5;

        VectorWritable vw = new VectorWritable();
        Text rkey = new Text();

        Vector xi = new DenseVector(n);

        double muAmplitude = 50.0;
        for (int i = 0; i < m; i++) {
            Vector dv = new SequentialAccessSparseVector(n);
            String rowname = "row-" + i;
            NamedVector namedRow = new NamedVector(dv, rowname);
            for (int j = 0; j < n * percent / 100; j++) {
                dv.setQuick(rnd.nextInt(n), muAmplitude * (rnd.nextDouble() - 0.25));
            }
            rkey.set("row-i" + i);
            vw.set(namedRow);
            w.append(rkey, vw);
            xi.assign(dv, Functions.PLUS);
        }
        closeables.remove(w);
        Closeables.close(w, false);

        xi.assign(Functions.mult(1.0 / m));

        FileSystem fs = FileSystem.get(conf);

        Path tempDirPath = getTestTempDirPath("svd-proc");
        Path aPath = new Path(tempDirPath, "A/A.seq");
        fs.copyFromLocalFile(aLocPath, aPath);
        Path xiPath = new Path(tempDirPath, "xi/xi.seq");
        SSVDHelper.saveVector(xi, xiPath, conf);

        Path svdOutPath = new Path(tempDirPath, "SSVD-out");

        // make sure we wipe out previous test results, just a convenience
        fs.delete(svdOutPath, true);

        // Solver starts here:
        System.out.println("Input prepared, starting solver...");

        int ablockRows = 867;
        int p = 60;
        int k = 40;
        SSVDSolver ssvd = new SSVDSolver(conf, new Path[] { aPath }, svdOutPath, ablockRows, k, p, 3);
        ssvd.setOuterBlockHeight(500);
        ssvd.setAbtBlockHeight(251);
        ssvd.setPcaMeanPath(xiPath);

        /*
         * Removing V,U jobs from this test to reduce running time. i will keep them
         * put in the dense test though.
         *
         * For PCA test, we also want to request U*Sigma output and check it for named
         * vector propagation.
         */
        ssvd.setComputeU(false);
        ssvd.setComputeV(false);
        ssvd.setcUSigma(true);

        ssvd.setOverwrite(true);
        ssvd.setQ(q);
        ssvd.setBroadcast(true);
        ssvd.run();

        Vector stochasticSValues = ssvd.getSingularValues();

        // try to run the same thing without stochastic algo
        Matrix a = SSVDHelper.drmLoadAsDense(fs, aPath, conf);

        verifyInternals(svdOutPath, a, new Omega(ssvd.getOmegaSeed(), k + p), k + p, q);

        // subtract pseudo pca mean
        for (int i = 0; i < m; i++) {
            a.viewRow(i).assign(xi, Functions.MINUS);
        }

        SingularValueDecomposition svd2 = new SingularValueDecomposition(a);
        Vector svalues2 = new DenseVector(svd2.getSingularValues());

        System.out.println("--SSVD solver singular values:");
        LocalSSVDSolverSparseSequentialTest.dumpSv(stochasticSValues);
        System.out.println("--SVD solver singular values:");
        LocalSSVDSolverSparseSequentialTest.dumpSv(svalues2);

        for (int i = 0; i < k + p; i++) {
            assertTrue(Math.abs(svalues2.getQuick(i) - stochasticSValues.getQuick(i)) <= s_epsilon);
        }

        DenseMatrix mQ = SSVDHelper.drmLoadAsDense(fs,
            new Path(svdOutPath, "Bt-job/" + BtJob.OUTPUT_Q + "-*"), conf);
        SSVDCommonTest.assertOrthonormality(mQ, false, s_epsilon);

        // assert name propagation
        for (Iterator<Pair<Writable, Vector>> iter = SSVDHelper.drmIterator(fs,
            new Path(ssvd.getuSigmaPath() + "/*"), conf, closeables); iter.hasNext();) {
            Pair<Writable, Vector> pair = iter.next();
            Writable key = pair.getFirst();
            Vector v = pair.getSecond();
            assertTrue(v instanceof NamedVector);
            assertTrue(key instanceof Text);
        }
    } finally {
        IOUtils.close(closeables);
    }
}
From source file:org.apache.mahout.math.hadoop.stochasticsvd.LocalSSVDSolverDenseTest.java
License:Apache License
public void runSSVDSolver(int q) throws IOException {
    Configuration conf = getConfiguration();
    conf.set("mapred.job.tracker", "local");
    conf.set("fs.default.name", "file:///");

    // conf.set("mapred.job.tracker", "localhost:11011");
    // conf.set("fs.default.name", "hdfs://localhost:11010/");

    File tmpDir = getTestTempDir("svdtmp");
    conf.set("hadoop.tmp.dir", tmpDir.getAbsolutePath());

    Path aLocPath = new Path(getTestTempDirPath("svdtmp/A"), "A.seq");

    // create distributed row matrix-like struct
    // SequenceFile.Writer w = SequenceFile.createWriter(
    //     FileSystem.getLocal(conf), conf, aLocPath, IntWritable.class,
    //     VectorWritable.class, CompressionType.NONE, new DefaultCodec());
    // closeables.addFirst(w);

    // make input equivalent to 2 mln non-zero elements.
    // With 100mln the precision turns out to be only better (LLN law i guess)
    // With oversampling of 100, i don't get any error at all.
    int n = 100;
    int m = 2000;
    Vector singularValues = new DenseVector(new double[] { 10, 4, 1,
        0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
        0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
        0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
        0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1 });

    SSVDTestsHelper.generateDenseInput(aLocPath, FileSystem.getLocal(conf), singularValues, m, n);

    FileSystem fs = FileSystem.get(aLocPath.toUri(), conf);

    Path tempDirPath = getTestTempDirPath("svd-proc");
    Path aPath = new Path(tempDirPath, "A/A.seq");
    fs.copyFromLocalFile(aLocPath, aPath);

    Path svdOutPath = new Path(tempDirPath, "SSVD-out");

    // Solver starts here:
    System.out.println("Input prepared, starting solver...");

    int ablockRows = 867;
    int p = 10;
    int k = 3;
    SSVDSolver ssvd = new SSVDSolver(conf, new Path[] { aPath }, svdOutPath, ablockRows, k, p, 3);

    /*
     * these are only tiny-test values to simulate high load cases, in reality
     * one needs much bigger
     */
    ssvd.setOuterBlockHeight(500);
    ssvd.setAbtBlockHeight(400);
    ssvd.setOverwrite(true);
    ssvd.setQ(q);
    ssvd.setBroadcast(false);
    ssvd.run();

    Vector stochasticSValues = ssvd.getSingularValues();
    System.out.println("--SSVD solver singular values:");
    dumpSv(stochasticSValues);

    // the full-rank svd for this test size takes too long to run,
    // so i comment it out, instead, i will be comparing
    // result singular values to the original values used
    // to generate input (which are guaranteed to be right).

    /*
     * System.out.println("--Colt SVD solver singular values:");
     * // try to run the same thing without stochastic algo
     * double[][] a = SSVDSolver.drmLoadAsDense(fs, aPath, conf);
     *
     * SingularValueDecomposition svd2 = new SingularValueDecomposition(new DenseMatrix(a));
     *
     * a = null;
     *
     * double[] svalues2 = svd2.getSingularValues();
     * dumpSv(svalues2);
     *
     * for (int i = 0; i < k; i++) {
     *     Assert.assertTrue(1 - Math.abs((svalues2[i] - stochasticSValues[i]) / svalues2[i])
     *         <= s_precisionPct / 100);
     * }
     */

    // assert first k against those used to generate surrogate input
    for (int i = 0; i < k; i++) {
        assertTrue(Math.abs((singularValues.getQuick(i) - stochasticSValues.getQuick(i))
            / singularValues.getQuick(i)) <= s_precisionPct / 100);
    }

    DenseMatrix mQ = SSVDHelper.drmLoadAsDense(fs,
        new Path(svdOutPath, "Bt-job/" + BtJob.OUTPUT_Q + "-*"), conf);
    SSVDCommonTest.assertOrthonormality(mQ, false, s_epsilon);

    DenseMatrix u = SSVDHelper.drmLoadAsDense(fs, new Path(svdOutPath, "U/*"), conf);
    SSVDCommonTest.assertOrthonormality(u, false, s_epsilon);

    DenseMatrix v = SSVDHelper.drmLoadAsDense(fs, new Path(svdOutPath, "V/*"), conf);
    SSVDCommonTest.assertOrthonormality(v, false, s_epsilon);
}
From source file:org.apache.mahout.math.hadoop.stochasticsvd.LocalSSVDSolverSparseSequentialTest.java
License:Apache License
public void runSSVDSolver(int q) throws IOException {
    Configuration conf = getConfiguration();
    conf.set("mapred.job.tracker", "local");
    conf.set("fs.default.name", "file:///");

    // conf.set("mapred.job.tracker", "localhost:11011");
    // conf.set("fs.default.name", "hdfs://localhost:11010/");

    Deque<Closeable> closeables = Lists.newLinkedList();

    Random rnd = RandomUtils.getRandom();

    File tmpDir = getTestTempDir("svdtmp");
    conf.set("hadoop.tmp.dir", tmpDir.getAbsolutePath());

    Path aLocPath = new Path(getTestTempDirPath("svdtmp/A"), "A.seq");

    // create distributed row matrix-like struct
    SequenceFile.Writer w = SequenceFile.createWriter(FileSystem.getLocal(conf), conf, aLocPath,
        IntWritable.class, VectorWritable.class, CompressionType.BLOCK, new DefaultCodec());
    closeables.addFirst(w);

    int n = 100;
    int m = 2000;
    double percent = 5;

    VectorWritable vw = new VectorWritable();
    IntWritable roww = new IntWritable();

    double muAmplitude = 50.0;
    for (int i = 0; i < m; i++) {
        Vector dv = new SequentialAccessSparseVector(n);
        for (int j = 0; j < n * percent / 100; j++) {
            dv.setQuick(rnd.nextInt(n), muAmplitude * (rnd.nextDouble() - 0.5));
        }
        roww.set(i);
        vw.set(dv);
        w.append(roww, vw);
    }
    closeables.remove(w);
    Closeables.close(w, false);

    FileSystem fs = FileSystem.get(aLocPath.toUri(), conf);

    Path tempDirPath = getTestTempDirPath("svd-proc");
    Path aPath = new Path(tempDirPath, "A/A.seq");
    fs.copyFromLocalFile(aLocPath, aPath);

    Path svdOutPath = new Path(tempDirPath, "SSVD-out");

    // make sure we wipe out previous test results, just a convenience
    fs.delete(svdOutPath, true);

    // Solver starts here:
    System.out.println("Input prepared, starting solver...");

    int ablockRows = 867;
    int p = 60;
    int k = 40;
    SSVDSolver ssvd = new SSVDSolver(conf, new Path[] { aPath }, svdOutPath, ablockRows, k, p, 3);
    ssvd.setOuterBlockHeight(500);
    ssvd.setAbtBlockHeight(251);

    /*
     * removing V,U jobs from this test to reduce running time. i will keep them
     * put in the dense test though.
     */
    ssvd.setComputeU(false);
    ssvd.setComputeV(false);

    ssvd.setOverwrite(true);
    ssvd.setQ(q);
    ssvd.setBroadcast(true);
    ssvd.run();

    Vector stochasticSValues = ssvd.getSingularValues();
    System.out.println("--SSVD solver singular values:");
    dumpSv(stochasticSValues);
    System.out.println("--Colt SVD solver singular values:");

    // try to run the same thing without stochastic algo
    DenseMatrix a = SSVDHelper.drmLoadAsDense(fs, aPath, conf);

    // SingularValueDecompositionImpl svd = new SingularValueDecompositionImpl(
    //     new Array2DRowRealMatrix(a));
    SingularValueDecomposition svd2 = new SingularValueDecomposition(a);

    Vector svalues2 = new DenseVector(svd2.getSingularValues());
    dumpSv(svalues2);

    for (int i = 0; i < k + p; i++) {
        assertTrue(Math.abs(svalues2.getQuick(i) - stochasticSValues.getQuick(i)) <= s_epsilon);
    }

    DenseMatrix mQ = SSVDHelper.drmLoadAsDense(fs,
        new Path(svdOutPath, "Bt-job/" + BtJob.OUTPUT_Q + "-*"), conf);
    SSVDCommonTest.assertOrthonormality(mQ, false, s_epsilon);

    IOUtils.close(closeables);
}
From source file:org.apache.metron.common.dsl.functions.resolver.ClasspathFunctionResolverIntegrationTest.java
License:Apache License
@BeforeClass
public static void setup() {
    component = new MRComponent().withBasePath("target");
    component.start();
    configuration = component.getConfiguration();

    try {
        FileSystem fs = FileSystem.newInstance(configuration);
        fs.mkdirs(new Path("/classpath-resources"));
        fs.copyFromLocalFile(new Path("src/test/classpath-resources/custom-1.0-SNAPSHOT.jar"),
            new Path("/classpath-resources"));
    } catch (IOException e) {
        throw new RuntimeException("Unable to start cluster", e);
    }
}
From source file:org.apache.metron.maas.service.Client.java
License:Apache License
private Path addToLocalResources(FileSystem fs, String fileSrcPath, String fileDstPath, String appId,
        Map<String, LocalResource> localResources, String resources) throws IOException {
    String suffix = appName + "/" + appId + "/" + fileDstPath;
    Path dst = new Path(fs.getHomeDirectory(), suffix);
    if (fileSrcPath == null) {
        FSDataOutputStream ostream = null;
        try {
            ostream = FileSystem.create(fs, dst, new FsPermission((short) 0710));
            ostream.writeUTF(resources);
        } finally {
            IOUtils.closeQuietly(ostream);
        }
    } else {
        fs.copyFromLocalFile(new Path(fileSrcPath), dst);
    }
    FileStatus scFileStatus = fs.getFileStatus(dst);
    LocalResource scRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromURI(dst.toUri()),
        LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, scFileStatus.getLen(),
        scFileStatus.getModificationTime());
    localResources.put(fileDstPath, scRsrc);
    return dst;
}