List of usage examples for org.apache.hadoop.fs.FileSystem.copyFromLocalFile
public void copyFromLocalFile(Path src, Path dst) throws IOException
From source file:distributed.hadoop.HDFSUtils.java
License:Open Source License
/** * Copy a local file into HDFS/*from w ww . j av a 2 s . c o m*/ * * @param localFile the path to the local file * @param hdfsPath the destination path in HDFS * @param config the HDFSConfig containing connection details * @param env environment variables * @param overwrite true if the destination should be overwritten (if it * already exists) * @throws IOException if a problem occurs */ public static void copyToHDFS(String localFile, String hdfsPath, HDFSConfig config, Environment env, boolean overwrite) throws IOException { File local = new File(localFile); URI localURI = local.toURI(); Path localPath = new Path(localURI); Path destPath = new Path(resolvePath(hdfsPath, env)); Configuration conf = new Configuration(); // conf.set(HDFSConfig.FS_DEFAULT_NAME, // HDFSConfig.constructHostURL(config, env)); config.configureForHadoop(conf, env); FileSystem fs = FileSystem.get(conf); // only copy if the file doesn't exist or overwrite is specified if (!fs.exists(destPath) || overwrite) { if (fs.exists(destPath)) { fs.delete(destPath, true); } fs.copyFromLocalFile(localPath, destPath); } }
From source file:dz.lab.hdfs.CopyMove.java
/**
 * Copies the fixed local file {@code /home/hadoop/sample.txt} to
 * {@code /tmp/sample.txt} on the given file system.
 *
 * @param fs the file system handle used to perform the copy
 * @throws IOException if the copy fails
 */
public static void copyFromLocalToHDFS(FileSystem fs) throws IOException {
    final Path localSource = new Path("/home/hadoop/sample.txt");
    final Path remoteTarget = new Path("/tmp/sample.txt");
    fs.copyFromLocalFile(localSource, remoteTarget);
}
From source file:edu.bigdata.training.hdfs.HdfsApp.java
public static void main(String[] argv) throws IOException { Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); // Hadoop DFS deals with Path Path inFile = new Path("src/main/resources/sample.txt"); Path outFile = new Path("/user/myhome/sample"); if (fs.exists(outFile)) { System.out.println("Output already exists"); fs.delete(outFile);/*from www. ja v a 2s. c o m*/ } // Read from and write to new file fs.copyFromLocalFile(inFile, outFile); System.exit(0); }
From source file:edu.iu.benchmark.DataGenRunnable.java
License:Apache License
public static void generateInputData(int numMappers, String localDirName, Path inputDirPath, FileSystem fs) throws IOException, InterruptedException, ExecutionException { // Check data directory if (fs.exists(inputDirPath)) { fs.delete(inputDirPath, true);/* ww w .j a v a 2 s.c o m*/ } // Check local directory // If existed, regenerate data File localDir = new File(localDirName); if (localDir.exists() && localDir.isDirectory()) { for (File file : localDir.listFiles()) { file.delete(); } localDir.delete(); } boolean success = localDir.mkdirs(); if (success) { System.out.println("Directory: " + localDir + " created"); } // Create random data points int poolSize = Runtime.getRuntime().availableProcessors(); ExecutorService service = Executors.newFixedThreadPool(poolSize); List<Future<?>> futures = new LinkedList<Future<?>>(); for (int k = 0; k < numMappers; k++) { Future<?> f = service.submit(new DataGenRunnable(localDirName, k + "")); futures.add(f); } for (Future<?> f : futures) { f.get(); } // Shut down the executor service so that this // thread can exit service.shutdownNow(); // Copy to HDFS Path localDirPath = new Path(localDirName); fs.copyFromLocalFile(localDirPath, inputDirPath); }
From source file:edu.iu.daal_kmeans.regroupallgather.KMUtil.java
License:Apache License
/** * Generate data and upload to the data dir. * //w w w . j a v a 2s . com * @param numOfDataPoints * @param vectorSize * @param numPointFiles * @param localInputDir * @param fs * @param dataDir * @throws IOException * @throws InterruptedException * @throws ExecutionException */ static void generatePoints(int numOfDataPoints, int vectorSize, int numPointFiles, String localInputDir, FileSystem fs, Path dataDir) throws IOException, InterruptedException, ExecutionException { int pointsPerFile = numOfDataPoints / numPointFiles; System.out.println("Writing " + pointsPerFile + " vectors to a file"); // Check data directory if (fs.exists(dataDir)) { fs.delete(dataDir, true); } // Check local directory File localDir = new File(localInputDir); // If existed, regenerate data if (localDir.exists() && localDir.isDirectory()) { for (File file : localDir.listFiles()) { file.delete(); } localDir.delete(); } boolean success = localDir.mkdir(); if (success) { System.out.println("Directory: " + localInputDir + " created"); } if (pointsPerFile == 0) { throw new IOException("No point to write."); } // Create random data points int poolSize = Runtime.getRuntime().availableProcessors(); ExecutorService service = Executors.newFixedThreadPool(poolSize); List<Future<?>> futures = new LinkedList<Future<?>>(); for (int k = 0; k < numPointFiles; k++) { Future<?> f = service .submit(new DataGenMMDense(pointsPerFile, localInputDir, Integer.toString(k), vectorSize)); futures.add(f); // add a new thread } for (Future<?> f : futures) { f.get(); } // Shut down the executor service so that this // thread can exit service.shutdownNow(); // Wrap to path object Path localInput = new Path(localInputDir); fs.copyFromLocalFile(localInput, dataDir); DeleteFileFolder(localInputDir); }
From source file:edu.iu.daal_naive.NaiveUtil.java
License:Apache License
static void generatePoints(int numOfDataPoints, int vectorSize, int numPointFiles, int nClasses, String localInputDir, FileSystem fs, Path dataDir) throws IOException, InterruptedException, ExecutionException { int pointsPerFile = numOfDataPoints / numPointFiles; System.out.println("Writing " + pointsPerFile + " vectors to a file"); // Check data directory if (fs.exists(dataDir)) { fs.delete(dataDir, true);/*from ww w . ja va2s . c o m*/ } // Check local directory File localDir = new File(localInputDir); // If existed, regenerate data if (localDir.exists() && localDir.isDirectory()) { for (File file : localDir.listFiles()) { file.delete(); } localDir.delete(); } boolean success = localDir.mkdir(); if (success) { System.out.println("Directory: " + localInputDir + " created"); } if (pointsPerFile == 0) { throw new IOException("No point to write."); } // Create random data points int poolSize = Runtime.getRuntime().availableProcessors(); ExecutorService service = Executors.newFixedThreadPool(poolSize); List<Future<?>> futures = new LinkedList<Future<?>>(); for (int k = 0; k < numPointFiles; k++) { Future<?> f = service.submit( new DataGenNaiveBayes(localInputDir, Integer.toString(k), pointsPerFile, vectorSize, nClasses)); futures.add(f); // add a new thread } for (Future<?> f : futures) { f.get(); } // Shut down the executor service so that this // thread can exit service.shutdownNow(); // Wrap to path object Path localInput = new Path(localInputDir); fs.copyFromLocalFile(localInput, dataDir); }
From source file:edu.iu.daal_naive.NaiveUtil.java
License:Apache License
static void generateTestPoints(int numOfDataPoints, int vectorSize, int nClasses, String localInputDir, FileSystem fs, Path dataDir) throws IOException, InterruptedException, ExecutionException { // Check data directory if (fs.exists(dataDir)) { fs.delete(dataDir, true);// w w w. j a v a 2 s .co m } // Check local directory File localDir = new File(localInputDir); // If existed, regenerate data if (localDir.exists() && localDir.isDirectory()) { for (File file : localDir.listFiles()) { file.delete(); } localDir.delete(); } boolean success = localDir.mkdir(); if (success) { System.out.println("Directory: " + localInputDir + " created"); } // generate test points BufferedWriter writer = new BufferedWriter(new FileWriter(localInputDir + File.separator + "testdata")); Random random = new Random(); double point = 0; int label = 0; for (int i = 0; i < numOfDataPoints; i++) { for (int j = 0; j < vectorSize; j++) { point = random.nextDouble() * 2 - 1; writer.write(String.valueOf(point)); writer.write(","); } label = random.nextInt(nClasses); writer.write(String.valueOf(label)); writer.newLine(); } writer.close(); System.out.println("Write test data file"); // Wrap to path object Path localInput = new Path(localInputDir); fs.copyFromLocalFile(localInput, dataDir); }
From source file:edu.iu.daal_naive.NaiveUtil.java
License:Apache License
static void generateGroundTruth(int numOfDataPoints, int nClasses, String localInputDir, FileSystem fs, Path dataDir) throws IOException, InterruptedException, ExecutionException { // Check data directory if (fs.exists(dataDir)) { fs.delete(dataDir, true);/*w ww.j a va 2 s . c o m*/ } // Check local directory File localDir = new File(localInputDir); // If existed, regenerate data if (localDir.exists() && localDir.isDirectory()) { for (File file : localDir.listFiles()) { file.delete(); } localDir.delete(); } boolean success = localDir.mkdir(); if (success) { System.out.println("Directory: " + localInputDir + " created"); } // generate test points BufferedWriter writer = new BufferedWriter(new FileWriter(localInputDir + File.separator + "groundtruth")); Random random = new Random(); // double point = 0; int label = 0; for (int i = 0; i < numOfDataPoints; i++) { // for (int j = 0; j < vectorSize; j++) { // point = random.nextDouble()*2 -1; // writer.write(String.valueOf(point)); // writer.write(","); // } label = random.nextInt(nClasses); writer.write(String.valueOf(label)); writer.newLine(); } writer.close(); System.out.println("Write groundtruth data file"); // Wrap to path object Path localInput = new Path(localInputDir); fs.copyFromLocalFile(localInput, dataDir); }
From source file:edu.iu.daal_pca.PCAUtil.java
License:Apache License
/** * Generate data and upload to the data dir. * * @param numOfDataPoints//from w w w . ja va2 s .co m * @param vectorSize * @param numPointFiles * @param localInputDir * @param fs * @param dataDir * @throws IOException * @throws InterruptedException * @throws ExecutionException */ static void generatePoints(int numOfDataPoints, int vectorSize, int numPointFiles, String localInputDir, FileSystem fs, Path dataDir) throws IOException, InterruptedException, ExecutionException { int pointsPerFile = numOfDataPoints / numPointFiles; System.out.println("Writing " + pointsPerFile + " vectors to a file"); // Check data directory if (fs.exists(dataDir)) { fs.delete(dataDir, true); } // Check local directory File localDir = new File(localInputDir); // If existed, regenerate data if (localDir.exists() && localDir.isDirectory()) { for (File file : localDir.listFiles()) { file.delete(); } localDir.delete(); } boolean success = localDir.mkdir(); if (success) { System.out.println("Directory: " + localInputDir + " created"); } if (pointsPerFile == 0) { throw new IOException("No point to write."); } // Create random data points int poolSize = Runtime.getRuntime().availableProcessors(); ExecutorService service = Executors.newFixedThreadPool(poolSize); List<Future<?>> futures = new LinkedList<Future<?>>(); for (int k = 0; k < numPointFiles; k++) { // Future<?> f = service.submit(new DataGenRunnable(pointsPerFile, localInputDir, Integer.toString(k), vectorSize)); Future<?> f = service .submit(new DataGenMMDense(pointsPerFile, localInputDir, Integer.toString(k), vectorSize)); futures.add(f); // add a new thread } for (Future<?> f : futures) { f.get(); } // Shut down the executor service so that this // thread can exit service.shutdownNow(); // Wrap to path object Path localInput = new Path(localInputDir); fs.copyFromLocalFile(localInput, dataDir); DeleteFileFolder(localInputDir); }
From source file:edu.iu.daal_svd.SVDUtil.java
License:Apache License
/** * Generate data and upload to the data dir. * //from w w w. java 2s. c o m * @param numOfDataPoints * @param vectorSize * @param numPointFiles * @param localInputDir * @param fs * @param dataDir * @throws IOException * @throws InterruptedException * @throws ExecutionException */ static void generatePoints(int numOfDataPoints, int vectorSize, int numPointFiles, String localInputDir, FileSystem fs, Path dataDir) throws IOException, InterruptedException, ExecutionException { int pointsPerFile = numOfDataPoints / numPointFiles; System.out.println("Writing " + pointsPerFile + " vectors to a file"); // Check data directory if (fs.exists(dataDir)) { fs.delete(dataDir, true); } // Check local directory File localDir = new File(localInputDir); // If existed, regenerate data if (localDir.exists() && localDir.isDirectory()) { for (File file : localDir.listFiles()) { file.delete(); } localDir.delete(); } boolean success = localDir.mkdir(); if (success) { System.out.println("Directory: " + localInputDir + " created"); } if (pointsPerFile == 0) { throw new IOException("No point to write."); } // Create random data points int poolSize = Runtime.getRuntime().availableProcessors(); ExecutorService service = Executors.newFixedThreadPool(poolSize); List<Future<?>> futures = new LinkedList<Future<?>>(); for (int k = 0; k < numPointFiles; k++) { // Future<?> f = // service.submit(new DataGenRunnable( // pointsPerFile, localInputDir, Integer // .toString(k), vectorSize)); Future<?> f = service .submit(new DataGenMMDense(pointsPerFile, localInputDir, Integer.toString(k), vectorSize)); futures.add(f); // add a new thread } for (Future<?> f : futures) { f.get(); } // Shut down the executor service so that this // thread can exit service.shutdownNow(); // Wrap to path object Path localInput = new Path(localInputDir); fs.copyFromLocalFile(localInput, dataDir); DeleteFileFolder(localInputDir); }