List of usage examples for org.apache.hadoop.fs FileSystem copyFromLocalFile
public void copyFromLocalFile(Path src, Path dst) throws IOException
From source file:edu.iu.data_gen.DataGenerator.java
License:Apache License
/** * @param total_points//from www . ja v a2 s .c o m * @param int * @return * @brief */ public static void generateDenseDataMulti(int num_points, int nFeatures, int num_files, double norm, double offset, String sep, Path dataDir, String tmpDirName, FileSystem fs) throws IOException, InterruptedException, ExecutionException {//{{{ int pointsPerFile = num_points / num_files; // clean data dir content if (fs.exists(dataDir)) { fs.delete(dataDir, true); } // clean tmp local dir File tmpDir = new File(tmpDirName); if (tmpDir.exists() && tmpDir.isDirectory()) { for (File file : tmpDir.listFiles()) file.delete(); tmpDir.delete(); } boolean success = tmpDir.mkdir(); if (success) System.out.println("Directory: " + tmpDirName + " created"); if (pointsPerFile == 0) throw new IOException("No point to write."); // create parallel Java threads pool int poolSize = Runtime.getRuntime().availableProcessors(); ExecutorService service = Executors.newFixedThreadPool(poolSize); List<Future<?>> futures = new LinkedList<Future<?>>(); // generate each file in parallel for (int k = 0; k < num_files; k++) { Future<?> f = service.submit(new DataGenMMDenseTask(pointsPerFile, tmpDirName, Integer.toString(k), nFeatures, norm, offset, sep)); futures.add(f); // add a new thread } for (Future<?> f : futures) { f.get(); } // Shut down the executor service so that this // thread can exit service.shutdownNow(); Path tmpDirPath = new Path(tmpDirName); fs.copyFromLocalFile(tmpDirPath, dataDir); DeleteFileFolder(tmpDirName); }
From source file:edu.iu.data_gen.DataGenerator.java
License:Apache License
public static void generateDenseLabelMulti(int num_points, int nFeatures, int num_files, int labelRange, String sep, Path dataDir, String tmpDirName, FileSystem fs) throws IOException, InterruptedException, ExecutionException {//{{{ int pointsPerFile = num_points / num_files; // clean data dir content if (fs.exists(dataDir)) { fs.delete(dataDir, true);/*from w w w. j a v a 2 s .com*/ } // clean tmp local dir File tmpDir = new File(tmpDirName); if (tmpDir.exists() && tmpDir.isDirectory()) { for (File file : tmpDir.listFiles()) file.delete(); tmpDir.delete(); } boolean success = tmpDir.mkdir(); if (success) System.out.println("Directory: " + tmpDirName + " created"); if (pointsPerFile == 0) throw new IOException("No point to write."); // create parallel Java threads pool int poolSize = Runtime.getRuntime().availableProcessors(); ExecutorService service = Executors.newFixedThreadPool(poolSize); List<Future<?>> futures = new LinkedList<Future<?>>(); // generate each file in parallel for (int k = 0; k < num_files; k++) { Future<?> f = service.submit(new DataGenMMDenseLabelTask(pointsPerFile, tmpDirName, Integer.toString(k), nFeatures, labelRange, sep)); futures.add(f); // add a new thread } for (Future<?> f : futures) { f.get(); } // Shut down the executor service so that this // thread can exit service.shutdownNow(); Path tmpDirPath = new Path(tmpDirName); fs.copyFromLocalFile(tmpDirPath, dataDir); DeleteFileFolder(tmpDirName); }
From source file:edu.iu.data_gen.DataGenerator.java
License:Apache License
public static void generateDenseDataAndIntLabelMulti(int num_points, int nFeatures, int num_files, double norm, double offset, int labelRange, String sep, Path dataDir, String tmpDirName, FileSystem fs) throws IOException, InterruptedException, ExecutionException {//{{{ int pointsPerFile = num_points / num_files; // clean data dir content if (fs.exists(dataDir)) { fs.delete(dataDir, true);/*from w w w . j ava 2s. c o m*/ } // clean tmp local dir File tmpDir = new File(tmpDirName); if (tmpDir.exists() && tmpDir.isDirectory()) { for (File file : tmpDir.listFiles()) file.delete(); tmpDir.delete(); } boolean success = tmpDir.mkdir(); if (success) System.out.println("Directory: " + tmpDirName + " created"); if (pointsPerFile == 0) throw new IOException("No point to write."); // create parallel Java threads pool int poolSize = Runtime.getRuntime().availableProcessors(); ExecutorService service = Executors.newFixedThreadPool(poolSize); List<Future<?>> futures = new LinkedList<Future<?>>(); // generate each file in parallel for (int k = 0; k < num_files; k++) { Future<?> f = service.submit(new DataGenMMDenseAndIntLabelTask(pointsPerFile, tmpDirName, Integer.toString(k), nFeatures, norm, offset, labelRange, sep)); futures.add(f); // add a new thread } for (Future<?> f : futures) { f.get(); } // Shut down the executor service so that this // thread can exit service.shutdownNow(); Path tmpDirPath = new Path(tmpDirName); fs.copyFromLocalFile(tmpDirPath, dataDir); DeleteFileFolder(tmpDirName); }
From source file:edu.iu.kmeans.DataGen.java
License:Apache License
/** * Generate data and upload to the data dir. * /* w w w . j a v a2s . c o m*/ * @param numOfDataPoints * @param vectorSize * @param numPointFiles * @param localInputDir * @param fs * @param dataDir * @throws IOException * @throws InterruptedException * @throws ExecutionException */ static void generateVectors(int numOfDataPoints, int vectorSize, int numPointFiles, String localInputDir, FileSystem fs, Path dataDir) throws IOException, InterruptedException, ExecutionException { int pointsPerFile = numOfDataPoints / numPointFiles; System.out.println("Writing " + pointsPerFile + " vectors to a file"); // Check data directory if (fs.exists(dataDir)) { fs.delete(dataDir, true); } // Check local directory File localDir = new File(localInputDir); // If existed, regenerate data if (localDir.exists() && localDir.isDirectory()) { for (File file : localDir.listFiles()) { file.delete(); } localDir.delete(); } boolean success = localDir.mkdir(); if (success) { System.out.println("Directory: " + localInputDir + " created"); } if (pointsPerFile == 0) { throw new IOException("No point to write."); } // Create random data points int poolSize = Runtime.getRuntime().availableProcessors(); ExecutorService service = Executors.newFixedThreadPool(poolSize); List<Future<?>> futures = new ArrayList<Future<?>>(); for (int k = 0; k < numPointFiles; k++) { Future<?> f = service .submit(new DataGenRunnable(pointsPerFile, localInputDir, Integer.toString(k), vectorSize)); futures.add(f); // add a new thread } for (Future<?> f : futures) { f.get(); } // Shut down the executor service so that this thread can exit service.shutdownNow(); // Wrap to path object Path localInput = new Path(localInputDir); fs.copyFromLocalFile(localInput, dataDir); }
From source file:edu.iu.kmeans.regroupallgather.KMUtil.java
License:Apache License
/** * Generate data and upload to the data dir. * // ww w. j a v a 2 s . c om * @param numOfDataPoints * @param vectorSize * @param numPointFiles * @param localInputDir * @param fs * @param dataDir * @throws IOException * @throws InterruptedException * @throws ExecutionException */ static void generatePoints(int numOfDataPoints, int vectorSize, int numPointFiles, String localInputDir, FileSystem fs, Path dataDir) throws IOException, InterruptedException, ExecutionException { int pointsPerFile = numOfDataPoints / numPointFiles; System.out.println("Writing " + pointsPerFile + " vectors to a file"); // Check data directory if (fs.exists(dataDir)) { fs.delete(dataDir, true); } // Check local directory File localDir = new File(localInputDir); // If existed, regenerate data if (localDir.exists() && localDir.isDirectory()) { for (File file : localDir.listFiles()) { file.delete(); } localDir.delete(); } boolean success = localDir.mkdir(); if (success) { System.out.println("Directory: " + localInputDir + " created"); } if (pointsPerFile == 0) { throw new IOException("No point to write."); } // Create random data points int poolSize = Runtime.getRuntime().availableProcessors(); ExecutorService service = Executors.newFixedThreadPool(poolSize); List<Future<?>> futures = new LinkedList<Future<?>>(); for (int k = 0; k < numPointFiles; k++) { Future<?> f = service .submit(new DataGenRunnable(pointsPerFile, localInputDir, Integer.toString(k), vectorSize)); futures.add(f); // add a new thread } for (Future<?> f : futures) { f.get(); } // Shut down the executor service so that this // thread can exit service.shutdownNow(); // Wrap to path object Path localInput = new Path(localInputDir); fs.copyFromLocalFile(localInput, dataDir); }
From source file:edu.iu.mds.DataGen.java
License:Apache License
static void generateDistanceMatrix(int numOfDataPoints, int numPartitions, String localDir, FileSystem fs, String dataDir) throws IOException, InterruptedException, ExecutionException { Path dataDirPath = new Path(dataDir); // Check data directory if (fs.exists(dataDirPath)) { fs.delete(dataDirPath, true);// w w w . j a v a2 s . co m } // Check local directory File newDir = new File(localDir); // If existed, regenerate data if (newDir.exists()) { newDir.delete(); } boolean success = newDir.mkdir(); if (success) { System.out.println("Directory: " + localDir + " created"); } // Create random data points // distMat[i][j] == distMat[j][i], distMat[i][i] = 0 // We set all distance to 1 double[] distMat = new double[numOfDataPoints * numOfDataPoints]; for (int i = 0; i < numOfDataPoints; i++) { for (int j = 0; j < i; j++) { distMat[i * numOfDataPoints + j] = 1; distMat[j * numOfDataPoints + i] = 1; } distMat[i * numOfDataPoints + i] = 0; } int height = numOfDataPoints / numPartitions; int rest = numOfDataPoints % numPartitions; int rowStart = 0; int poolSize = Runtime.getRuntime().availableProcessors(); ExecutorService service = Executors.newFixedThreadPool(poolSize); List<Future<?>> futures = new ArrayList<Future<?>>(); for (int i = 0; i < numPartitions; i++) { Future<?> f = null; if (rest > 0) { rest--; // Width, height, row ID, row Offset f = service .submit(new DataGenRunnable(distMat, localDir, numOfDataPoints, height + 1, i, rowStart)); rowStart = rowStart + height + 1; } else if (height > 0) { f = service.submit(new DataGenRunnable(distMat, localDir, numOfDataPoints, height, i, rowStart)); rowStart = rowStart + height; } else { break; } futures.add(f); // add a new thread } for (Future<?> f : futures) { f.get(); } // Shut down the executor service so that this thread can exit service.shutdownNow(); // Wrap to path object Path localInput = new Path(localDir); fs.copyFromLocalFile(localInput, dataDirPath); }
From source file:edu.uci.ics.asterix.aoya.HDFSBackup.java
License:Apache License
/**
 * Copies each local path to its backup destination.
 *
 * <p>The target file system is obtained from {@code conf}. Each entry of {@code paths}
 * is read as a pair: element 0 is the source handed to
 * {@link FileSystem#copyFromLocalFile(Path, Path)} and element 1 is the destination.
 *
 * @param paths source/destination pairs to copy, processed in list order
 * @throws IOException if the file system cannot be obtained or a copy fails
 */
private void performBackup(List<Path[]> paths) throws IOException {
    final FileSystem targetFs = FileSystem.get(conf);
    for (Path[] pair : paths) {
        final Path source = pair[0];
        final Path destination = pair[1];
        LOG.info("Backing up " + source + " to " + destination + ".");
        targetFs.copyFromLocalFile(source, destination);
    }
}
From source file:edu.uci.ics.hyracks.hdfs.dataflow.DataflowTest.java
License:Apache License
/**
 * Start the HDFS cluster and setup the data files.
 *
 * <p>Steps, in order: add the core/mapred/hdfs site files under
 * {@code PATH_TO_HADOOP_CONF} to {@code conf}; recursively delete the local
 * {@code build} directory; set the {@code hadoop.log.dir} system property to
 * {@code logs}; create the {@code MiniDFSCluster}; create {@code HDFS_INPUT_PATH} and
 * {@code HDFS_OUTPUT_PATH}; copy {@code DATA_PATH} into {@code HDFS_INPUT_PATH}; and
 * write {@code conf} as XML to {@code HADOOP_CONF_PATH}.
 *
 * @throws IOException if a file system operation or writing the configuration fails
 */
private void startHDFS() throws IOException {
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));

    // Remove the local "build" directory before the cluster is created.
    FileSystem lfs = FileSystem.getLocal(new Configuration());
    lfs.delete(new Path("build"), true);
    System.setProperty("hadoop.log.dir", "logs");
    dfsCluster = new MiniDFSCluster(conf, numberOfNC, true, null);

    // File system resolved from conf; presumably the mini cluster started above -- confirm
    FileSystem dfs = FileSystem.get(conf);
    Path src = new Path(DATA_PATH);
    Path dest = new Path(HDFS_INPUT_PATH);
    Path result = new Path(HDFS_OUTPUT_PATH);
    dfs.mkdirs(dest);
    dfs.mkdirs(result);
    dfs.copyFromLocalFile(src, dest);

    // Write the effective configuration to disk; close the stream even when
    // writing fails so the file handle is not leaked.
    DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
    try {
        conf.writeXml(confOutput);
        confOutput.flush();
    } finally {
        confOutput.close();
    }
}
From source file:edu.uci.ics.hyracks.hdfs2.dataflow.DataflowTest.java
License:Apache License
/** * Start the HDFS cluster and setup the data files * /*w w w .j a v a2s . c o m*/ * @throws IOException */ private void startHDFS() throws IOException { conf.getConfiguration().addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml")); conf.getConfiguration().addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml")); conf.getConfiguration().addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml")); FileSystem lfs = FileSystem.getLocal(new Configuration()); lfs.delete(new Path("build"), true); System.setProperty("hadoop.log.dir", "logs"); dfsCluster = dfsClusterFactory.getMiniDFSCluster(conf.getConfiguration(), numberOfNC); FileSystem dfs = FileSystem.get(conf.getConfiguration()); Path src = new Path(DATA_PATH); Path dest = new Path(HDFS_INPUT_PATH); Path result = new Path(HDFS_OUTPUT_PATH); dfs.mkdirs(dest); dfs.mkdirs(result); dfs.copyFromLocalFile(src, dest); DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH))); conf.getConfiguration().writeXml(confOutput); confOutput.flush(); confOutput.close(); }
From source file:edu.uci.ics.pregelix.example.jobrun.RunJobTestSuite.java
License:Apache License
/**
 * Starts the mini DFS cluster and uploads the three test data sets.
 *
 * <p>Steps, in order: add the core/mapred/hdfs site files under
 * {@code PATH_TO_HADOOP_CONF} to {@code conf}; recursively delete the local
 * {@code build} directory; set the {@code hadoop.log.dir} system property to
 * {@code logs}; create the {@code MiniDFSCluster}; for each of the pairs
 * ({@code DATA_PATH}, {@code HDFS_PATH}), ({@code DATA_PATH2}, {@code HDFS_PATH2}) and
 * ({@code DATA_PATH3}, {@code HDFS_PATH3}) create the destination directory and copy
 * the local source into it; finally write {@code conf} as XML to
 * {@code HADOOP_CONF_PATH}.
 *
 * @throws IOException if a file system operation or writing the configuration fails
 */
private void startHDFS() throws IOException {
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));

    // Remove the local "build" directory before the cluster is created.
    FileSystem lfs = FileSystem.getLocal(new Configuration());
    lfs.delete(new Path("build"), true);
    System.setProperty("hadoop.log.dir", "logs");
    dfsCluster = new MiniDFSCluster(conf, numberOfNC, true, null);

    // File system resolved from conf; presumably the mini cluster started above -- confirm
    FileSystem dfs = FileSystem.get(conf);

    Path src = new Path(DATA_PATH);
    Path dest = new Path(HDFS_PATH);
    dfs.mkdirs(dest);
    dfs.copyFromLocalFile(src, dest);

    src = new Path(DATA_PATH2);
    dest = new Path(HDFS_PATH2);
    dfs.mkdirs(dest);
    dfs.copyFromLocalFile(src, dest);

    src = new Path(DATA_PATH3);
    dest = new Path(HDFS_PATH3);
    dfs.mkdirs(dest);
    dfs.copyFromLocalFile(src, dest);

    // Write the effective configuration to disk; close the stream even when
    // writing fails so the file handle is not leaked.
    DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
    try {
        conf.writeXml(confOutput);
        confOutput.flush();
    } finally {
        confOutput.close();
    }
}