Example usage for org.apache.hadoop.fs FileSystem copyFromLocalFile

List of usage examples for org.apache.hadoop.fs FileSystem copyFromLocalFile

Introduction

On this page you can find example usages of org.apache.hadoop.fs FileSystem copyFromLocalFile.

Prototype

public void copyFromLocalFile(Path src, Path dst) throws IOException 

Source Link

Document

The src file is on the local disk.

Usage

From source file:distributed.hadoop.HDFSUtils.java

License:Open Source License

/**
 * Copy a local file into HDFS/*from   w  ww . j av  a  2  s  .  c  o m*/
 * 
 * @param localFile the path to the local file
 * @param hdfsPath the destination path in HDFS
 * @param config the HDFSConfig containing connection details
 * @param env environment variables
 * @param overwrite true if the destination should be overwritten (if it
 *          already exists)
 * @throws IOException if a problem occurs
 */
public static void copyToHDFS(String localFile, String hdfsPath, HDFSConfig config, Environment env,
        boolean overwrite) throws IOException {
    File local = new File(localFile);
    URI localURI = local.toURI();

    Path localPath = new Path(localURI);

    Path destPath = new Path(resolvePath(hdfsPath, env));

    Configuration conf = new Configuration();
    // conf.set(HDFSConfig.FS_DEFAULT_NAME,
    // HDFSConfig.constructHostURL(config, env));
    config.configureForHadoop(conf, env);

    FileSystem fs = FileSystem.get(conf);

    // only copy if the file doesn't exist or overwrite is specified
    if (!fs.exists(destPath) || overwrite) {
        if (fs.exists(destPath)) {
            fs.delete(destPath, true);
        }
        fs.copyFromLocalFile(localPath, destPath);
    }
}

From source file:dz.lab.hdfs.CopyMove.java

/**
 * Copies the local file {@code /home/hadoop/sample.txt} to
 * {@code /tmp/sample.txt} on the given file system.
 *
 * @param fs the file system that receives the copy
 * @throws IOException if the copy fails
 */
public static void copyFromLocalToHDFS(FileSystem fs) throws IOException {
    // Source path first, destination path second
    fs.copyFromLocalFile(new Path("/home/hadoop/sample.txt"), new Path("/tmp/sample.txt"));
}

From source file:edu.bigdata.training.hdfs.HdfsApp.java

/**
 * Copies the local file {@code src/main/resources/sample.txt} to
 * {@code /user/myhome/sample} on the default file system, deleting any
 * existing destination first.
 *
 * @param argv command-line arguments (unused)
 * @throws IOException if the file system cannot be obtained or an operation on it fails
 */
public static void main(String[] argv) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // Hadoop DFS deals with Path
    Path inFile = new Path("src/main/resources/sample.txt");
    Path outFile = new Path("/user/myhome/sample");

    if (fs.exists(outFile)) {
        System.out.println("Output already exists");
        // The one-argument delete(Path) is deprecated; use the explicit recursive form
        fs.delete(outFile, true);
    }

    // Copy the local file to the destination path
    fs.copyFromLocalFile(inFile, outFile);
    System.exit(0);
}

From source file:edu.iu.benchmark.DataGenRunnable.java

License:Apache License

/**
 * Regenerates input data in a local directory and copies that directory to
 * the given file system.
 *
 * <p>The destination path and the local directory are both removed first if
 * they exist. One {@code DataGenRunnable} task is submitted per mapper, each
 * receiving the local directory name and its index as a string
 * (NOTE(review): presumably each task writes one file into the local
 * directory; confirm against DataGenRunnable).
 *
 * @param numMappers number of generator tasks to run
 * @param localDirName local directory that is recreated and then uploaded
 * @param inputDirPath destination path on {@code fs}; deleted recursively if present
 * @param fs file system that receives the generated data
 * @throws IOException if the local directory cannot be listed or a file system operation fails
 * @throws InterruptedException if interrupted while waiting for a generator task
 * @throws ExecutionException if a generator task fails
 */
public static void generateInputData(int numMappers, String localDirName, Path inputDirPath, FileSystem fs)
        throws IOException, InterruptedException, ExecutionException {
    // Check data directory
    if (fs.exists(inputDirPath)) {
        fs.delete(inputDirPath, true);
    }
    // Check local directory
    // If existed, regenerate data
    File localDir = new File(localDirName);
    if (localDir.exists() && localDir.isDirectory()) {
        // listFiles() returns null on an I/O error; report it instead of failing with an NPE
        File[] staleFiles = localDir.listFiles();
        if (staleFiles == null) {
            throw new IOException("Cannot list contents of local directory: " + localDir);
        }
        for (File file : staleFiles) {
            file.delete();
        }
        localDir.delete();
    }
    boolean success = localDir.mkdirs();
    if (success) {
        System.out.println("Directory: " + localDir + " created");
    }
    // Create random data points
    int poolSize = Runtime.getRuntime().availableProcessors();
    ExecutorService service = Executors.newFixedThreadPool(poolSize);
    try {
        List<Future<?>> futures = new LinkedList<Future<?>>();
        for (int k = 0; k < numMappers; k++) {
            futures.add(service.submit(new DataGenRunnable(localDirName, Integer.toString(k))));
        }
        // Wait for every task; get() rethrows a task failure as ExecutionException
        for (Future<?> f : futures) {
            f.get();
        }
    } finally {
        // Shut down the executor even when a task fails, so its
        // worker threads do not keep the JVM alive
        service.shutdownNow();
    }
    // Copy to HDFS
    Path localDirPath = new Path(localDirName);
    fs.copyFromLocalFile(localDirPath, inputDirPath);
}

From source file:edu.iu.daal_kmeans.regroupallgather.KMUtil.java

License:Apache License

/**
 * Generate data and upload to the data dir.
 *
 * <p>The points are split evenly across the files using integer division,
 * so any remainder of {@code numOfDataPoints / numPointFiles} is dropped.
 * The data dir and the local input dir are removed first if they exist.
 *
 * @param numOfDataPoints total number of points requested
 * @param vectorSize passed through to each generator task
 *          (NOTE(review): presumably the dimensionality of a point; confirm)
 * @param numPointFiles number of generator tasks, one per file index
 * @param localInputDir local directory that is recreated and then uploaded
 * @param fs file system that receives the generated data
 * @param dataDir destination path on {@code fs}; deleted recursively if present
 * @throws IOException if fewer than one point per file would be written, the
 *           local directory cannot be listed, or a file system operation fails
 * @throws InterruptedException if interrupted while waiting for a generator task
 * @throws ExecutionException if a generator task fails
 * @throws ArithmeticException if {@code numPointFiles} is zero
 */
static void generatePoints(int numOfDataPoints, int vectorSize, int numPointFiles, String localInputDir,
        FileSystem fs, Path dataDir) throws IOException, InterruptedException, ExecutionException {
    int pointsPerFile = numOfDataPoints / numPointFiles;
    System.out.println("Writing " + pointsPerFile + " vectors to a file");
    // Check data directory
    if (fs.exists(dataDir)) {
        fs.delete(dataDir, true);
    }
    // Check local directory
    File localDir = new File(localInputDir);
    // If existed, regenerate data
    if (localDir.exists() && localDir.isDirectory()) {
        // listFiles() returns null on an I/O error; report it instead of failing with an NPE
        File[] staleFiles = localDir.listFiles();
        if (staleFiles == null) {
            throw new IOException("Cannot list contents of local directory: " + localInputDir);
        }
        for (File file : staleFiles) {
            file.delete();
        }
        localDir.delete();
    }
    boolean success = localDir.mkdir();
    if (success) {
        System.out.println("Directory: " + localInputDir + " created");
    }
    if (pointsPerFile == 0) {
        throw new IOException("No point to write.");
    }
    // Create random data points
    int poolSize = Runtime.getRuntime().availableProcessors();
    ExecutorService service = Executors.newFixedThreadPool(poolSize);
    try {
        List<Future<?>> futures = new LinkedList<Future<?>>();
        for (int k = 0; k < numPointFiles; k++) {
            futures.add(service
                    .submit(new DataGenMMDense(pointsPerFile, localInputDir, Integer.toString(k), vectorSize)));
        }
        // Wait for every task; get() rethrows a task failure as ExecutionException
        for (Future<?> f : futures) {
            f.get();
        }
    } finally {
        // Shut down the executor even when a task fails, so its
        // worker threads do not keep the JVM alive
        service.shutdownNow();
    }
    // Wrap to path object
    Path localInput = new Path(localInputDir);
    fs.copyFromLocalFile(localInput, dataDir);
    // NOTE(review): presumably removes the local staging directory after upload; confirm
    DeleteFileFolder(localInputDir);
}

From source file:edu.iu.daal_naive.NaiveUtil.java

License:Apache License

/**
 * Generate data and upload to the data dir.
 *
 * <p>The points are split evenly across the files using integer division,
 * so any remainder of {@code numOfDataPoints / numPointFiles} is dropped.
 * The data dir and the local input dir are removed first if they exist.
 *
 * @param numOfDataPoints total number of points requested
 * @param vectorSize passed through to each generator task
 *          (NOTE(review): presumably the dimensionality of a point; confirm)
 * @param numPointFiles number of generator tasks, one per file index
 * @param nClasses passed through to each generator task
 *          (NOTE(review): presumably the number of class labels; confirm)
 * @param localInputDir local directory that is recreated and then uploaded
 * @param fs file system that receives the generated data
 * @param dataDir destination path on {@code fs}; deleted recursively if present
 * @throws IOException if fewer than one point per file would be written, the
 *           local directory cannot be listed, or a file system operation fails
 * @throws InterruptedException if interrupted while waiting for a generator task
 * @throws ExecutionException if a generator task fails
 * @throws ArithmeticException if {@code numPointFiles} is zero
 */
static void generatePoints(int numOfDataPoints, int vectorSize, int numPointFiles, int nClasses,
        String localInputDir, FileSystem fs, Path dataDir)
        throws IOException, InterruptedException, ExecutionException {

    int pointsPerFile = numOfDataPoints / numPointFiles;
    System.out.println("Writing " + pointsPerFile + " vectors to a file");
    // Check data directory
    if (fs.exists(dataDir)) {
        fs.delete(dataDir, true);
    }
    // Check local directory
    File localDir = new File(localInputDir);
    // If existed, regenerate data
    if (localDir.exists() && localDir.isDirectory()) {
        // listFiles() returns null on an I/O error; report it instead of failing with an NPE
        File[] staleFiles = localDir.listFiles();
        if (staleFiles == null) {
            throw new IOException("Cannot list contents of local directory: " + localInputDir);
        }
        for (File file : staleFiles) {
            file.delete();
        }
        localDir.delete();
    }
    boolean success = localDir.mkdir();
    if (success) {
        System.out.println("Directory: " + localInputDir + " created");
    }
    if (pointsPerFile == 0) {
        throw new IOException("No point to write.");
    }
    // Create random data points
    int poolSize = Runtime.getRuntime().availableProcessors();
    ExecutorService service = Executors.newFixedThreadPool(poolSize);
    try {
        List<Future<?>> futures = new LinkedList<Future<?>>();
        for (int k = 0; k < numPointFiles; k++) {
            futures.add(service.submit(
                    new DataGenNaiveBayes(localInputDir, Integer.toString(k), pointsPerFile, vectorSize, nClasses)));
        }
        // Wait for every task; get() rethrows a task failure as ExecutionException
        for (Future<?> f : futures) {
            f.get();
        }
    } finally {
        // Shut down the executor even when a task fails, so its
        // worker threads do not keep the JVM alive
        service.shutdownNow();
    }
    // Wrap to path object
    Path localInput = new Path(localInputDir);
    fs.copyFromLocalFile(localInput, dataDir);
}

From source file:edu.iu.daal_naive.NaiveUtil.java

License:Apache License

/**
 * Writes a random test data file named {@code testdata} into the local
 * input dir and uploads that dir to the data dir.
 *
 * <p>Each line holds {@code vectorSize} random values in {@code [-1, 1)},
 * each followed by a comma, and ends with a random label in
 * {@code [0, nClasses)}. The data dir and the local input dir are removed
 * first if they exist.
 *
 * @param numOfDataPoints number of lines to write
 * @param vectorSize number of random values per line
 * @param nClasses exclusive upper bound of the random label
 * @param localInputDir local directory that is recreated and then uploaded
 * @param fs file system that receives the generated data
 * @param dataDir destination path on {@code fs}; deleted recursively if present
 * @throws IOException if the local directory cannot be listed, the file
 *           cannot be written, or a file system operation fails
 * @throws InterruptedException declared for signature compatibility; not thrown by this method's own code
 * @throws ExecutionException declared for signature compatibility; not thrown by this method's own code
 */
static void generateTestPoints(int numOfDataPoints, int vectorSize, int nClasses, String localInputDir,
        FileSystem fs, Path dataDir) throws IOException, InterruptedException, ExecutionException {

    // Check data directory
    if (fs.exists(dataDir)) {
        fs.delete(dataDir, true);
    }
    // Check local directory
    File localDir = new File(localInputDir);
    // If existed, regenerate data
    if (localDir.exists() && localDir.isDirectory()) {
        // listFiles() returns null on an I/O error; report it instead of failing with an NPE
        File[] staleFiles = localDir.listFiles();
        if (staleFiles == null) {
            throw new IOException("Cannot list contents of local directory: " + localInputDir);
        }
        for (File file : staleFiles) {
            file.delete();
        }
        localDir.delete();
    }
    boolean success = localDir.mkdir();
    if (success) {
        System.out.println("Directory: " + localInputDir + " created");
    }

    // generate test points
    BufferedWriter writer = new BufferedWriter(new FileWriter(localInputDir + File.separator + "testdata"));
    try {
        Random random = new Random();
        for (int i = 0; i < numOfDataPoints; i++) {
            for (int j = 0; j < vectorSize; j++) {
                // Scale nextDouble() from [0, 1) to [-1, 1)
                double point = random.nextDouble() * 2 - 1;
                writer.write(String.valueOf(point));
                writer.write(",");
            }

            int label = random.nextInt(nClasses);
            writer.write(String.valueOf(label));
            writer.newLine();
        }
    } finally {
        // Close (and flush) the file even when a write fails
        writer.close();
    }
    System.out.println("Write test data file");

    // Wrap to path object
    Path localInput = new Path(localInputDir);
    fs.copyFromLocalFile(localInput, dataDir);

}

From source file:edu.iu.daal_naive.NaiveUtil.java

License:Apache License

/**
 * Writes a random label file named {@code groundtruth} into the local input
 * dir and uploads that dir to the data dir.
 *
 * <p>Each line holds one random label in {@code [0, nClasses)}. The data dir
 * and the local input dir are removed first if they exist.
 *
 * @param numOfDataPoints number of labels (lines) to write
 * @param nClasses exclusive upper bound of the random label
 * @param localInputDir local directory that is recreated and then uploaded
 * @param fs file system that receives the generated data
 * @param dataDir destination path on {@code fs}; deleted recursively if present
 * @throws IOException if the local directory cannot be listed, the file
 *           cannot be written, or a file system operation fails
 * @throws InterruptedException declared for signature compatibility; not thrown by this method's own code
 * @throws ExecutionException declared for signature compatibility; not thrown by this method's own code
 */
static void generateGroundTruth(int numOfDataPoints, int nClasses, String localInputDir, FileSystem fs,
        Path dataDir) throws IOException, InterruptedException, ExecutionException {

    // Check data directory
    if (fs.exists(dataDir)) {
        fs.delete(dataDir, true);
    }
    // Check local directory
    File localDir = new File(localInputDir);
    // If existed, regenerate data
    if (localDir.exists() && localDir.isDirectory()) {
        // listFiles() returns null on an I/O error; report it instead of failing with an NPE
        File[] staleFiles = localDir.listFiles();
        if (staleFiles == null) {
            throw new IOException("Cannot list contents of local directory: " + localInputDir);
        }
        for (File file : staleFiles) {
            file.delete();
        }
        localDir.delete();
    }
    boolean success = localDir.mkdir();
    if (success) {
        System.out.println("Directory: " + localInputDir + " created");
    }

    // generate ground-truth labels, one per line
    BufferedWriter writer = new BufferedWriter(new FileWriter(localInputDir + File.separator + "groundtruth"));
    try {
        Random random = new Random();
        for (int i = 0; i < numOfDataPoints; i++) {
            int label = random.nextInt(nClasses);
            writer.write(String.valueOf(label));
            writer.newLine();
        }
    } finally {
        // Close (and flush) the file even when a write fails
        writer.close();
    }
    System.out.println("Write groundtruth data file");

    // Wrap to path object
    Path localInput = new Path(localInputDir);
    fs.copyFromLocalFile(localInput, dataDir);

}

From source file:edu.iu.daal_pca.PCAUtil.java

License:Apache License

/**
 * Generate data and upload to the data dir.
 *
 * <p>The points are split evenly across the files using integer division,
 * so any remainder of {@code numOfDataPoints / numPointFiles} is dropped.
 * The data dir and the local input dir are removed first if they exist.
 *
 * @param numOfDataPoints total number of points requested
 * @param vectorSize passed through to each generator task
 *          (NOTE(review): presumably the dimensionality of a point; confirm)
 * @param numPointFiles number of generator tasks, one per file index
 * @param localInputDir local directory that is recreated and then uploaded
 * @param fs file system that receives the generated data
 * @param dataDir destination path on {@code fs}; deleted recursively if present
 * @throws IOException if fewer than one point per file would be written, the
 *           local directory cannot be listed, or a file system operation fails
 * @throws InterruptedException if interrupted while waiting for a generator task
 * @throws ExecutionException if a generator task fails
 * @throws ArithmeticException if {@code numPointFiles} is zero
 */
static void generatePoints(int numOfDataPoints, int vectorSize, int numPointFiles, String localInputDir,
        FileSystem fs, Path dataDir) throws IOException, InterruptedException, ExecutionException {
    int pointsPerFile = numOfDataPoints / numPointFiles;
    System.out.println("Writing " + pointsPerFile + " vectors to a file");
    // Check data directory
    if (fs.exists(dataDir)) {
        fs.delete(dataDir, true);
    }
    // Check local directory
    File localDir = new File(localInputDir);
    // If existed, regenerate data
    if (localDir.exists() && localDir.isDirectory()) {
        // listFiles() returns null on an I/O error; report it instead of failing with an NPE
        File[] staleFiles = localDir.listFiles();
        if (staleFiles == null) {
            throw new IOException("Cannot list contents of local directory: " + localInputDir);
        }
        for (File file : staleFiles) {
            file.delete();
        }
        localDir.delete();
    }
    boolean success = localDir.mkdir();
    if (success) {
        System.out.println("Directory: " + localInputDir + " created");
    }
    if (pointsPerFile == 0) {
        throw new IOException("No point to write.");
    }
    // Create random data points
    int poolSize = Runtime.getRuntime().availableProcessors();
    ExecutorService service = Executors.newFixedThreadPool(poolSize);
    try {
        List<Future<?>> futures = new LinkedList<Future<?>>();
        for (int k = 0; k < numPointFiles; k++) {
            futures.add(service
                    .submit(new DataGenMMDense(pointsPerFile, localInputDir, Integer.toString(k), vectorSize)));
        }
        // Wait for every task; get() rethrows a task failure as ExecutionException
        for (Future<?> f : futures) {
            f.get();
        }
    } finally {
        // Shut down the executor even when a task fails, so its
        // worker threads do not keep the JVM alive
        service.shutdownNow();
    }
    // Wrap to path object
    Path localInput = new Path(localInputDir);
    fs.copyFromLocalFile(localInput, dataDir);
    // NOTE(review): presumably removes the local staging directory after upload; confirm
    DeleteFileFolder(localInputDir);
}

From source file:edu.iu.daal_svd.SVDUtil.java

License:Apache License

/**
 * Generate data and upload to the data dir.
 *
 * <p>The points are split evenly across the files using integer division,
 * so any remainder of {@code numOfDataPoints / numPointFiles} is dropped.
 * The data dir and the local input dir are removed first if they exist.
 *
 * @param numOfDataPoints total number of points requested
 * @param vectorSize passed through to each generator task
 *          (NOTE(review): presumably the dimensionality of a point; confirm)
 * @param numPointFiles number of generator tasks, one per file index
 * @param localInputDir local directory that is recreated and then uploaded
 * @param fs file system that receives the generated data
 * @param dataDir destination path on {@code fs}; deleted recursively if present
 * @throws IOException if fewer than one point per file would be written, the
 *           local directory cannot be listed, or a file system operation fails
 * @throws InterruptedException if interrupted while waiting for a generator task
 * @throws ExecutionException if a generator task fails
 * @throws ArithmeticException if {@code numPointFiles} is zero
 */
static void generatePoints(int numOfDataPoints, int vectorSize, int numPointFiles, String localInputDir,
        FileSystem fs, Path dataDir) throws IOException, InterruptedException, ExecutionException {
    int pointsPerFile = numOfDataPoints / numPointFiles;
    System.out.println("Writing " + pointsPerFile + " vectors to a file");
    // Check data directory
    if (fs.exists(dataDir)) {
        fs.delete(dataDir, true);
    }
    // Check local directory
    File localDir = new File(localInputDir);
    // If existed, regenerate data
    if (localDir.exists() && localDir.isDirectory()) {
        // listFiles() returns null on an I/O error; report it instead of failing with an NPE
        File[] staleFiles = localDir.listFiles();
        if (staleFiles == null) {
            throw new IOException("Cannot list contents of local directory: " + localInputDir);
        }
        for (File file : staleFiles) {
            file.delete();
        }
        localDir.delete();
    }
    boolean success = localDir.mkdir();
    if (success) {
        System.out.println("Directory: " + localInputDir + " created");
    }
    if (pointsPerFile == 0) {
        throw new IOException("No point to write.");
    }
    // Create random data points
    int poolSize = Runtime.getRuntime().availableProcessors();
    ExecutorService service = Executors.newFixedThreadPool(poolSize);
    try {
        List<Future<?>> futures = new LinkedList<Future<?>>();
        for (int k = 0; k < numPointFiles; k++) {
            futures.add(service
                    .submit(new DataGenMMDense(pointsPerFile, localInputDir, Integer.toString(k), vectorSize)));
        }
        // Wait for every task; get() rethrows a task failure as ExecutionException
        for (Future<?> f : futures) {
            f.get();
        }
    } finally {
        // Shut down the executor even when a task fails, so its
        // worker threads do not keep the JVM alive
        service.shutdownNow();
    }
    // Wrap to path object
    Path localInput = new Path(localInputDir);
    fs.copyFromLocalFile(localInput, dataDir);
    // NOTE(review): presumably removes the local staging directory after upload; confirm
    DeleteFileFolder(localInputDir);
}