Example usage for org.apache.hadoop.fs FileSystem copyFromLocalFile

List of usage examples for org.apache.hadoop.fs FileSystem copyFromLocalFile

Introduction

On this page you can find example usages of org.apache.hadoop.fs FileSystem copyFromLocalFile.

Prototype

public void copyFromLocalFile(Path src, Path dst) throws IOException 

Source Link

Document

The src file is on the local disk.

Usage

From source file:edu.iu.data_gen.DataGenerator.java

License:Apache License

/**
 * Generates dense data files in parallel inside a local temporary directory, uploads that
 * directory to {@code dataDir} and then removes the temporary directory.
 *
 * <p>Any existing {@code dataDir} is deleted recursively and the files directly inside an
 * existing {@code tmpDirName} directory are deleted before generation starts. Both happen
 * before the "nothing to write" check, so they also happen when that check fails.
 *
 * @param num_points total number of points; each file receives
 *        {@code num_points / num_files} points (integer division, remainder dropped)
 * @param nFeatures  forwarded unchanged to each {@code DataGenMMDenseTask}
 * @param num_files  number of files, i.e. number of generator tasks submitted
 * @param norm       forwarded unchanged to each {@code DataGenMMDenseTask}
 * @param offset     forwarded unchanged to each {@code DataGenMMDenseTask}
 * @param sep        forwarded unchanged to each {@code DataGenMMDenseTask}
 * @param dataDir    destination directory on {@code fs}
 * @param tmpDirName local directory the tasks are pointed at
 * @param fs         file system that receives the generated directory
 * @throws IOException          if {@code num_points / num_files} is zero or a file system
 *                              operation fails
 * @throws InterruptedException if the calling thread is interrupted while waiting for a task
 * @throws ExecutionException   if a generator task fails
 * @throws ArithmeticException  if {@code num_files} is zero
 */
public static void generateDenseDataMulti(int num_points, int nFeatures, int num_files, double norm,
        double offset, String sep, Path dataDir, String tmpDirName, FileSystem fs)
        throws IOException, InterruptedException, ExecutionException {//{{{

    int pointsPerFile = num_points / num_files;
    // clean data dir content
    if (fs.exists(dataDir)) {
        fs.delete(dataDir, true);
    }

    // clean tmp local dir
    File tmpDir = new File(tmpDirName);
    if (tmpDir.exists() && tmpDir.isDirectory()) {
        // listFiles() returns null when the directory cannot be listed
        File[] staleFiles = tmpDir.listFiles();
        if (staleFiles != null) {
            for (File file : staleFiles) {
                file.delete();
            }
        }
        tmpDir.delete();
    }

    boolean success = tmpDir.mkdir();
    if (success) {
        System.out.println("Directory: " + tmpDirName + " created");
    }

    if (pointsPerFile == 0) {
        throw new IOException("No point to write.");
    }

    // create parallel Java threads pool
    int poolSize = Runtime.getRuntime().availableProcessors();
    ExecutorService service = Executors.newFixedThreadPool(poolSize);
    try {
        List<Future<?>> futures = new LinkedList<Future<?>>();

        // generate each file in parallel
        for (int k = 0; k < num_files; k++) {
            Future<?> f = service.submit(new DataGenMMDenseTask(pointsPerFile, tmpDirName,
                    Integer.toString(k), nFeatures, norm, offset, sep));
            futures.add(f);
        }

        // wait for every task; a failed task surfaces here as ExecutionException
        for (Future<?> f : futures) {
            f.get();
        }
    } finally {
        // Always stop the pool, even when a task failed or the wait was interrupted;
        // otherwise its non-daemon worker threads keep the JVM from exiting.
        service.shutdownNow();
    }

    Path tmpDirPath = new Path(tmpDirName);
    fs.copyFromLocalFile(tmpDirPath, dataDir);
    DeleteFileFolder(tmpDirName);

}

From source file:edu.iu.data_gen.DataGenerator.java

License:Apache License

/**
 * Generates dense label files in parallel inside a local temporary directory, uploads that
 * directory to {@code dataDir} and then removes the temporary directory.
 *
 * <p>Any existing {@code dataDir} is deleted recursively and the files directly inside an
 * existing {@code tmpDirName} directory are deleted before generation starts. Both happen
 * before the "nothing to write" check, so they also happen when that check fails.
 *
 * @param num_points total number of points; each file receives
 *        {@code num_points / num_files} points (integer division, remainder dropped)
 * @param nFeatures  forwarded unchanged to each {@code DataGenMMDenseLabelTask}
 * @param num_files  number of files, i.e. number of generator tasks submitted
 * @param labelRange forwarded unchanged to each {@code DataGenMMDenseLabelTask}
 * @param sep        forwarded unchanged to each {@code DataGenMMDenseLabelTask}
 * @param dataDir    destination directory on {@code fs}
 * @param tmpDirName local directory the tasks are pointed at
 * @param fs         file system that receives the generated directory
 * @throws IOException          if {@code num_points / num_files} is zero or a file system
 *                              operation fails
 * @throws InterruptedException if the calling thread is interrupted while waiting for a task
 * @throws ExecutionException   if a generator task fails
 * @throws ArithmeticException  if {@code num_files} is zero
 */
public static void generateDenseLabelMulti(int num_points, int nFeatures, int num_files, int labelRange,
        String sep, Path dataDir, String tmpDirName, FileSystem fs)
        throws IOException, InterruptedException, ExecutionException {//{{{

    int pointsPerFile = num_points / num_files;
    // clean data dir content
    if (fs.exists(dataDir)) {
        fs.delete(dataDir, true);
    }

    // clean tmp local dir
    File tmpDir = new File(tmpDirName);
    if (tmpDir.exists() && tmpDir.isDirectory()) {
        // listFiles() returns null when the directory cannot be listed
        File[] staleFiles = tmpDir.listFiles();
        if (staleFiles != null) {
            for (File file : staleFiles) {
                file.delete();
            }
        }
        tmpDir.delete();
    }

    boolean success = tmpDir.mkdir();
    if (success) {
        System.out.println("Directory: " + tmpDirName + " created");
    }

    if (pointsPerFile == 0) {
        throw new IOException("No point to write.");
    }

    // create parallel Java threads pool
    int poolSize = Runtime.getRuntime().availableProcessors();
    ExecutorService service = Executors.newFixedThreadPool(poolSize);
    try {
        List<Future<?>> futures = new LinkedList<Future<?>>();

        // generate each file in parallel
        for (int k = 0; k < num_files; k++) {
            Future<?> f = service.submit(new DataGenMMDenseLabelTask(pointsPerFile, tmpDirName,
                    Integer.toString(k), nFeatures, labelRange, sep));
            futures.add(f);
        }

        // wait for every task; a failed task surfaces here as ExecutionException
        for (Future<?> f : futures) {
            f.get();
        }
    } finally {
        // Always stop the pool, even when a task failed or the wait was interrupted;
        // otherwise its non-daemon worker threads keep the JVM from exiting.
        service.shutdownNow();
    }

    Path tmpDirPath = new Path(tmpDirName);
    fs.copyFromLocalFile(tmpDirPath, dataDir);
    DeleteFileFolder(tmpDirName);

}

From source file:edu.iu.data_gen.DataGenerator.java

License:Apache License

/**
 * Generates files through {@code DataGenMMDenseAndIntLabelTask} in parallel inside a local
 * temporary directory, uploads that directory to {@code dataDir} and then removes the
 * temporary directory.
 *
 * <p>Any existing {@code dataDir} is deleted recursively and the files directly inside an
 * existing {@code tmpDirName} directory are deleted before generation starts. Both happen
 * before the "nothing to write" check, so they also happen when that check fails.
 *
 * @param num_points total number of points; each file receives
 *        {@code num_points / num_files} points (integer division, remainder dropped)
 * @param nFeatures  forwarded unchanged to each {@code DataGenMMDenseAndIntLabelTask}
 * @param num_files  number of files, i.e. number of generator tasks submitted
 * @param norm       forwarded unchanged to each {@code DataGenMMDenseAndIntLabelTask}
 * @param offset     forwarded unchanged to each {@code DataGenMMDenseAndIntLabelTask}
 * @param labelRange forwarded unchanged to each {@code DataGenMMDenseAndIntLabelTask}
 * @param sep        forwarded unchanged to each {@code DataGenMMDenseAndIntLabelTask}
 * @param dataDir    destination directory on {@code fs}
 * @param tmpDirName local directory the tasks are pointed at
 * @param fs         file system that receives the generated directory
 * @throws IOException          if {@code num_points / num_files} is zero or a file system
 *                              operation fails
 * @throws InterruptedException if the calling thread is interrupted while waiting for a task
 * @throws ExecutionException   if a generator task fails
 * @throws ArithmeticException  if {@code num_files} is zero
 */
public static void generateDenseDataAndIntLabelMulti(int num_points, int nFeatures, int num_files, double norm,
        double offset, int labelRange, String sep, Path dataDir, String tmpDirName, FileSystem fs)
        throws IOException, InterruptedException, ExecutionException {//{{{

    int pointsPerFile = num_points / num_files;
    // clean data dir content
    if (fs.exists(dataDir)) {
        fs.delete(dataDir, true);
    }

    // clean tmp local dir
    File tmpDir = new File(tmpDirName);
    if (tmpDir.exists() && tmpDir.isDirectory()) {
        // listFiles() returns null when the directory cannot be listed
        File[] staleFiles = tmpDir.listFiles();
        if (staleFiles != null) {
            for (File file : staleFiles) {
                file.delete();
            }
        }
        tmpDir.delete();
    }

    boolean success = tmpDir.mkdir();
    if (success) {
        System.out.println("Directory: " + tmpDirName + " created");
    }

    if (pointsPerFile == 0) {
        throw new IOException("No point to write.");
    }

    // create parallel Java threads pool
    int poolSize = Runtime.getRuntime().availableProcessors();
    ExecutorService service = Executors.newFixedThreadPool(poolSize);
    try {
        List<Future<?>> futures = new LinkedList<Future<?>>();

        // generate each file in parallel
        for (int k = 0; k < num_files; k++) {
            Future<?> f = service.submit(new DataGenMMDenseAndIntLabelTask(pointsPerFile, tmpDirName,
                    Integer.toString(k), nFeatures, norm, offset, labelRange, sep));
            futures.add(f);
        }

        // wait for every task; a failed task surfaces here as ExecutionException
        for (Future<?> f : futures) {
            f.get();
        }
    } finally {
        // Always stop the pool, even when a task failed or the wait was interrupted;
        // otherwise its non-daemon worker threads keep the JVM from exiting.
        service.shutdownNow();
    }

    Path tmpDirPath = new Path(tmpDirName);
    fs.copyFromLocalFile(tmpDirPath, dataDir);
    DeleteFileFolder(tmpDirName);

}

From source file:edu.iu.kmeans.DataGen.java

License:Apache License

/**
 * Generate data and upload to the data dir.
 * /* w  w w . j  a v a2s  .  c  o  m*/
 * @param numOfDataPoints
 * @param vectorSize
 * @param numPointFiles
 * @param localInputDir
 * @param fs
 * @param dataDir
 * @throws IOException
 * @throws InterruptedException
 * @throws ExecutionException
 */
static void generateVectors(int numOfDataPoints, int vectorSize, int numPointFiles, String localInputDir,
        FileSystem fs, Path dataDir) throws IOException, InterruptedException, ExecutionException {
    int pointsPerFile = numOfDataPoints / numPointFiles;
    System.out.println("Writing " + pointsPerFile + " vectors to a file");
    // Check data directory
    if (fs.exists(dataDir)) {
        fs.delete(dataDir, true);
    }
    // Check local directory
    File localDir = new File(localInputDir);
    // If existed, regenerate data
    if (localDir.exists() && localDir.isDirectory()) {
        for (File file : localDir.listFiles()) {
            file.delete();
        }
        localDir.delete();
    }
    boolean success = localDir.mkdir();
    if (success) {
        System.out.println("Directory: " + localInputDir + " created");
    }
    if (pointsPerFile == 0) {
        throw new IOException("No point to write.");
    }
    // Create random data points
    int poolSize = Runtime.getRuntime().availableProcessors();
    ExecutorService service = Executors.newFixedThreadPool(poolSize);
    List<Future<?>> futures = new ArrayList<Future<?>>();
    for (int k = 0; k < numPointFiles; k++) {
        Future<?> f = service
                .submit(new DataGenRunnable(pointsPerFile, localInputDir, Integer.toString(k), vectorSize));
        futures.add(f); // add a new thread
    }
    for (Future<?> f : futures) {
        f.get();
    }
    // Shut down the executor service so that this thread can exit
    service.shutdownNow();
    // Wrap to path object
    Path localInput = new Path(localInputDir);
    fs.copyFromLocalFile(localInput, dataDir);
}

From source file:edu.iu.kmeans.regroupallgather.KMUtil.java

License:Apache License

/**
 * Generate data and upload to the data dir.
 *
 * <p>Any existing {@code dataDir} is deleted recursively and the files directly inside an
 * existing {@code localInputDir} directory are deleted before generation starts. Both happen
 * before the "nothing to write" check, so they also happen when that check fails. The local
 * directory is left in place after the upload.
 *
 * @param numOfDataPoints total number of points; each file receives
 *        {@code numOfDataPoints / numPointFiles} points (integer division, remainder dropped)
 * @param vectorSize      forwarded unchanged to each {@code DataGenRunnable}
 * @param numPointFiles   number of files, i.e. number of generator tasks submitted
 * @param localInputDir   local directory the tasks are pointed at
 * @param fs              file system that receives the generated directory
 * @param dataDir         destination directory on {@code fs}
 * @throws IOException          if {@code numOfDataPoints / numPointFiles} is zero or a file
 *                              system operation fails
 * @throws InterruptedException if the calling thread is interrupted while waiting for a task
 * @throws ExecutionException   if a generator task fails
 * @throws ArithmeticException  if {@code numPointFiles} is zero
 */
static void generatePoints(int numOfDataPoints, int vectorSize, int numPointFiles, String localInputDir,
        FileSystem fs, Path dataDir) throws IOException, InterruptedException, ExecutionException {
    int pointsPerFile = numOfDataPoints / numPointFiles;
    System.out.println("Writing " + pointsPerFile + " vectors to a file");
    // Check data directory
    if (fs.exists(dataDir)) {
        fs.delete(dataDir, true);
    }
    // Check local directory
    File localDir = new File(localInputDir);
    // If existed, regenerate data
    if (localDir.exists() && localDir.isDirectory()) {
        // listFiles() returns null when the directory cannot be listed
        File[] staleFiles = localDir.listFiles();
        if (staleFiles != null) {
            for (File file : staleFiles) {
                file.delete();
            }
        }
        localDir.delete();
    }
    boolean success = localDir.mkdir();
    if (success) {
        System.out.println("Directory: " + localInputDir + " created");
    }
    if (pointsPerFile == 0) {
        throw new IOException("No point to write.");
    }
    // Create random data points
    int poolSize = Runtime.getRuntime().availableProcessors();
    ExecutorService service = Executors.newFixedThreadPool(poolSize);
    try {
        List<Future<?>> futures = new LinkedList<Future<?>>();
        for (int k = 0; k < numPointFiles; k++) {
            Future<?> f = service
                    .submit(new DataGenRunnable(pointsPerFile, localInputDir, Integer.toString(k), vectorSize));
            futures.add(f);
        }
        // wait for every task; a failed task surfaces here as ExecutionException
        for (Future<?> f : futures) {
            f.get();
        }
    } finally {
        // Always stop the pool, even when a task failed or the wait was interrupted;
        // otherwise its non-daemon worker threads keep the JVM from exiting.
        service.shutdownNow();
    }
    // Wrap to path object
    Path localInput = new Path(localInputDir);
    fs.copyFromLocalFile(localInput, dataDir);
}

From source file:edu.iu.mds.DataGen.java

License:Apache License

/**
 * Builds an all-ones distance matrix with a zero diagonal, has {@code DataGenRunnable} tasks
 * process it in row partitions against {@code localDir}, and uploads {@code localDir} to
 * {@code dataDir}.
 *
 * <p>Rows are split into at most {@code numPartitions} contiguous ranges; the first
 * {@code numOfDataPoints % numPartitions} ranges get one extra row. No task is submitted for
 * a range that would be empty. Any existing {@code dataDir} is deleted recursively first.
 *
 * @param numOfDataPoints number of points; the matrix is stored row-major in a flat array of
 *                        {@code numOfDataPoints * numOfDataPoints} doubles
 * @param numPartitions   maximum number of row partitions / generator tasks
 * @param localDir        local directory handed to the tasks and uploaded afterwards
 * @param fs              file system that receives the generated directory
 * @param dataDir         destination directory on {@code fs}
 * @throws IOException          if a file system operation fails
 * @throws InterruptedException if the calling thread is interrupted while waiting for a task
 * @throws ExecutionException   if a generator task fails
 * @throws ArithmeticException  if {@code numPartitions} is zero
 */
static void generateDistanceMatrix(int numOfDataPoints, int numPartitions, String localDir, FileSystem fs,
        String dataDir) throws IOException, InterruptedException, ExecutionException {
    Path dataDirPath = new Path(dataDir);
    // Check data directory
    if (fs.exists(dataDirPath)) {
        fs.delete(dataDirPath, true);
    }
    // Check local directory
    File newDir = new File(localDir);
    // If existed, regenerate data
    if (newDir.exists()) {
        // File.delete() refuses to remove a non-empty directory, so clear the files directly
        // inside it first; otherwise leftovers from an earlier run would be uploaded again.
        if (newDir.isDirectory()) {
            File[] staleFiles = newDir.listFiles();
            if (staleFiles != null) {
                for (File file : staleFiles) {
                    file.delete();
                }
            }
        }
        newDir.delete();
    }
    boolean success = newDir.mkdir();
    if (success) {
        System.out.println("Directory: " + localDir + " created");
    }
    // Create random data points
    // distMat[i][j] == distMat[j][i], distMat[i][i] = 0
    // We set all distance to 1
    // NOTE(review): the size is computed in int arithmetic and overflows once
    // numOfDataPoints exceeds 46340 - confirm callers stay below that.
    double[] distMat = new double[numOfDataPoints * numOfDataPoints];
    for (int i = 0; i < numOfDataPoints; i++) {
        for (int j = 0; j < i; j++) {
            distMat[i * numOfDataPoints + j] = 1;
            distMat[j * numOfDataPoints + i] = 1;
        }
        distMat[i * numOfDataPoints + i] = 0;
    }
    int height = numOfDataPoints / numPartitions;
    int rest = numOfDataPoints % numPartitions;
    int rowStart = 0;
    int poolSize = Runtime.getRuntime().availableProcessors();
    ExecutorService service = Executors.newFixedThreadPool(poolSize);
    try {
        List<Future<?>> futures = new ArrayList<Future<?>>();
        for (int i = 0; i < numPartitions; i++) {
            Future<?> f = null;
            if (rest > 0) {
                // This partition absorbs one of the leftover rows
                rest--;
                // Width, height, row ID, row Offset
                f = service.submit(
                        new DataGenRunnable(distMat, localDir, numOfDataPoints, height + 1, i, rowStart));
                rowStart = rowStart + height + 1;
            } else if (height > 0) {
                f = service.submit(new DataGenRunnable(distMat, localDir, numOfDataPoints, height, i, rowStart));
                rowStart = rowStart + height;
            } else {
                // No rows left to hand out
                break;
            }
            futures.add(f);
        }
        // wait for every task; a failed task surfaces here as ExecutionException
        for (Future<?> f : futures) {
            f.get();
        }
    } finally {
        // Always stop the pool, even when a task failed or the wait was interrupted;
        // otherwise its non-daemon worker threads keep the JVM from exiting.
        service.shutdownNow();
    }
    // Wrap to path object
    Path localInput = new Path(localDir);
    fs.copyFromLocalFile(localInput, dataDirPath);
}

From source file:edu.uci.ics.asterix.aoya.HDFSBackup.java

License:Apache License

/**
 * Copies every local source path to its paired destination on the file system obtained from
 * {@code conf}, logging each pair before it is copied.
 *
 * @param paths source/destination pairs; index 0 of each array is the local source and
 *              index 1 is the destination
 * @throws IOException if the file system cannot be obtained or a copy fails
 */
private void performBackup(List<Path[]> paths) throws IOException {
    final FileSystem targetFs = FileSystem.get(conf);
    for (final Path[] pair : paths) {
        final Path source = pair[0];
        final Path destination = pair[1];
        LOG.info("Backing up " + source + " to " + destination + ".");
        targetFs.copyFromLocalFile(source, destination);
    }
}

From source file:edu.uci.ics.hyracks.hdfs.dataflow.DataflowTest.java

License:Apache License

/**
 * Start the HDFS cluster and setup the data files.
 *
 * <p>Loads the core/mapred/hdfs site files from {@code PATH_TO_HADOOP_CONF} into
 * {@code conf}, recursively deletes the local {@code build} path, starts a
 * {@code MiniDFSCluster}, creates the input and output directories, copies
 * {@code DATA_PATH} into the input directory and finally writes {@code conf} as XML to
 * {@code HADOOP_CONF_PATH}.
 *
 * @throws IOException if any file system operation or the configuration dump fails
 */
private void startHDFS() throws IOException {
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));

    FileSystem lfs = FileSystem.getLocal(new Configuration());
    lfs.delete(new Path("build"), true);
    System.setProperty("hadoop.log.dir", "logs");
    dfsCluster = new MiniDFSCluster(conf, numberOfNC, true, null);
    FileSystem dfs = FileSystem.get(conf);
    Path src = new Path(DATA_PATH);
    Path dest = new Path(HDFS_INPUT_PATH);
    Path result = new Path(HDFS_OUTPUT_PATH);
    dfs.mkdirs(dest);
    dfs.mkdirs(result);
    dfs.copyFromLocalFile(src, dest);

    DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
    try {
        conf.writeXml(confOutput);
        confOutput.flush();
    } finally {
        // Release the file handle even when writing the configuration fails
        confOutput.close();
    }
}

From source file:edu.uci.ics.hyracks.hdfs2.dataflow.DataflowTest.java

License:Apache License

/**
 * Start the HDFS cluster and setup the data files
 * /*w w w  .j  a  v a2s .  c  o  m*/
 * @throws IOException
 */
private void startHDFS() throws IOException {
    conf.getConfiguration().addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
    conf.getConfiguration().addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
    conf.getConfiguration().addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));

    FileSystem lfs = FileSystem.getLocal(new Configuration());
    lfs.delete(new Path("build"), true);
    System.setProperty("hadoop.log.dir", "logs");
    dfsCluster = dfsClusterFactory.getMiniDFSCluster(conf.getConfiguration(), numberOfNC);
    FileSystem dfs = FileSystem.get(conf.getConfiguration());
    Path src = new Path(DATA_PATH);
    Path dest = new Path(HDFS_INPUT_PATH);
    Path result = new Path(HDFS_OUTPUT_PATH);
    dfs.mkdirs(dest);
    dfs.mkdirs(result);
    dfs.copyFromLocalFile(src, dest);

    DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
    conf.getConfiguration().writeXml(confOutput);
    confOutput.flush();
    confOutput.close();
}

From source file:edu.uci.ics.pregelix.example.jobrun.RunJobTestSuite.java

License:Apache License

/**
 * Starts the mini DFS cluster and uploads the three local data sets.
 *
 * <p>Loads the core/mapred/hdfs site files from {@code PATH_TO_HADOOP_CONF} into
 * {@code conf}, recursively deletes the local {@code build} path, starts a
 * {@code MiniDFSCluster}, then for each of the three (local, HDFS) path pairs creates the
 * HDFS directory and copies the local path into it. Finally writes {@code conf} as XML to
 * {@code HADOOP_CONF_PATH}.
 *
 * @throws IOException if any file system operation or the configuration dump fails
 */
private void startHDFS() throws IOException {
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
    conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
    FileSystem lfs = FileSystem.getLocal(new Configuration());
    lfs.delete(new Path("build"), true);
    System.setProperty("hadoop.log.dir", "logs");
    dfsCluster = new MiniDFSCluster(conf, numberOfNC, true, null);
    FileSystem dfs = FileSystem.get(conf);

    // first data set
    Path src = new Path(DATA_PATH);
    Path dest = new Path(HDFS_PATH);
    dfs.mkdirs(dest);
    dfs.copyFromLocalFile(src, dest);

    // second data set
    src = new Path(DATA_PATH2);
    dest = new Path(HDFS_PATH2);
    dfs.mkdirs(dest);
    dfs.copyFromLocalFile(src, dest);

    // third data set
    src = new Path(DATA_PATH3);
    dest = new Path(HDFS_PATH3);
    dfs.mkdirs(dest);
    dfs.copyFromLocalFile(src, dest);

    DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
    try {
        conf.writeXml(confOutput);
        confOutput.flush();
    } finally {
        // Release the file handle even when writing the configuration fails
        confOutput.close();
    }
}