Example usage for org.apache.hadoop.fs FileSystem copyFromLocalFile

List of usage examples for org.apache.hadoop.fs FileSystem copyFromLocalFile

Introduction

On this page you can find example usages of org.apache.hadoop.fs FileSystem copyFromLocalFile.

Prototype

public void copyFromLocalFile(Path src, Path dst) throws IOException 

Source Link

Document

The src file is on the local disk.
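
Before the per-project listings below, here is a minimal sketch of a typical call. It assumes a default filesystem (for example HDFS) configured via core-site.xml on the classpath; the class name and both paths are hypothetical.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();      // loads core-site.xml/hdfs-site.xml from the classpath
        FileSystem fs = FileSystem.get(conf);          // the configured default filesystem, e.g. hdfs://...

        Path src = new Path("/tmp/data.txt");          // source file on the local disk (hypothetical path)
        Path dst = new Path("/user/example/data.txt"); // destination on the target filesystem (hypothetical path)

        fs.copyFromLocalFile(src, dst);                // copies src to dst; the local source file is kept intact
    }
}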

Usage

From source file:org.apache.falcon.regression.core.util.HadoopUtil.java

License:Apache License

/**
 * Creates folders in the remote location named after the current time and copies files there.
 * @param fs target filesystem
 * @param remoteLocation remote location
 * @param localLocation source location on the local disk
 * @throws IOException
 */
public static void injectMoreData(FileSystem fs, final String remoteLocation, String localLocation)
        throws IOException {
    File[] files = new File(localLocation).listFiles();
    assert files != null;
    for (final File file : files) {
        if (!file.isDirectory()) {
            String path = remoteLocation + "/" + System.currentTimeMillis() / 1000 + "/";
            LOGGER.info("inserting data@ " + path);
            fs.copyFromLocalFile(new Path(file.getAbsolutePath()), new Path(path));
        }
    }
}

From source file:org.apache.falcon.regression.prism.FeedRetentionTest.java

License:Apache License

@BeforeClass(alwaysRun = true)
public void uploadWorkflow() throws Exception {
    for (FileSystem fs : serverFS) {
        fs.copyFromLocalFile(
                new Path(OSUtil.getPath(OSUtil.RESOURCES, "workflows", "impression_rc_workflow.xml")),
                new Path(impressionrcWorkflowDir + "workflow.xml"));
        HadoopUtil.uploadDir(fs, impressionrcWorkflowLibPath, OSUtil.RESOURCES_OOZIE + "lib");
    }
}

From source file:org.apache.flink.streaming.util.HDFSCopyFromLocal.java

License:Apache License

public static void copyFromLocal(final File localPath, final URI remotePath) throws Exception {
    // Do it in another Thread because HDFS can deadlock if being interrupted while copying
    String threadName = "HDFS Copy from " + localPath + " to " + remotePath;

    final Tuple1<Exception> asyncException = Tuple1.of(null);

    Thread copyThread = new Thread(threadName) {
        @Override
        public void run() {
            try {
                Configuration hadoopConf = HadoopFileSystem.getHadoopConfiguration();

                FileSystem fs = FileSystem.get(remotePath, hadoopConf);

                fs.copyFromLocalFile(new Path(localPath.getAbsolutePath()), new Path(remotePath));
            } catch (Exception t) {
                asyncException.f0 = t;
            }
        }
    };

    copyThread.setDaemon(true);
    copyThread.start();
    copyThread.join();

    if (asyncException.f0 != null) {
        throw asyncException.f0;
    }
}

From source file:org.apache.flink.tez.client.TezExecutor.java

License:Apache License

private static void addLocalResource(TezConfiguration tezConf, Path jarPath, DAG dag) {

    try {
        org.apache.hadoop.fs.FileSystem fs = org.apache.hadoop.fs.FileSystem.get(tezConf);

        LOG.info("Jar path received is " + jarPath.toString());

        String jarFile = jarPath.getName();

        Path remoteJarPath = null;

        /*
        if (tezConf.get(TezConfiguration.TEZ_AM_STAGING_DIR) == null) {
           LOG.info("Tez staging directory is null, setting it.");
           Path stagingDir = new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString());
           LOG.info("Setting Tez staging directory to " + stagingDir.toString());
           tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString());
           LOG.info("Set Tez staging directory to " + stagingDir.toString());
        }
        Path stagingDir = new Path(tezConf.get(TezConfiguration.TEZ_AM_STAGING_DIR));
        LOG.info("Ensuring that Tez staging directory exists");
        TezClientUtils.ensureStagingDirExists(tezConf, stagingDir);
        LOG.info("Tez staging directory exists and is " + stagingDir.toString());
        */

        Path stagingDir = TezCommonUtils.getTezBaseStagingPath(tezConf);
        LOG.info("Tez staging path is " + stagingDir);
        TezClientUtils.ensureStagingDirExists(tezConf, stagingDir);
        LOG.info("Tez staging dir exists");

        remoteJarPath = fs.makeQualified(new Path(stagingDir, jarFile));
        LOG.info("Copying " + jarPath.toString() + " to " + remoteJarPath.toString());
        fs.copyFromLocalFile(jarPath, remoteJarPath);

        FileStatus remoteJarStatus = fs.getFileStatus(remoteJarPath);
        Credentials credentials = new Credentials();
        TokenCache.obtainTokensForNamenodes(credentials, new Path[] { remoteJarPath }, tezConf);

        Map<String, LocalResource> localResources = new TreeMap<String, LocalResource>();
        LocalResource jobJar = LocalResource.newInstance(ConverterUtils.getYarnUrlFromPath(remoteJarPath),
                LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, remoteJarStatus.getLen(),
                remoteJarStatus.getModificationTime());
        localResources.put(jarFile.toString(), jobJar);

        dag.addTaskLocalFiles(localResources);

        LOG.info("Added job jar as local resource.");
    } catch (Exception e) {
        System.out.println(e.getMessage());
        e.printStackTrace();
        System.exit(-1);
    }
}

From source file:org.apache.flink.util.HDFSCopyFromLocal.java

License:Apache License

public static void main(String[] args) throws Exception {
    String localBackupPath = args[0];
    String backupUri = args[1];

    FileSystem fs = FileSystem.get(new URI(backupUri), new Configuration());

    fs.copyFromLocalFile(new Path(localBackupPath), new Path(backupUri));
}

From source file:org.apache.flink.yarn.Utils.java

License:Apache License

/**
 * Copies a local resource to the target filesystem and registers it as a YARN local resource.
 *
 * @return Path to remote file (usually hdfs)
 * @throws IOException
 */
public static Path setupLocalResource(Configuration conf, FileSystem fs, String appId, Path localRsrcPath,
        LocalResource appMasterJar, Path homedir) throws IOException {
    // copy to HDFS
    String suffix = ".flink/" + appId + "/" + localRsrcPath.getName();

    Path dst = new Path(homedir, suffix);

    LOG.info("Copying from " + localRsrcPath + " to " + dst);
    fs.copyFromLocalFile(localRsrcPath, dst);
    registerLocalResource(fs, dst, appMasterJar);
    return dst;
}

From source file:org.apache.hama.bsp.BSPJobClient.java

License:Apache License

public RunningJob submitJobInternal(BSPJob pJob, BSPJobID jobId) throws IOException {
    BSPJob job = pJob;
    job.setJobID(jobId);

    int maxTasks;
    int configured = job.getConfiguration().getInt(Constants.MAX_TASKS_PER_JOB, job.getNumBspTask());

    ClusterStatus clusterStatus = getClusterStatus(true);
    // Re-adjust the maxTasks based on cluster status.
    if (clusterStatus != null) {
        maxTasks = clusterStatus.getMaxTasks() - clusterStatus.getTasks();

        if (configured > maxTasks) {
            LOG.warn("The configured number of tasks has exceeded the maximum allowed. Job will run with "
                    + (maxTasks) + " tasks.");
            job.setNumBspTask(maxTasks);
        }
    } else {
        maxTasks = configured;
    }

    Path submitJobDir = new Path(getSystemDir(), "submit_" + Integer.toString(Math.abs(r.nextInt()), 36));
    Path submitSplitFile = new Path(submitJobDir, "job.split");
    Path submitJarFile = new Path(submitJobDir, "job.jar");
    Path submitJobFile = new Path(submitJobDir, "job.xml");
    LOG.debug("BSPJobClient.submitJobDir: " + submitJobDir);

    FileSystem fs = getFs();
    // Create a number of filenames in the BSPMaster's fs namespace
    fs.delete(submitJobDir, true);
    submitJobDir = fs.makeQualified(submitJobDir);
    submitJobDir = new Path(submitJobDir.toUri().getPath());
    FsPermission bspSysPerms = new FsPermission(JOB_DIR_PERMISSION);
    FileSystem.mkdirs(fs, submitJobDir, bspSysPerms);
    fs.mkdirs(submitJobDir);
    short replication = (short) job.getInt("bsp.submit.replication", 10);

    // only create the splits if we have an input
    if ((job.get(Constants.JOB_INPUT_DIR) != null) || (job.get("bsp.join.expr") != null)) {
        // Create the splits for the job
        LOG.debug("Creating splits at " + fs.makeQualified(submitSplitFile));

        InputSplit[] splits = job.getInputFormat().getSplits(job,
                (maxTasks > configured) ? configured : maxTasks);

        if (job.getConfiguration().getBoolean(Constants.ENABLE_RUNTIME_PARTITIONING, false)) {
            LOG.info("Run pre-partitioning job");
            job = partition(job, splits, maxTasks);
            maxTasks = job.getInt("hama.partition.count", maxTasks);
        }

        if (job.getBoolean("input.has.partitioned", false)) {
            splits = job.getInputFormat().getSplits(job, maxTasks);
        }

        if (maxTasks < splits.length) {
            throw new IOException(
                    "Job failed! The number of splits has exceeded the number of max tasks. The number of splits: "
                            + splits.length + ", The number of max tasks: " + maxTasks);
        }

        int numOfSplits = writeSplits(job, splits, submitSplitFile, maxTasks);
        if (numOfSplits > configured
                || !job.getConfiguration().getBoolean(Constants.FORCE_SET_BSP_TASKS, false)) {
            job.setNumBspTask(numOfSplits);
        }

        job.set("bsp.job.split.file", submitSplitFile.toString());
    }

    String originalJarPath = job.getJar();

    if (originalJarPath != null) { // copy jar to BSPMaster's fs
        // use jar name if job is not named.
        if ("".equals(job.getJobName())) {
            job.setJobName(new Path(originalJarPath).getName());
        }
        job.setJar(submitJarFile.toString());
        fs.copyFromLocalFile(new Path(originalJarPath), submitJarFile);

        fs.setReplication(submitJarFile, replication);
        fs.setPermission(submitJarFile, new FsPermission(JOB_FILE_PERMISSION));
    } else {
        LOG.warn("No job jar file set.  User classes may not be found. "
                + "See BSPJob#setJar(String) or check Your jar file.");
    }

    // Set the user's name and working directory
    job.setUser(getUnixUserName());
    job.set("group.name", getUnixUserGroupName(job.getUser()));
    if (job.getWorkingDirectory() == null) {
        job.setWorkingDirectory(fs.getWorkingDirectory());
    }

    // Write job file to BSPMaster's fs
    FSDataOutputStream out = FileSystem.create(fs, submitJobFile, new FsPermission(JOB_FILE_PERMISSION));

    try {
        job.writeXml(out);
    } finally {
        out.close();
    }

    return launchJob(jobId, job, submitJobFile, fs);
}

From source file:org.apache.hama.pipes.util.DistributedCacheUtil.java

License:Apache License

/**
 * Adds the given files to HDFS.
 *
 * @param conf The job's configuration
 * @param files Paths that should be transferred to HDFS
 */
public static String addFilesToHDFS(Configuration conf, String files) {
    if (files == null)
        return null;
    String[] fileArr = files.split(",");
    String[] finalArr = new String[fileArr.length];

    for (int i = 0; i < fileArr.length; i++) {
        String tmp = fileArr[i];
        String finalPath;

        URI pathURI;
        try {
            pathURI = new URI(tmp);
        } catch (URISyntaxException e) {
            throw new IllegalArgumentException(e);
        }

        try {
            LocalFileSystem local = LocalFileSystem.getLocal(conf);
            Path pathSrc = new Path(pathURI);
            // LOG.info("pathSrc: " + pathSrc);

            if (local.exists(pathSrc)) {
                FileSystem hdfs = FileSystem.get(conf);
                Path pathDst = new Path(hdfs.getWorkingDirectory() + "/temp", pathSrc.getName());

                // LOG.info("WorkingDirectory: " + hdfs.getWorkingDirectory());
                LOG.debug("copyToHDFSFile: " + pathDst);
                hdfs.copyFromLocalFile(pathSrc, pathDst);
                hdfs.deleteOnExit(pathDst);

                finalPath = pathDst.makeQualified(hdfs).toString();
                finalArr[i] = finalPath;
            }
        } catch (IOException e) {
            LOG.error(e);
        }

    }
    return StringUtils.arrayToString(finalArr);
}

From source file:org.apache.hcatalog.hcatmix.load.hadoop.HadoopUtils.java

License:Apache License

/**
 * Walks through the classpath, uploads the files to HDFS and also adds them to the JobConf object.
 * The uploaded files are marked for deletion on exit, so they get deleted when the job finishes.
 *
 * @param jobConf the job configuration to which the uploaded classpath entries are added
 * @param tmpDir  where all the files are copied to
 * @throws IOException
 */
public static void uploadClasspathAndAddToJobConf(JobConf jobConf, Path tmpDir) throws IOException {
    final String[] jars = findFilesInCP(
            new String[] { System.getenv().get("CLASSPATH"), System.getProperty("java.class.path") });
    final FileSystem fs = FileSystem.get(jobConf);
    for (String jarFile : jars) {
        Path srcJarFilePath = new Path("file:///" + jarFile);
        String filename = srcJarFilePath.getName();
        Path tmpJarFilePath = getTmpFileName(tmpDir, filename);
        fs.deleteOnExit(tmpJarFilePath);
        fs.copyFromLocalFile(srcJarFilePath, tmpJarFilePath);
        DistributedCache.addFileToClassPath(tmpJarFilePath, jobConf);
    }
    DistributedCache.createSymlink(jobConf);
}

From source file:org.apache.hive.jdbc.TestJdbcWithLocalClusterSpark.java

License:Apache License

@Test
public void testPermFunc() throws Exception {

    // This test assumes the hive-contrib JAR has been built as part of the Hive build.
    // Also dependent on the UDFExampleAdd class within that JAR.
    String udfClassName = "org.apache.hadoop.hive.contrib.udf.example.UDFExampleAdd";
    String mvnRepo = System.getProperty("maven.local.repository");
    String hiveVersion = System.getProperty("hive.version");
    String jarFileName = "hive-contrib-" + hiveVersion + ".jar";
    String[] pathParts = { "org", "apache", "hive", "hive-contrib", hiveVersion, jarFileName };

    // Create path to hive-contrib JAR on local filesystem
    Path contribJarPath = new Path(mvnRepo);
    for (String pathPart : pathParts) {
        contribJarPath = new Path(contribJarPath, pathPart);
    }
    FileSystem localFs = FileSystem.getLocal(conf);
    assertTrue("Hive contrib JAR exists at " + contribJarPath, localFs.exists(contribJarPath));

    String hdfsJarPathStr = "hdfs:///" + jarFileName;
    Path hdfsJarPath = new Path(hdfsJarPathStr);

    // Copy JAR to DFS
    FileSystem dfs = miniHS2.getDFS().getFileSystem();
    dfs.copyFromLocalFile(contribJarPath, hdfsJarPath);
    assertTrue("Verify contrib JAR copied to HDFS at " + hdfsJarPath, dfs.exists(hdfsJarPath));

    // Register function
    String queryStr = "CREATE FUNCTION example_add AS '" + udfClassName + "'" + " USING JAR '" + hdfsJarPathStr
            + "'";
    stmt.execute(queryStr);

    // Call describe
    ResultSet res;
    res = stmt.executeQuery("DESCRIBE FUNCTION " + dbName + ".example_add");
    checkForNotExist(res);

    // Use UDF in query
    String tableName = "testTab3";
    setupKv1Tabs(tableName);
    res = stmt.executeQuery("SELECT EXAMPLE_ADD(1, 2) FROM " + tableName + " LIMIT 1");
    assertTrue("query has results", res.next());
    assertEquals(3, res.getInt(1));
    assertFalse("no more results", res.next());

    // A new connection should be able to call describe/use function without issue
    Connection conn2 = DriverManager.getConnection(miniHS2.getJdbcURL(dbName), System.getProperty("user.name"),
            "bar");
    Statement stmt2 = conn2.createStatement();
    stmt2.execute("USE " + dbName);
    res = stmt2.executeQuery("DESCRIBE FUNCTION " + dbName + ".example_add");
    checkForNotExist(res);

    res = stmt2.executeQuery("SELECT " + dbName + ".example_add(1, 1) FROM " + tableName + " LIMIT 1");
    assertTrue("query has results", res.next());
    assertEquals(2, res.getInt(1));
    assertFalse("no more results", res.next());

    stmt.execute("DROP TABLE " + tableName);
}