Usage examples for org.apache.hadoop.fs.FileSystem#copyFromLocalFile
public void copyFromLocalFile(Path src, Path dst) throws IOException
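Before the project-specific examples below, here is a minimal, self-contained sketch of the call. The paths and configuration are placeholders for illustration, not taken from any of the projects listed. copyFromLocalFile copies the local source into the destination filesystem and leaves the source in place; the (boolean delSrc, Path src, Path dst) overload can delete the source after copying.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalFileExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Resolves to the default filesystem (e.g. HDFS when fs.defaultFS points at a cluster).
        FileSystem fs = FileSystem.get(conf);

        // Hypothetical paths, for illustration only.
        Path src = new Path("/tmp/local-file.txt");
        Path dst = new Path("/user/example/remote-file.txt");

        // Copies the local file to the target filesystem; the local source is kept.
        fs.copyFromLocalFile(src, dst);

        // Variant that deletes the local source after a successful copy:
        // fs.copyFromLocalFile(true, src, dst);

        fs.close();
    }
}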
From source file: org.apache.falcon.regression.core.util.HadoopUtil.java
License: Apache License

/**
 * Creates folders in the remote location according to the current time and copies files there.
 *
 * @param fs target filesystem
 * @param remoteLocation remote location
 * @param localLocation source
 * @throws IOException
 */
public static void injectMoreData(FileSystem fs, final String remoteLocation,
        String localLocation) throws IOException {
    File[] files = new File(localLocation).listFiles();
    assert files != null;
    for (final File file : files) {
        if (!file.isDirectory()) {
            String path = remoteLocation + "/" + System.currentTimeMillis() / 1000 + "/";
            LOGGER.info("inserting data @ " + path);
            fs.copyFromLocalFile(new Path(file.getAbsolutePath()), new Path(path));
        }
    }
}
From source file: org.apache.falcon.regression.prism.FeedRetentionTest.java
License: Apache License

@BeforeClass(alwaysRun = true)
public void uploadWorkflow() throws Exception {
    for (FileSystem fs : serverFS) {
        fs.copyFromLocalFile(
            new Path(OSUtil.getPath(OSUtil.RESOURCES, "workflows", "impression_rc_workflow.xml")),
            new Path(impressionrcWorkflowDir + "workflow.xml"));
        HadoopUtil.uploadDir(fs, impressionrcWorkflowLibPath, OSUtil.RESOURCES_OOZIE + "lib");
    }
}
From source file: org.apache.flink.streaming.util.HDFSCopyFromLocal.java
License: Apache License

public static void copyFromLocal(final File localPath, final URI remotePath) throws Exception {
    // Do the copy in another thread because HDFS can deadlock if it is interrupted while copying.
    String threadName = "HDFS Copy from " + localPath + " to " + remotePath;

    final Tuple1<Exception> asyncException = Tuple1.of(null);

    Thread copyThread = new Thread(threadName) {
        @Override
        public void run() {
            try {
                Configuration hadoopConf = HadoopFileSystem.getHadoopConfiguration();
                FileSystem fs = FileSystem.get(remotePath, hadoopConf);
                fs.copyFromLocalFile(new Path(localPath.getAbsolutePath()), new Path(remotePath));
            } catch (Exception t) {
                asyncException.f0 = t;
            }
        }
    };

    copyThread.setDaemon(true);
    copyThread.start();
    copyThread.join();

    if (asyncException.f0 != null) {
        throw asyncException.f0;
    }
}
From source file: org.apache.flink.tez.client.TezExecutor.java
License: Apache License

private static void addLocalResource(TezConfiguration tezConf, Path jarPath, DAG dag) {
    try {
        org.apache.hadoop.fs.FileSystem fs = org.apache.hadoop.fs.FileSystem.get(tezConf);

        LOG.info("Jar path received is " + jarPath.toString());

        String jarFile = jarPath.getName();
        Path remoteJarPath = null;

        /*
        if (tezConf.get(TezConfiguration.TEZ_AM_STAGING_DIR) == null) {
            LOG.info("Tez staging directory is null, setting it.");
            Path stagingDir = new Path(fs.getWorkingDirectory(), UUID.randomUUID().toString());
            LOG.info("Setting Tez staging directory to " + stagingDir.toString());
            tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stagingDir.toString());
            LOG.info("Set Tez staging directory to " + stagingDir.toString());
        }
        Path stagingDir = new Path(tezConf.get(TezConfiguration.TEZ_AM_STAGING_DIR));
        LOG.info("Ensuring that Tez staging directory exists");
        TezClientUtils.ensureStagingDirExists(tezConf, stagingDir);
        LOG.info("Tez staging directory exists and is " + stagingDir.toString());
        */

        Path stagingDir = TezCommonUtils.getTezBaseStagingPath(tezConf);
        LOG.info("Tez staging path is " + stagingDir);
        TezClientUtils.ensureStagingDirExists(tezConf, stagingDir);
        LOG.info("Tez staging dir exists");

        remoteJarPath = fs.makeQualified(new Path(stagingDir, jarFile));
        LOG.info("Copying " + jarPath.toString() + " to " + remoteJarPath.toString());
        fs.copyFromLocalFile(jarPath, remoteJarPath);
        FileStatus remoteJarStatus = fs.getFileStatus(remoteJarPath);

        Credentials credentials = new Credentials();
        TokenCache.obtainTokensForNamenodes(credentials, new Path[] { remoteJarPath }, tezConf);

        Map<String, LocalResource> localResources = new TreeMap<String, LocalResource>();
        LocalResource jobJar = LocalResource.newInstance(
            ConverterUtils.getYarnUrlFromPath(remoteJarPath),
            LocalResourceType.FILE, LocalResourceVisibility.APPLICATION,
            remoteJarStatus.getLen(), remoteJarStatus.getModificationTime());
        localResources.put(jarFile, jobJar);

        dag.addTaskLocalFiles(localResources);

        LOG.info("Added job jar as local resource.");
    } catch (Exception e) {
        System.out.println(e.getMessage());
        e.printStackTrace();
        System.exit(-1);
    }
}
From source file: org.apache.flink.util.HDFSCopyFromLocal.java
License: Apache License

public static void main(String[] args) throws Exception {
    String localBackupPath = args[0];
    String backupUri = args[1];

    FileSystem fs = FileSystem.get(new URI(backupUri), new Configuration());
    fs.copyFromLocalFile(new Path(localBackupPath), new Path(backupUri));
}
From source file: org.apache.flink.yarn.Utils.java
License: Apache License

/**
 * @return the path to the remote file (usually on HDFS)
 * @throws IOException
 */
public static Path setupLocalResource(Configuration conf, FileSystem fs, String appId,
        Path localRsrcPath, LocalResource appMasterJar, Path homedir) throws IOException {
    // Copy to HDFS.
    String suffix = ".flink/" + appId + "/" + localRsrcPath.getName();
    Path dst = new Path(homedir, suffix);

    LOG.info("Copying from " + localRsrcPath + " to " + dst);
    fs.copyFromLocalFile(localRsrcPath, dst);
    registerLocalResource(fs, dst, appMasterJar);
    return dst;
}
From source file: org.apache.hama.bsp.BSPJobClient.java
License: Apache License

public RunningJob submitJobInternal(BSPJob pJob, BSPJobID jobId) throws IOException {
    BSPJob job = pJob;
    job.setJobID(jobId);

    int maxTasks;
    int configured = job.getConfiguration().getInt(Constants.MAX_TASKS_PER_JOB, job.getNumBspTask());

    ClusterStatus clusterStatus = getClusterStatus(true);
    // Re-adjust maxTasks based on the cluster status.
    if (clusterStatus != null) {
        maxTasks = clusterStatus.getMaxTasks() - clusterStatus.getTasks();
        if (configured > maxTasks) {
            LOG.warn("The configured number of tasks has exceeded the maximum allowed. Job will run with "
                + maxTasks + " tasks.");
            job.setNumBspTask(maxTasks);
        }
    } else {
        maxTasks = configured;
    }

    Path submitJobDir = new Path(getSystemDir(), "submit_" + Integer.toString(Math.abs(r.nextInt()), 36));
    Path submitSplitFile = new Path(submitJobDir, "job.split");
    Path submitJarFile = new Path(submitJobDir, "job.jar");
    Path submitJobFile = new Path(submitJobDir, "job.xml");
    LOG.debug("BSPJobClient.submitJobDir: " + submitJobDir);

    FileSystem fs = getFs();
    // Create a number of filenames in the BSPMaster's fs namespace.
    fs.delete(submitJobDir, true);
    submitJobDir = fs.makeQualified(submitJobDir);
    submitJobDir = new Path(submitJobDir.toUri().getPath());
    FsPermission bspSysPerms = new FsPermission(JOB_DIR_PERMISSION);
    FileSystem.mkdirs(fs, submitJobDir, bspSysPerms);
    fs.mkdirs(submitJobDir);
    short replication = (short) job.getInt("bsp.submit.replication", 10);

    // Only create the splits if we have an input.
    if ((job.get(Constants.JOB_INPUT_DIR) != null) || (job.get("bsp.join.expr") != null)) {
        // Create the splits for the job.
        LOG.debug("Creating splits at " + fs.makeQualified(submitSplitFile));
        InputSplit[] splits = job.getInputFormat().getSplits(job,
            (maxTasks > configured) ? configured : maxTasks);

        if (job.getConfiguration().getBoolean(Constants.ENABLE_RUNTIME_PARTITIONING, false)) {
            LOG.info("Run pre-partitioning job");
            job = partition(job, splits, maxTasks);
            maxTasks = job.getInt("hama.partition.count", maxTasks);
        }

        if (job.getBoolean("input.has.partitioned", false)) {
            splits = job.getInputFormat().getSplits(job, maxTasks);
        }

        if (maxTasks < splits.length) {
            throw new IOException(
                "Job failed! The number of splits has exceeded the number of max tasks. The number of splits: "
                    + splits.length + ", The number of max tasks: " + maxTasks);
        }

        int numOfSplits = writeSplits(job, splits, submitSplitFile, maxTasks);
        if (numOfSplits > configured
            || !job.getConfiguration().getBoolean(Constants.FORCE_SET_BSP_TASKS, false)) {
            job.setNumBspTask(numOfSplits);
        }

        job.set("bsp.job.split.file", submitSplitFile.toString());
    }

    String originalJarPath = job.getJar();
    if (originalJarPath != null) {
        // Copy the jar to the BSPMaster's fs;
        // use the jar name if the job is not named.
        if ("".equals(job.getJobName())) {
            job.setJobName(new Path(originalJarPath).getName());
        }
        job.setJar(submitJarFile.toString());
        fs.copyFromLocalFile(new Path(originalJarPath), submitJarFile);
        fs.setReplication(submitJarFile, replication);
        fs.setPermission(submitJarFile, new FsPermission(JOB_FILE_PERMISSION));
    } else {
        LOG.warn("No job jar file set. User classes may not be found. "
            + "See BSPJob#setJar(String) or check your jar file.");
    }

    // Set the user's name and working directory.
    job.setUser(getUnixUserName());
    job.set("group.name", getUnixUserGroupName(job.getUser()));
    if (job.getWorkingDirectory() == null) {
        job.setWorkingDirectory(fs.getWorkingDirectory());
    }

    // Write the job file to the BSPMaster's fs.
    FSDataOutputStream out = FileSystem.create(fs, submitJobFile, new FsPermission(JOB_FILE_PERMISSION));
    try {
        job.writeXml(out);
    } finally {
        out.close();
    }

    return launchJob(jobId, job, submitJobFile, fs);
}
From source file: org.apache.hama.pipes.util.DistributedCacheUtil.java
License: Apache License

/**
 * Adds the files to HDFS.
 *
 * @param conf the job's configuration
 * @param files paths that should be transferred to HDFS
 */
public static String addFilesToHDFS(Configuration conf, String files) {
    if (files == null)
        return null;
    String[] fileArr = files.split(",");
    String[] finalArr = new String[fileArr.length];

    for (int i = 0; i < fileArr.length; i++) {
        String tmp = fileArr[i];
        String finalPath;

        URI pathURI;
        try {
            pathURI = new URI(tmp);
        } catch (URISyntaxException e) {
            throw new IllegalArgumentException(e);
        }

        try {
            LocalFileSystem local = LocalFileSystem.getLocal(conf);
            Path pathSrc = new Path(pathURI);

            if (local.exists(pathSrc)) {
                FileSystem hdfs = FileSystem.get(conf);
                Path pathDst = new Path(hdfs.getWorkingDirectory() + "/temp", pathSrc.getName());

                LOG.debug("copyToHDFSFile: " + pathDst);
                hdfs.copyFromLocalFile(pathSrc, pathDst);
                hdfs.deleteOnExit(pathDst);

                finalPath = pathDst.makeQualified(hdfs).toString();
                finalArr[i] = finalPath;
            }
        } catch (IOException e) {
            LOG.error(e);
        }
    }
    return StringUtils.arrayToString(finalArr);
}
From source file: org.apache.hcatalog.hcatmix.load.hadoop.HadoopUtils.java
License: Apache License

/**
 * Walks the classpath, uploads the files to HDFS, and adds them to the JobConf object.
 * The uploaded files are marked for deletion on exit, so they get deleted when the job finishes.
 *
 * @param jobConf the job configuration
 * @param tmpDir where all the files would be copied to
 * @throws IOException
 */
public static void uploadClasspathAndAddToJobConf(JobConf jobConf, Path tmpDir) throws IOException {
    final String[] jars = findFilesInCP(new String[] {
        System.getenv().get("CLASSPATH"), System.getProperty("java.class.path") });
    final FileSystem fs = FileSystem.get(jobConf);

    for (String jarFile : jars) {
        Path srcJarFilePath = new Path("file:///" + jarFile);
        String filename = srcJarFilePath.getName();
        Path tmpJarFilePath = getTmpFileName(tmpDir, filename);
        fs.deleteOnExit(tmpJarFilePath);
        fs.copyFromLocalFile(srcJarFilePath, tmpJarFilePath);
        DistributedCache.addFileToClassPath(tmpJarFilePath, jobConf);
    }
    DistributedCache.createSymlink(jobConf);
}
From source file: org.apache.hive.jdbc.TestJdbcWithLocalClusterSpark.java
License: Apache License

@Test
public void testPermFunc() throws Exception {
    // This test assumes the hive-contrib JAR has been built as part of the Hive build.
    // It is also dependent on the UDFExampleAdd class within that JAR.
    String udfClassName = "org.apache.hadoop.hive.contrib.udf.example.UDFExampleAdd";
    String mvnRepo = System.getProperty("maven.local.repository");
    String hiveVersion = System.getProperty("hive.version");
    String jarFileName = "hive-contrib-" + hiveVersion + ".jar";
    String[] pathParts = { "org", "apache", "hive", "hive-contrib", hiveVersion, jarFileName };

    // Create the path to the hive-contrib JAR on the local filesystem.
    Path contribJarPath = new Path(mvnRepo);
    for (String pathPart : pathParts) {
        contribJarPath = new Path(contribJarPath, pathPart);
    }
    FileSystem localFs = FileSystem.getLocal(conf);
    assertTrue("Hive contrib JAR exists at " + contribJarPath, localFs.exists(contribJarPath));

    String hdfsJarPathStr = "hdfs:///" + jarFileName;
    Path hdfsJarPath = new Path(hdfsJarPathStr);

    // Copy the JAR to DFS.
    FileSystem dfs = miniHS2.getDFS().getFileSystem();
    dfs.copyFromLocalFile(contribJarPath, hdfsJarPath);
    assertTrue("Verify contrib JAR copied to HDFS at " + hdfsJarPath, dfs.exists(hdfsJarPath));

    // Register the function.
    String queryStr = "CREATE FUNCTION example_add AS '" + udfClassName + "'"
        + " USING JAR '" + hdfsJarPathStr + "'";
    stmt.execute(queryStr);

    // Call describe.
    ResultSet res;
    res = stmt.executeQuery("DESCRIBE FUNCTION " + dbName + ".example_add");
    checkForNotExist(res);

    // Use the UDF in a query.
    String tableName = "testTab3";
    setupKv1Tabs(tableName);
    res = stmt.executeQuery("SELECT EXAMPLE_ADD(1, 2) FROM " + tableName + " LIMIT 1");
    assertTrue("query has results", res.next());
    assertEquals(3, res.getInt(1));
    assertFalse("no more results", res.next());

    // A new connection should be able to describe/use the function without issue.
    Connection conn2 = DriverManager.getConnection(miniHS2.getJdbcURL(dbName),
        System.getProperty("user.name"), "bar");
    Statement stmt2 = conn2.createStatement();
    stmt2.execute("USE " + dbName);
    res = stmt2.executeQuery("DESCRIBE FUNCTION " + dbName + ".example_add");
    checkForNotExist(res);

    res = stmt2.executeQuery("SELECT " + dbName + ".example_add(1, 1) FROM " + tableName + " LIMIT 1");
    assertTrue("query has results", res.next());
    assertEquals(2, res.getInt(1));
    assertFalse("no more results", res.next());

    stmt.execute("DROP TABLE " + tableName);
}