List of usage examples for org.apache.hadoop.fs.FileSystem.copyToLocalFile
public void copyToLocalFile(Path src, Path dst) throws IOException
From source file:com.cloudera.livy.rsc.driver.RSCDriver.java
License:Apache License
/**
 * Copies the file identified by {@code filePath} into {@code localCopyDir} on the local disk.
 *
 * <p>The local file name is the last path segment of the URI fragment when a fragment is
 * present, otherwise the last path segment of the URI path.
 *
 * @param localCopyDir directory that receives the copy; created (single level) if missing
 * @param filePath URI string of the file to fetch
 * @param sc Spark context whose Hadoop configuration is used to resolve the file system
 * @return the newly created local file
 * @throws Exception if the directory cannot be created, a file with the same name already
 *     exists in {@code localCopyDir}, the URI is malformed, or the copy fails
 */
public File copyFileToLocal(File localCopyDir, String filePath, SparkContext sc) throws Exception {
    // Directory creation happens while holding the jc monitor.
    synchronized (jc) {
        final boolean directoryReady = localCopyDir.isDirectory() || localCopyDir.mkdir();
        if (!directoryReady) {
            throw new IOException("Failed to create directory to add pyFile");
        }
    }

    final URI source = new URI(filePath);
    final String fragment = source.getFragment();
    // A fragment, when present, overrides the path as the source of the local name.
    final String baseName = new File(fragment == null ? source.getPath() : fragment).getName();

    final File target = new File(localCopyDir, baseName);
    if (target.exists()) {
        throw new IOException(String.format("A file with name %s has already been uploaded.", baseName));
    }

    FileSystem.get(source, sc.hadoopConfiguration())
            .copyToLocalFile(new Path(source), new Path(target.toURI()));
    return target;
}
From source file:com.dasasian.chok.node.ShardManager.java
License:Apache License
private void installShard(String shardName, String shardPath, File localShardFolder) throws ChokException { LOG.info("install shard '" + shardName + "' from " + shardPath); // TODO sg: to fix HADOOP-4422 we try to download the shard 5 times int maxTries = 5; for (int i = 0; i < maxTries; i++) { URI uri;/*from ww w .j a va 2 s . c o m*/ try { uri = new URI(shardPath); FileSystem fileSystem = FileSystem.get(uri, new Configuration()); if (throttleSemaphore != null) { fileSystem = new ThrottledFileSystem(fileSystem, throttleSemaphore); } final Path path = new Path(shardPath); boolean isZip = fileSystem.isFile(path) && shardPath.endsWith(".zip"); File shardTmpFolder = new File(localShardFolder.getAbsolutePath() + "_tmp"); try { FileUtil.deleteFolder(localShardFolder); FileUtil.deleteFolder(shardTmpFolder); if (isZip) { FileUtil.unzip(path, shardTmpFolder, fileSystem, "true".equalsIgnoreCase(System.getProperty("chok.spool.zip.shards", "false"))); } else { fileSystem.copyToLocalFile(path, new Path(shardTmpFolder.getAbsolutePath())); } shardTmpFolder.renameTo(localShardFolder); } finally { // Ensure that the tmp folder is deleted on an error FileUtil.deleteFolder(shardTmpFolder); } // Looks like we were successful. if (i > 0) { LOG.error("Loaded shard:" + shardPath); } return; } catch (final URISyntaxException e) { throw new ChokException("Can not parse uri for path: " + shardPath, e); } catch (final Exception e) { LOG.error(String.format("Error loading shard: %s (try %d of %d)", shardPath, i, maxTries), e); if (i >= maxTries - 1) { throw new ChokException("Can not load shard: " + shardPath, e); } } } }
From source file:com.dasasian.chok.util.FileUtil.java
License:Apache License
/** * Simply unzips the content from the source zip to the target folder. The * first level folder of the zip content is removed. * * @param sourceZip the path to the source zip file, hadoop's IO services are used to * open this path/*from ww w . j a v a 2 s. c om*/ * @param targetFolder The directory that the zip file will be unpacked into * @param fileSystem the hadoop file system object to use to open * <code>sourceZip</code> * @param localSpool If true, the zip file is copied to the local file system before * being unzipped. The name used is <code>targetFolder.zip</code>. If * false, the unzip is streamed. */ public static void unzip(final Path sourceZip, final File targetFolder, final FileSystem fileSystem, final boolean localSpool) { try { if (localSpool) { targetFolder.mkdirs(); final File shardZipLocal = new File(targetFolder + ".zip"); if (shardZipLocal.exists()) { // make sure we overwrite cleanly shardZipLocal.delete(); } try { fileSystem.copyToLocalFile(sourceZip, new Path(shardZipLocal.getAbsolutePath())); FileUtil.unzip(shardZipLocal, targetFolder); } finally { shardZipLocal.delete(); } } else { FSDataInputStream fis = fileSystem.open(sourceZip); try { ZipInputStream zis = new ZipInputStream(fis); unzip(zis, targetFolder); } finally { if (fis != null) { try { fis.close(); } catch (Exception ignore) { // ignore } } } } } catch (IOException e) { throw new RuntimeException("unable to expand upgrade files for " + sourceZip + " to " + targetFolder, e); } }
From source file:com.datatorrent.stram.cli.DTCli.java
License:Apache License
/**
 * Makes every entry of {@code files} available on the local file system.
 *
 * <p>Entries without a scheme, or with the {@code file} scheme, are replaced in place by their
 * URI path. All other entries are downloaded into a per-process directory below
 * {@code /tmp/datatorrent/} and replaced in place by the path of the local copy. The local
 * name is the entry's index followed by the source file name.
 *
 * @param files URI strings to localize; the array is modified in place
 * @return the directory that holds the downloaded copies
 * @throws IOException if a remote file system cannot be opened or a copy fails
 * @throws RuntimeException wrapping a {@link URISyntaxException} for a malformed entry
 */
private File copyToLocal(String[] files) throws IOException {
    final File stagingDir = new File("/tmp/datatorrent/" + ManagementFactory.getRuntimeMXBean().getName());
    stagingDir.mkdirs();

    for (int idx = 0; idx < files.length; idx++) {
        final URI location;
        try {
            location = new URI(files[idx]);
        } catch (URISyntaxException ex) {
            throw new RuntimeException(ex);
        }

        final String scheme = location.getScheme();
        if (scheme == null || "file".equals(scheme)) {
            // Already local: just normalise the entry to a plain path.
            files[idx] = location.getPath();
            continue;
        }

        // Remote entry: use a private FileSystem instance so closing it affects nobody else.
        final FileSystem remoteFs = FileSystem.newInstance(location, conf);
        try {
            final Path remoteFile = new Path(location.getPath());
            final Path localFile = new Path(stagingDir.getAbsolutePath(), idx + remoteFile.getName());
            remoteFs.copyToLocalFile(remoteFile, localFile);
            files[idx] = localFile.toUri().getPath();
        } finally {
            remoteFs.close();
        }
    }
    return stagingDir;
}
From source file:com.datatorrent.stram.client.StramAppLauncher.java
License:Apache License
/**
 * Creates a launcher for an application jar stored on {@code fs}.
 *
 * <p>The jar is copied into the {@code jars} sub-directory of the user's DT directory under
 * its original file name, and the launcher is then initialised from that local copy.
 *
 * @param fs file system that holds the jar
 * @param path location of the jar on {@code fs}
 * @param conf configuration used to build the logical plan configuration
 * @throws Exception if the copy or the initialisation fails
 */
public StramAppLauncher(FileSystem fs, Path path, Configuration conf) throws Exception {
    final File localJarsDir = new File(StramClientUtils.getUserDTDirectory(), "jars");
    localJarsDir.mkdirs();
    final File downloadedJar = new File(localJarsDir, path.getName());

    this.fs = fs;
    final Path downloadTarget = new Path(downloadedJar.getAbsolutePath());
    fs.copyToLocalFile(path, downloadTarget);

    this.jarFile = downloadedJar;
    this.propertiesBuilder = new LogicalPlanConfiguration(conf);
    init(this.jarFile.getName());
}
From source file:com.datatorrent.stram.client.StramClientUtils.java
License:Apache License
public static Configuration addDTSiteResources(Configuration conf) { addDTLocalResources(conf);/*from ww w . j a va 2 s .c o m*/ FileSystem fs = null; File targetGlobalFile; try { fs = newFileSystemInstance(conf); // after getting the dfsRootDirectory config parameter, redo the entire process with the global config // load global settings from DFS targetGlobalFile = new File(String.format("/tmp/dt-site-global-%s.xml", UserGroupInformation.getLoginUser().getShortUserName())); org.apache.hadoop.fs.Path hdfsGlobalPath = new org.apache.hadoop.fs.Path( StramClientUtils.getDTDFSConfigDir(fs, conf), StramClientUtils.DT_SITE_GLOBAL_XML_FILE); LOG.debug("Copying global dt-site.xml from {} to {}", hdfsGlobalPath, targetGlobalFile.getAbsolutePath()); fs.copyToLocalFile(hdfsGlobalPath, new org.apache.hadoop.fs.Path(targetGlobalFile.toURI())); addDTSiteResources(conf, targetGlobalFile); if (!isDevelopmentMode()) { // load node local config file addDTSiteResources(conf, new File(StramClientUtils.getConfigDir(), StramClientUtils.DT_SITE_XML_FILE)); } // load user config file addDTSiteResources(conf, new File(StramClientUtils.getUserDTDirectory(), StramClientUtils.DT_SITE_XML_FILE)); } catch (IOException ex) { // ignore LOG.debug("Caught exception when loading configuration: {}: moving on...", ex.getMessage()); } finally { // Cannot delete the file here because addDTSiteResource which eventually calls Configuration.reloadConfiguration // does not actually reload the configuration. The file is actually read later and it needs to exist. 
// //if (targetGlobalFile != null) { //targetGlobalFile.delete(); //} IOUtils.closeQuietly(fs); } //Validate loggers-level settings String loggersLevel = conf.get(DTLoggerFactory.DT_LOGGERS_LEVEL); if (loggersLevel != null) { String targets[] = loggersLevel.split(","); Preconditions.checkArgument(targets.length > 0, "zero loggers level"); for (String target : targets) { String parts[] = target.split(":"); Preconditions.checkArgument(parts.length == 2, "incorrect " + target); Preconditions.checkArgument(ConfigValidator.validateLoggersLevel(parts[0], parts[1]), "incorrect " + target); } } convertDeprecatedProperties(conf); // // The ridiculous default RESOURCEMANAGER_CONNECT_MAX_WAIT_MS from hadoop is 15 minutes (!!!!), which actually translates to 20 minutes with the connect interval. // That means if there is anything wrong with YARN or if YARN is not running, the caller has to wait for up to 20 minutes until it gets an error. // We are overriding this to be 10 seconds maximum. // int rmConnectMaxWait = conf.getInt(YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_MS, YarnConfiguration.DEFAULT_RESOURCEMANAGER_CONNECT_MAX_WAIT_MS); if (rmConnectMaxWait > RESOURCEMANAGER_CONNECT_MAX_WAIT_MS_OVERRIDE) { LOG.info("Overriding {} assigned value of {} to {} because the assigned value is too big.", YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_MS, rmConnectMaxWait, RESOURCEMANAGER_CONNECT_MAX_WAIT_MS_OVERRIDE); conf.setInt(YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_MS, RESOURCEMANAGER_CONNECT_MAX_WAIT_MS_OVERRIDE); int rmConnectRetryInterval = conf.getInt(YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS, YarnConfiguration.DEFAULT_RESOURCEMANAGER_CONNECT_MAX_WAIT_MS); int defaultRetryInterval = Math.max(500, RESOURCEMANAGER_CONNECT_MAX_WAIT_MS_OVERRIDE / 5); if (rmConnectRetryInterval > defaultRetryInterval) { LOG.info("Overriding {} assigned value of {} to {} because the assigned value is too big.", 
YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS, rmConnectRetryInterval, defaultRetryInterval); conf.setInt(YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS, defaultRetryInterval); } } LOG.info(" conf object in stramclient {}", conf); return conf; }
From source file:com.ery.dimport.daemon.TaskManager.java
License:Apache License
/**
 * Runs one import task on this host: enumerates the task's input files, builds and records
 * one command per file, executes the commands one after another and publishes progress.
 *
 * <p>Flow, as far as it can be read from this collapsed listing:
 * <ol>
 *   <li>Sets {@code task.START_TIME}. If the task is unknown to {@code allTask}, has no
 *       hosts, or has no node under {@code watcher.dimportRunTaskNode}, the task is written
 *       to the log writer and to ZooKeeper. Any {@link Throwable} from that write is caught
 *       by an empty catch block.</li>
 *   <li>Looks up the {@code ProcessInfo} for {@code task.getRunTaskId()} in
 *       {@code taskInProgress} and stores the current thread and start time on it.</li>
 *   <li>Builds the map file name -> size: a single empty entry when {@code FILE_PATH} is
 *       empty; an HDFS directory listing when the path matches {@code hdfsUrlPattern};
 *       otherwise a local directory listing. Both listings are filtered by
 *       {@code task.fileNamePattern} when it is set.</li>
 *   <li>Creates one {@code LogHostRunInfoPO} per file, with the command obtained by macro
 *       processing and by substituting <code>{file}</code> and <code>{host}</code>, logs
 *       it, and publishes the whole list to ZooKeeper under this host's node.</li>
 *   <li>For each entry, until {@code procInfo.stoped} is set: copies an HDFS file to the
 *       temp directory with {@code copyToLocalFile}, runs the command via
 *       {@code execResult}, stores the value of {@code writeProcessLog} as return code, and
 *       in {@code finally} updates log and ZooKeeper and deletes the local copy.</li>
 *   <li>On any outer {@link Throwable}: logs it, sets {@code END_TIME},
 *       {@code IS_RUN_SUCCESS = 1} and {@code RETURN_CODE = 2} on every entry whose
 *       {@code END_TIME} is below 10000 ms, and republishes the list.</li>
 *   <li>Always removes the task from {@code taskInProgress}.</li>
 * </ol>
 *
 * <p>NOTE(review): in this single-line form it is ambiguous which {@code //} markers are
 * stripped comments followed by live code and which are commented-out code; confirm against
 * the original file before editing.
 * <br>NOTE(review): {@code procInfo.startTime} is assigned twice in a row.
 * <br>NOTE(review): {@code procInfo} is dereferenced without a null check after the map
 * lookup - confirm callers always register the task first.
 * <br>NOTE(review): the inner {@code catch (KeeperException e1)} and
 * {@code catch (IOException e1)} log the outer {@code e}, not {@code e1}.
 *
 * @param task the task to run; several of its fields are updated in place
 */
public void runTask(final TaskInfo task) { List<LogHostRunInfoPO> allFiles = new ArrayList<LogHostRunInfoPO>(); try {/*from w w w. j a v a2 s. c om*/ task.START_TIME = new Date(System.currentTimeMillis()); boolean needUpdate = false; TaskInfo exists = allTask.get(task.TASK_ID); if (exists == null) { needUpdate = true; } else { task.hosts = exists.hosts; } if (task.hosts == null || task.hosts.size() == 0) { task.hosts = new ArrayList<String>(master.getServerManager().getOnlineServers().keySet()); needUpdate = true; } if (ZKUtil.checkExists(watcher, watcher.dimportRunTaskNode + "/" + task.TASK_ID) == -1) { needUpdate = true; } if (needUpdate) { try { task.HOST_SIZE = task.hosts.size(); master.logWriter.writeLog(task); ZKUtil.createSetData(watcher, watcher.dimportRunTaskNode + "/" + task.TASK_ID, DImportConstant.Serialize(task)); } catch (Throwable e) { } } Thread thread = Thread.currentThread(); ProcessInfo procInfo = null; synchronized (taskInProgress) { procInfo = taskInProgress.get(task.getRunTaskId()); } procInfo.thread = thread; procInfo.startTime = System.currentTimeMillis(); procInfo.startTime = System.currentTimeMillis(); String filePath = task.FILE_PATH; boolean isInHdfs = false; final Map<String, Long> files = new HashMap<String, Long>(); String tmpPath = conf.get(DImportConstant.DIMPORT_PROCESS_TMPDATA_DIR, System.getProperty("user.home")); if (tmpPath.endsWith("/")) { tmpPath = tmpPath.substring(0, tmpPath.length() - 1); } if (filePath == null || filePath.equals("")) { files.put("", 0l); } else { if (task.fileNamePattern != null || (task.FILE_FILTER != null && !task.FILE_FILTER.equals(""))) { task.FILE_FILTER = DImportConstant.macroProcess(task.FILE_FILTER); task.FILE_FILTER = task.FILE_FILTER.replaceAll("\\{host\\}", this.master.hostName); task.fileNamePattern = Pattern.compile(task.FILE_FILTER); } Matcher m = hdfsUrlPattern.matcher(filePath); if (m.matches()) { isInHdfs = true; filePath = m.group(2); // for (String string : 
conf.getValByRegex(".*").keySet()) { // System.out.println(string + "=" + conf.get(string)); // } Path dirPath = new Path(filePath); FileSystem fs = FileSystem.get(HadoopConf.getConf(conf)); if (!fs.exists(dirPath) || !fs.isDirectory(dirPath)) { throw new IOException("HDFS? " + filePath + "?,?"); } FileStatus[] hFiles = fs.listStatus(dirPath, new PathFilter() { @Override public boolean accept(Path name) { if (task.fileNamePattern != null) { System.out.println("hdfs listStatus:" + name.getParent() + "/" + name.getName()); return task.fileNamePattern.matcher(name.getName()).matches(); } else { return true; } } }); for (int i = 0; i < hFiles.length; i++) { files.put(hFiles[i].getPath().toString(), hFiles[i].getLen()); } } else { java.io.File f = new File(filePath); if (!f.exists() || !f.isDirectory()) { throw new IOException( "? " + filePath + "? ,?"); } File[] lFiles = f.listFiles(new FilenameFilter() { public boolean accept(File dir, String name) { if (task.fileNamePattern != null) { System.out.println("local fs listStatus:" + dir + "/" + name); return task.fileNamePattern.matcher(name).matches(); } else { return true; } } }); for (int i = 0; i < lFiles.length; i++) { files.put(lFiles[i].getAbsolutePath(), lFiles[i].length()); } } } for (String fileName : files.keySet()) { LogHostRunInfoPO runInfo = new LogHostRunInfoPO(task); runInfo.RUN_LOG_ID = DImportConstant.shdf.format(task.SUBMIT_TIME) + "_" + allFiles.size() + "_" + fileName.hashCode(); runInfo.FILE_NAME = fileName; runInfo.RETURN_CODE = 255; runInfo.IS_RUN_SUCCESS = -1; runInfo.FILE_SIZE = files.get(fileName); runInfo.HOST_NAME = master.hostName; String localFile = fileName; if (isInHdfs) {// localFile = tmpPath + "/" + fileName.substring(fileName.lastIndexOf("/") + 1); } // String[] cmds = procInfo.task.getCommand(); for (int j = 0; j < cmds.length; j++) { cmds[j] = DImportConstant.macroProcess(cmds[j]); cmds[j] = cmds[j].replaceAll("\\{file\\}", localFile); cmds[j] = cmds[j].replaceAll("\\{host\\}", 
master.hostName); } runInfo.RUN_COMMAND = StringUtils.join(" ", cmds); master.logWriter.writeLog(runInfo); LOG.info("??" + runInfo); allFiles.add(runInfo); } ZKUtil.createSetData(watcher, watcher.dimportRunTaskNode + "/" + task.TASK_ID + "/" + master.hostName, DImportConstant.Serialize(allFiles)); for (LogHostRunInfoPO runInfo : allFiles) { if (procInfo.stoped) break; String fileName = runInfo.FILE_NAME; LOG.info("?:" + fileName); procInfo.RUN_LOG_ID = runInfo.RUN_LOG_ID; runInfo.START_TIME = new Date(System.currentTimeMillis()); procInfo.processFile = fileName; String localFile = fileName; try { if (isInHdfs) {// localFile = tmpPath + "/" + fileName.substring(fileName.lastIndexOf("/") + 1); } procInfo.task.TASK_COMMAND = runInfo.RUN_COMMAND; if (isInHdfs) {// File lf = new File(localFile); if (lf.exists()) lf.delete(); FileSystem fs = FileSystem.get(HadoopConf.getConf(conf)); LOG.info("HDFS:" + fileName + "===>" + localFile); long btime = System.currentTimeMillis(); fs.copyToLocalFile(new Path(fileName), new Path(localFile)); LOG.info("HDFS?:" + fileName + "===>" + localFile); runInfo.downTime = System.currentTimeMillis() - btime; fileName = localFile; } updateHostInfoLog(runInfo, allFiles); LOG.info(procInfo.task.TASK_NAME + " commandline: " + procInfo.task.TASK_COMMAND); procInfo.proc = execResult(runInfo.RUN_COMMAND); runInfo.IS_RUN_SUCCESS = 1; runInfo.RETURN_CODE = writeProcessLog(procInfo); LOG.info(procInfo.task.TASK_NAME + " return value: " + runInfo.RETURN_CODE); // runInfo.RETURN_CODE = procInfo.proc.exitValue(); } catch (Throwable e) { runInfo.ERROR_MSG = e.getMessage(); if (procInfo.proc != null) { try { procInfo.proc.destroy(); } catch (Exception ex) { } } procInfo.proc = null; LOG.error("", e); } finally { // runInfo.END_TIME = new Date(System.currentTimeMillis()); master.logWriter.updateLog(runInfo); updateHostInfoLog(runInfo, allFiles); ZKUtil.createSetData(watcher, watcher.dimportRunTaskNode + "/" + task.TASK_ID + "/" + master.hostName, 
DImportConstant.Serialize(allFiles)); if (isInHdfs) { File lf = new File(localFile); if (lf.exists()) lf.delete(); } } } } catch (Throwable e) { LOG.error("" + task, e); try { if (allFiles.size() > 0) { for (LogHostRunInfoPO logHostRunInfoPO : allFiles) { if (logHostRunInfoPO.END_TIME.getTime() < 10000) { logHostRunInfoPO.END_TIME = new Date(System.currentTimeMillis()); logHostRunInfoPO.IS_RUN_SUCCESS = 1; logHostRunInfoPO.RETURN_CODE = 2; } } ZKUtil.createSetData(watcher, watcher.dimportRunTaskNode + "/" + task.TASK_ID + "/" + master.hostName, DImportConstant.Serialize(allFiles)); } } catch (KeeperException e1) { LOG.error("update task run info on host :" + watcher.dimportRunTaskNode + "/" + task.TASK_ID + "/" + master.hostName, e); } catch (IOException e1) { LOG.error("update task run info on host " + watcher.dimportRunTaskNode + "/" + task.TASK_ID + "/" + master.hostName, e); } } finally { // synchronized (taskInProgress) { taskInProgress.remove(task.getRunTaskId()); } } }
From source file:com.github.seqware.queryengine.plugins.runners.hbasemr.MRHBasePluginRunner.java
License:Open Source License
@Override public ReturnType get() { try {//from www .ja va2 s. c om job.waitForCompletion(true); if (mapReducePlugin.getResultMechanism() == PluginInterface.ResultMechanism.COUNTER) { return (ReturnType) Long .valueOf(job.getCounters().findCounter(MapperInterface.Counters.ROWS).getValue()); } else if (mapReducePlugin.getResultMechanism() == PluginInterface.ResultMechanism.SGID) { SGID resultSGID = outputSet.getSGID(); Class<? extends Atom> resultClass = (Class<? extends Atom>) mapReducePlugin.getResultClass(); return (ReturnType) SWQEFactory.getQueryInterface().getLatestAtomBySGID(resultSGID, resultClass); } else if (mapReducePlugin.getResultMechanism() == PluginInterface.ResultMechanism.BATCHEDFEATURESET) { FeatureSet build = updateAndGet(outputSet); return (ReturnType) build; } else if (mapReducePlugin.getResultMechanism() == PluginInterface.ResultMechanism.FILE) { Path outputPath = TextOutputFormat.getOutputPath(job); FileSystem fs = FileSystem.get(job.getConfiguration()); Path localPath = new Path(Files.createTempDir().toURI()); fs.copyToLocalFile(outputPath, localPath); File outputFile = new File(localPath.toUri()); return (ReturnType) outputFile; } else { throw new UnsupportedOperationException(); } } catch (IOException ex) { Logger.getLogger(MRHBasePluginRunner.class.getName()).error(null, ex); } catch (InterruptedException ex) { Logger.getLogger(MRHBasePluginRunner.class.getName()).error(null, ex); } catch (ClassNotFoundException ex) { Logger.getLogger(MRHBasePluginRunner.class.getName()).error(null, ex); } return null; }
From source file:com.linkedin.mr_kluj.GenericClojureJob.java
License:Apache License
/**
 * Fetches the script at {@code path} into a newly created local temp file.
 *
 * @param conf Hadoop configuration used to resolve the file system for {@code path}
 * @param path URI string of the script
 * @return the local temp file holding the copy
 * @throws Exception if {@code path} is not a valid URI, the temp file cannot be created, or
 *     the copy fails
 */
static File getScriptFromPath(Configuration conf, String path) throws Exception {
    final URI scriptLocation = new URI(path);
    final FileSystem scriptFs = FileSystem.get(scriptLocation, conf);

    final File localScript = File.createTempFile("resources", "tmp");
    scriptFs.copyToLocalFile(new Path(path), new Path(localScript.getAbsolutePath()));
    return localScript;
}
From source file:com.pegasus.ResultInfo.java
License:Apache License
public int run(final String[] args) throws Exception { Configuration conf = getConf(); final FileSystem fs = FileSystem.get(conf); edge_path = new Path(conf.get("edge_path")); all_vertices = new Path(conf.get("all_vertices")); curbm_path = new Path(conf.get("iteration_state")); tempbm_path = new Path(conf.get("stage1out")); nextbm_path = new Path(conf.get("stage2out")); output_path = new Path(conf.get("stage3out")); grapherOut_path = new Path(conf.get("grapherout")); nreducers = Integer.parseInt(conf.get("num_reducers")); local_output_path = conf.get("local_output"); // initital cleanup fs.delete(tempbm_path, true);/* w ww . j av a 2 s.c o m*/ fs.delete(nextbm_path, true); fs.delete(output_path, true); fs.delete(curbm_path, true); fs.delete(grapherOut_path, true); FileUtil.fullyDelete(new File(local_output_path)); fs.mkdirs(curbm_path); //fs.mkdirs(grapherOut_path); FileStatus[] statusArray = fs.listStatus(all_vertices); for (int index = 0; index < statusArray.length; index++) { Path temp = statusArray[index].getPath(); FileUtil.copy(fs, temp, fs, curbm_path, false, conf); } make_symmetric = 1; System.out.println("\n-----===[PEGASUS: A Peta-Scale Graph Mining System]===-----\n"); // Iteratively calculate neighborhood function. // rotate directory for (int i = cur_iter; i < MAX_ITERATIONS; i++) { cur_iter++; System.out.println("configStage1"); JobClient.runJob(configStage1()); System.out.println("configStage2"); JobClient.runJob(configStage2()); System.out.println("configStage3"); JobClient.runJob(configStage3()); FileUtil.fullyDelete(FileSystem.getLocal(getConf()), new Path(local_output_path)); // copy neighborhood information from HDFS to local disk, and read it! 
String new_path = local_output_path + "/" + i; fs.copyToLocalFile(output_path, new Path(new_path)); ResultInfo ri = readIterationOutput(new_path); changed_nodes[iter_counter] = ri.changed; changed_nodes[iter_counter] = ri.unchanged; iter_counter++; System.out.println("Hop " + i + " : changed = " + ri.changed + ", unchanged = " + ri.unchanged); fs.delete(curbm_path); fs.delete(tempbm_path); fs.delete(output_path); fs.rename(nextbm_path, curbm_path); // Stop when the minimum neighborhood doesn't change if (ri.changed == 0) { System.out.println("All the component ids converged. Finishing..."); fs.rename(curbm_path, grapherOut_path); break; } } FileUtil.fullyDelete(FileSystem.getLocal(getConf()), new Path(local_output_path)); // finishing. System.out.println("\n[PEGASUS] Connected component computed."); System.out.println("[PEGASUS] Total Iteration = " + iter_counter); return 0; }