List of usage examples for org.apache.hadoop.fs FileSystem copyToLocalFile
public void copyToLocalFile(Path src, Path dst) throws IOException
From source file:org.apache.tez.dag.utils.RelocalizationUtils.java
License:Apache License
/**
 * Copies the resource at {@code uri} into {@code destDir} under the name {@code destName}.
 *
 * @param destName file name given to the local copy
 * @param uri location of the resource; also used to select the source file system
 * @param conf configuration used to obtain the source and local file systems
 * @param destDir directory that receives the copy
 * @return the destination path, qualified with the local file system's URI and with
 *     {@code destDir} as the working directory
 * @throws IOException if a file system cannot be obtained or the copy fails
 */
private static Path downloadResource(String destName, URI uri, Configuration conf, String destDir)
        throws IOException {
    FileSystem sourceFs = FileSystem.get(uri, conf);

    Path targetDir = new Path(destDir);
    Path target = new Path(targetDir, destName);
    Path source = new Path(uri);
    sourceFs.copyToLocalFile(source, target);

    URI localFsUri = FileSystem.getLocal(conf).getUri();
    return target.makeQualified(localFsUri, targetDir);
}
From source file:org.apache.tez.history.TestHistoryParser.java
License:Apache License
private DagInfo getDagInfoFromSimpleHistory(String dagId) throws TezException, IOException { TezDAGID tezDAGID = TezDAGID.fromString(dagId); ApplicationAttemptId applicationAttemptId = ApplicationAttemptId.newInstance(tezDAGID.getApplicationId(), 1);/*from w w w .jav a2 s. c om*/ Path historyPath = new Path( conf.get("fs.defaultFS") + SIMPLE_HISTORY_DIR + HISTORY_TXT + "." + applicationAttemptId); FileSystem fs = historyPath.getFileSystem(conf); Path localPath = new Path(DOWNLOAD_DIR, HISTORY_TXT); fs.copyToLocalFile(historyPath, localPath); File localFile = new File(DOWNLOAD_DIR, HISTORY_TXT); //Now parse via SimpleHistory SimpleHistoryParser parser = new SimpleHistoryParser(localFile); DagInfo dagInfo = parser.getDAGData(dagId); assertTrue(dagInfo.getDagId().equals(dagId)); return dagInfo; }
From source file:org.commoncrawl.service.listcrawler.HDFSFileIndex.java
License:Open Source License
public HDFSFileIndex(FileSystem remoteFileSystem, Path remoteIndexFileLocation, Path remoteDataFileLocation, File localIndexDataDirectory) throws IOException { _remoteFileSystem = remoteFileSystem; _remoteDataPath = remoteDataFileLocation; // create a local index file for the index _localIndexFilePath = new File(localIndexDataDirectory, remoteIndexFileLocation.getName()); _localIndexFilePath.delete();//from www. j a v a2s. c o m LOG.info("Copying Remote Index Location:" + remoteIndexFileLocation + " to Local File Location:" + _localIndexFilePath); // copy over the index data file remoteFileSystem.copyToLocalFile(remoteIndexFileLocation, new Path(_localIndexFilePath.getAbsolutePath())); LOG.info("Done Copying Remote File. Loading Index"); // load the index loadIndexFromLocalFile(); }
From source file:org.commoncrawl.service.pagerank.slave.BeginPageRankTask.java
License:Open Source License
@Override protected BeginPageRankTaskResult runTask() throws IOException { BeginPageRankTaskResult result = new BeginPageRankTaskResult(); try {/*from ww w . j ava 2 s. c om*/ // create job local directory if necessary _server.getActiveJobLocalPath().mkdirs(); FileSystem fileSystem = _server.getFileSystem(); // figure out if we are going to load values from base location or job config (based on iteration number) Path rangeRemotePath = new Path(_config.getInputValuesPath(), PageRankUtils.makeUniqueFileName(Constants.PR_RANGE_FILE_PREFIX, 0, _server.getNodeIndex())); Path rangeLocalPath = PageRankUtils.makeRangeFilePath(_server.getActiveJobLocalPath(), _server.getNodeIndex()); Path idsRemotePath = new Path(_config.getInputValuesPath(), PageRankUtils.makeUniqueFileName(Constants.PR_IDS_FILE_PREFIX, 0, _server.getNodeIndex())); Path idsLocalPath = new Path(PageRankUtils .makeIdsFilePath(_server.getActiveJobLocalPath(), _server.getNodeIndex()).getAbsolutePath()); Path outlinksFileRemotePath = new Path(_config.getOutlinksDataPath(), PageRankUtils.makeUniqueFileName(Constants.PR_OUTLINKS_FILE_PREFIX, 0, _server.getNodeIndex())); Path outlinksFileLocalPath = new Path(new File(_server.getActiveJobLocalPath(), PageRankUtils.makeUniqueFileName(Constants.PR_OUTLINKS_FILE_PREFIX, 0, _server.getNodeIndex())) .getAbsolutePath()); Path valuesRemotePath = null; if (_config.getIterationNumber() == 0) { // fetch values from base values path valuesRemotePath = new Path(_config.getInputValuesPath(), PageRankUtils .makeUniqueFileName(Constants.PR_VALUE_FILE_PREFIX, 0, _server.getNodeIndex())); LOG.info("Iteration Number is 0. Using Values File:" + valuesRemotePath); } else { // fetch latest values from job path (hdfs) based on last iteration number ... 
valuesRemotePath = new Path(_config.getJobWorkPath(), PageRankUtils.makeUniqueFileName( Constants.PR_VALUE_FILE_PREFIX, _config.getIterationNumber() - 1, _server.getNodeIndex())); LOG.info("Iteration Number is:" + _config.getIterationNumber() + ". Using Values File:" + valuesRemotePath); } /* Path localValuesFilePath = new Path(new File(_server.getActiveJobLocalPath(),PageRankUtils.makeUniqueFileName(Constants.PR_VALUE_FILE_PREFIX,_config.getIterationNumber(),_server.getNodeIndex())).getAbsolutePath()); // copy the files to the local directory ... FileStatus rangeFileStatus = fileSystem.getFileStatus(rangeRemotePath); File rangeLocalFile = new File(rangeLocalPath.toString()); if (rangeLocalFile.exists() == false || rangeLocalFile.length() != rangeFileStatus.getLen()) { rangeLocalFile.delete(); LOG.info("Copying Range File:" + rangeRemotePath + " to " + rangeLocalPath); fileSystem.copyToLocalFile(rangeRemotePath, rangeLocalPath); } else { LOG.info("Skipping Copy of Range File:" + rangeRemotePath + " to " + rangeLocalPath); } FileStatus idFileStatus = fileSystem.getFileStatus(idsRemotePath); File idLocalFile = new File(idsLocalPath.toString()); if (idLocalFile.exists() == false || idLocalFile.length() != idFileStatus.getLen()) { LOG.info("Copying Ids File:" + idsRemotePath + " to " + idsLocalPath); fileSystem.copyToLocalFile(idsRemotePath, idsLocalPath); } else { LOG.info("Skipping Copying Ids File:" + idsRemotePath + " to " + idsLocalPath); } */ FileStatus outlinksFileStatus = fileSystem.getFileStatus(outlinksFileRemotePath); File outlinksLocalFile = new File(outlinksFileLocalPath.toString()); if (outlinksLocalFile.exists() == false || outlinksLocalFile.length() != outlinksFileStatus.getLen()) { LOG.info("Copying outlinks File:" + outlinksFileRemotePath + " to " + outlinksLocalFile); fileSystem.copyToLocalFile(outlinksFileRemotePath, outlinksFileLocalPath); } else { LOG.info("Skipping Copying outlinks File:" + outlinksFileRemotePath + " to " + outlinksLocalFile); } 
/* FileStatus valuesFileStatus = fileSystem.getFileStatus(valuesRemotePath); File valuesLocalFile = new File(localValuesFilePath.toString()); if (valuesLocalFile.exists() == false || valuesLocalFile.length() != valuesFileStatus.getLen()) { LOG.info("Copying values File:" + valuesRemotePath + " to " + valuesLocalFile); fileSystem.copyToLocalFile(valuesRemotePath,localValuesFilePath); } else { LOG.info("Skipping Copying values File:" + valuesRemotePath + " to " + valuesLocalFile); } */ // now load the values map ... result._valueMap = new PageRankUtils.PRValueMap(); //result._valueMap.open(fileSystem,valuesRemotePath, PageRankUtils.makeRangeFilePath(_server.getActiveJobLocalPath(), _server.getNodeIndex())); boolean valuesFileMissing = false; if (_server.getActiveJobConfig().getIterationNumber() != 0 && !_server.getFileSystem().exists(valuesRemotePath)) { LOG.error("Values File Missing for Iteration:" + _server.getActiveJobConfig().getIterationNumber()); valuesFileMissing = true; // revert to iteration zero values file ... valuesRemotePath = new Path(_config.getInputValuesPath(), PageRankUtils .makeUniqueFileName(Constants.PR_VALUE_FILE_PREFIX, 0, _server.getNodeIndex())); } result._valueMap.open(fileSystem, valuesRemotePath, rangeRemotePath); // ok now if iteration number is non-zero, // recalculate rank from previous iteration's data ... if (_config.getIterationNumber() != 0 && valuesFileMissing) { // load data from previous iteration ... int iterationNumberToLoadFrom = _config.getIterationNumber() - 1; // ok figure out what state master is in if (_prMasterStatus == PRMasterState.ServerStatus.ITERATING_CALCULATING) { // use current iteration number data iterationNumberToLoadFrom = 0; LOG.info("Master is in CALCULATION PHASE. SKIP LOAD OF VALUEMAP"); } // in the distribution case ... check to see if checkpoint file is present ... 
else if (_prMasterStatus == PRMasterState.ServerStatus.ITERATING_DISTRIBUTING) { Path checkpointFilePath = PageRankUtils.getCheckpointFilePath( new Path(_server.getActiveJobConfig().getJobWorkPath()), IterationInfo.Phase.DISTRIBUTE, _server.getActiveJobConfig().getIterationNumber(), _server.getNodeIndex()); // ok checkpoint file exists, use current iteration number to load data if (_server.getFileSystem().exists(checkpointFilePath)) { LOG.info("Checkpoint file exists. SKIP LOAD OF VALUEMAP"); iterationNumberToLoadFrom = 0; } } if (iterationNumberToLoadFrom != 0) { // load super domain filter LOG.info("Initializing SuperDomain Filter"); SuperDomainFilter superDomainFilter = new SuperDomainFilter(); superDomainFilter.loadFromPath(_server.getDirectoryServiceAddress(), CrawlEnvironment.ROOT_SUPER_DOMAIN_PATH, false); LOG.info("Starting Calculate Task to load value map - Using Iteration Number:" + iterationNumberToLoadFrom); // first zero value map values ... result._valueMap.zeroValues(); PageRankUtils.calculateRank(_server.getConfig(), _server.getFileSystem(), result._valueMap, _server.getActiveJobLocalPath(), _server.getActiveJobConfig().getJobWorkPath(), _server.getNodeIndex(), _server.getBaseConfig().getSlaveCount(), iterationNumberToLoadFrom, superDomainFilter, new PageRankUtils.ProgressAndCancelCheckCallback() { @Override public boolean updateProgress(final float percentComplete) { _percentComplete = percentComplete; return BeginPageRankTask.this.isCancelled(); } }); } } } catch (IOException e) { LOG.error(CCStringUtils.stringifyException(e)); throw e; } return result; }
From source file:org.commoncrawl.service.queryserver.query.DomainURLListQuery.java
License:Open Source License
/**
 * Copies the remote URL-list result file to the local query-results location once the
 * remote dispatch has finished.
 *
 * <p>Nothing is copied when the remote output file does not exist. Any existing local
 * result file is deleted before the copy.
 *
 * @param fileSystem file system holding the remote query output
 * @param conf configuration used to obtain the local file system
 * @param request the query request; supplies the local results prefix and the sort field
 * @param resultCount not used by this implementation
 * @throws IOException if there is not exactly one shard-to-host mapping, or if a file
 *     system operation fails
 */
@Override
public void remoteDispatchComplete(FileSystem fileSystem, Configuration conf,
        QueryRequest<DomainURLListQueryInfo, URLFPV2, CrawlDatumAndMetadata> request,
        long resultCount) throws IOException {
    if (getShardIdToHostMapping().size() != 1) {
        throw new IOException(
                "Excepected One ShardIdToHostMapping. Got:" + getShardIdToHostMapping().size());
    }

    LOG.info("remoteDispathc Complete Called");

    Path remoteURLListPath = getRemoteOutputFilePath(getClientQueryInfo(),
            getShardIdToHostMapping().get(0).getShardId());
    if (!fileSystem.exists(remoteURLListPath)) {
        return;
    }

    LocalFileSystem localFS = FileSystem.getLocal(conf);
    Path localURLListPath = new Path(getLocalQueryResultsPathPrefix(request)
            + getURLOutputFileNameBasedOnSortByField(request.getClientQueryInfo().getSortByField()));

    // Use the non-deprecated overload; recursive=true matches what the deprecated
    // single-argument delete(Path) delegates to.
    localFS.delete(localURLListPath, true);

    LOG.info("Copying " + remoteURLListPath + " to LocalPath:" + localURLListPath);
    fileSystem.copyToLocalFile(remoteURLListPath, localURLListPath);
}
From source file:org.commoncrawl.service.queryserver.slave.SlaveServer.java
License:Open Source License
private File copyAcrossQueryDBFile(Path remotePath) throws IOException { FileSystem fileSystem = CrawlEnvironment.getDefaultFileSystem(); // get the status of the specified file FileStatus fileStatus = fileSystem.getFileStatus(remotePath); File localDirectory = new File(getJobLocalPath(), remotePath.getParent().getName()); if (!localDirectory.exists()) { localDirectory.mkdirs();/*from w w w. ja v a 2 s .c om*/ } File localFile = new File(localDirectory, remotePath.getName()); if (localFile.exists() == false || localFile.length() != fileStatus.getLen()) { localFile.delete(); LOG.info("Copying Remote File:" + remotePath + " to " + localFile); fileSystem.copyToLocalFile(remotePath, new Path(localFile.getAbsolutePath())); } else { LOG.info("Skipping Copy of Remote File:" + remotePath + " to " + localFile); } return localFile; }
From source file:org.goldenorb.OrbTracker.java
License:Apache License
@Override public void getRequiredFiles(OrbConfiguration jobConf) throws OrbZKFailure { logger.info("jobConf.getHDFSdistributedFiles(): {}", jobConf.getHDFSdistributedFiles()); try {/*w w w. j av a 2 s.co m*/ Path[] hdfsPaths = jobConf.getHDFSdistributedFiles(); if (hdfsPaths != null) { String baseLocalPath = System.getProperty("java.io.tmpdir") + "/GoldenOrb/" + jobConf.getOrbClusterName() + "/" + jobConf.getJobNumber() + "/"; FileSystem fs = FileSystem.get(jobConf); for (Path path : hdfsPaths) { String[] name = path.toString().split("/"); fs.copyToLocalFile(path, new Path(baseLocalPath + name[name.length - 1])); logger.info(path.toString() + " copied from HDFS to local machine at " + baseLocalPath + name[name.length - 1]); } } } catch (IOException e) { logger.error("EXCEPTION occured while copying files from HDFS to local machine : " + e.getMessage()); e.printStackTrace(); //throw new OrbZKFailure(e); } }
From source file:org.mrgeo.hdfs.utils.HadoopFileUtils.java
License:Apache License
public static void get(final Configuration conf, final Path fromDir, final Path toDir, final String fileName) throws IOException { final FileSystem fs = getFileSystem(conf, fromDir); final FileSystem fsTo = toDir.getFileSystem(conf); fsTo.mkdirs(toDir);//from w w w .ja v a 2s . c o m fs.copyToLocalFile(new Path(fromDir, fileName), new Path(toDir, fileName)); }
From source file:org.mrgeo.test.MapOpTestUtils.java
License:Apache License
public void generateBaselinePyramid(final Configuration conf, final String testName, final String ex) throws IOException, JobFailedException, JobCancelledException, ParserException { runMapAlgebraExpression(conf, testName, ex); final Path src = new Path(outputHdfs, testName); final MrsPyramid pyramid = MrsPyramid.open(src.toString(), (ProviderProperties) null); if (pyramid != null) { final Path dst = new Path(inputLocal, testName); final FileSystem fs = dst.getFileSystem(conf); fs.copyToLocalFile(src, dst); }//ww w . j a v a 2 s.co m }
From source file:org.mrgeo.test.MapOpTestVectorUtils.java
License:Apache License
/**
 * Runs a map algebra expression and, if the output exists, copies it from
 * {@code outputHdfs} to {@code inputLocal} under {@code testName}.
 *
 * @param conf configuration used to run the expression and resolve the file system
 * @param testName name of the output under {@code outputHdfs} and of the local copy
 * @param ex the map algebra expression to run
 * @throws IOException if the existence check or the copy fails
 * @throws ParserException declared by the signature; presumably from parsing the expression
 * @throws JobFailedException declared by the signature; presumably from running the expression
 * @throws JobCancelledException declared by the signature; presumably from running the expression
 */
public void generateBaselineVector(final Configuration conf, final String testName,
        final String ex)
        throws IOException, ParserException, JobFailedException, JobCancelledException {
    runMapAlgebraExpression(conf, testName, ex);

    final Path src = new Path(outputHdfs, testName);
    final FileSystem srcfs = src.getFileSystem(conf);
    if (srcfs.exists(src)) {
        final Path dst = new Path(inputLocal, testName);
        // copyToLocalFile reads src from the file system it is invoked on, so use the
        // source file system already resolved above.
        srcfs.copyToLocalFile(src, dst);
    }
}