Example usage for org.apache.hadoop.fs FileSystem copyToLocalFile

List of usage examples for org.apache.hadoop.fs FileSystem copyToLocalFile

Introduction

On this page you can find example usages of org.apache.hadoop.fs FileSystem copyToLocalFile.

Prototype

public void copyToLocalFile(Path src, Path dst) throws IOException 

Source Link

Document

Copy a file from the remote filesystem to the local one.

Usage

From source file:org.apache.tez.dag.utils.RelocalizationUtils.java

License:Apache License

/**
 * Copies the resource addressed by {@code uri} into {@code destDir} under the name
 * {@code destName}.
 *
 * @param destName file name to give the local copy
 * @param uri      location of the resource; also selects the source file system
 * @param conf     configuration used to look up the source and local file systems
 * @param destDir  directory that receives the copy
 * @return the destination path, qualified against the local file system's URI with
 *         {@code destDir} as the working directory
 * @throws IOException if a file system lookup or the copy fails
 */
private static Path downloadResource(String destName, URI uri, Configuration conf, String destDir)
        throws IOException {
    FileSystem sourceFs = FileSystem.get(uri, conf);
    Path targetDir = new Path(destDir);
    Path localTarget = new Path(targetDir, destName);
    sourceFs.copyToLocalFile(new Path(uri), localTarget);

    URI localFsUri = FileSystem.getLocal(conf).getUri();
    return localTarget.makeQualified(localFsUri, targetDir);
}

From source file:org.apache.tez.history.TestHistoryParser.java

License:Apache License

/**
 * Copies the simple-history file of the given DAG's first application attempt into
 * {@code DOWNLOAD_DIR}, parses it with {@link SimpleHistoryParser} and returns the result.
 *
 * @param dagId string form of the DAG id
 * @return the parsed DAG information; its DAG id is asserted to equal {@code dagId}
 * @throws TezException declared by the original signature
 * @throws IOException  if the history file cannot be copied
 */
private DagInfo getDagInfoFromSimpleHistory(String dagId) throws TezException, IOException {
    TezDAGID parsedDagId = TezDAGID.fromString(dagId);
    ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(parsedDagId.getApplicationId(), 1);

    // remote history file: <fs.defaultFS><history dir><history file name>.<application attempt id>
    Path remoteHistory = new Path(
            conf.get("fs.defaultFS") + SIMPLE_HISTORY_DIR + HISTORY_TXT + "." + attemptId);
    FileSystem remoteFs = remoteHistory.getFileSystem(conf);
    remoteFs.copyToLocalFile(remoteHistory, new Path(DOWNLOAD_DIR, HISTORY_TXT));

    // parse the downloaded copy via SimpleHistory
    SimpleHistoryParser historyParser = new SimpleHistoryParser(new File(DOWNLOAD_DIR, HISTORY_TXT));
    DagInfo parsed = historyParser.getDAGData(dagId);
    assertTrue(parsed.getDagId().equals(dagId));
    return parsed;
}

From source file:org.commoncrawl.service.listcrawler.HDFSFileIndex.java

License:Open Source License

public HDFSFileIndex(FileSystem remoteFileSystem, Path remoteIndexFileLocation, Path remoteDataFileLocation,
        File localIndexDataDirectory) throws IOException {
    _remoteFileSystem = remoteFileSystem;
    _remoteDataPath = remoteDataFileLocation;
    // create a local index file for the index
    _localIndexFilePath = new File(localIndexDataDirectory, remoteIndexFileLocation.getName());
    _localIndexFilePath.delete();//from   www.  j a v  a2s.  c o m

    LOG.info("Copying Remote Index Location:" + remoteIndexFileLocation + " to Local File Location:"
            + _localIndexFilePath);
    // copy over the index data file 
    remoteFileSystem.copyToLocalFile(remoteIndexFileLocation, new Path(_localIndexFilePath.getAbsolutePath()));
    LOG.info("Done Copying Remote File. Loading Index");
    // load the index 
    loadIndexFromLocalFile();
}

From source file:org.commoncrawl.service.pagerank.slave.BeginPageRankTask.java

License:Open Source License

/**
 * Prepares this node for a page rank job and returns a result holding the opened value map.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>create the job-local directory;</li>
 *   <li>copy the outlinks file to the job-local directory unless a local file of the same
 *       length already exists;</li>
 *   <li>open the value map from the values file and range file on the remote file system,
 *       falling back to the iteration-zero values file when the expected one is missing;</li>
 *   <li>if that fallback happened, optionally recalculate ranks from the previous
 *       iteration's data, depending on the master's state and on whether a distribute-phase
 *       checkpoint file exists.</li>
 * </ol>
 *
 * @return the task result with {@code _valueMap} populated
 * @throws IOException on any I/O failure; the exception is logged and then rethrown
 */
@Override
protected BeginPageRankTaskResult runTask() throws IOException {

    BeginPageRankTaskResult result = new BeginPageRankTaskResult();

    try {

        // create job local directory if necessary
        _server.getActiveJobLocalPath().mkdirs();

        FileSystem fileSystem = _server.getFileSystem();

        // figure out if we are going to load values from base location or job config (based on iteration number)

        // range file: remote location under the input values path, plus its job-local counterpart
        // (rangeLocalPath is only referenced by the disabled copy code further down)
        Path rangeRemotePath = new Path(_config.getInputValuesPath(),
                PageRankUtils.makeUniqueFileName(Constants.PR_RANGE_FILE_PREFIX, 0, _server.getNodeIndex()));
        Path rangeLocalPath = PageRankUtils.makeRangeFilePath(_server.getActiveJobLocalPath(),
                _server.getNodeIndex());

        // ids file: both paths are only referenced by the disabled copy code further down
        Path idsRemotePath = new Path(_config.getInputValuesPath(),
                PageRankUtils.makeUniqueFileName(Constants.PR_IDS_FILE_PREFIX, 0, _server.getNodeIndex()));
        Path idsLocalPath = new Path(PageRankUtils
                .makeIdsFilePath(_server.getActiveJobLocalPath(), _server.getNodeIndex()).getAbsolutePath());

        // outlinks file: remote location under the outlinks data path, local copy in the job-local directory
        Path outlinksFileRemotePath = new Path(_config.getOutlinksDataPath(),
                PageRankUtils.makeUniqueFileName(Constants.PR_OUTLINKS_FILE_PREFIX, 0, _server.getNodeIndex()));
        Path outlinksFileLocalPath = new Path(new File(_server.getActiveJobLocalPath(),
                PageRankUtils.makeUniqueFileName(Constants.PR_OUTLINKS_FILE_PREFIX, 0, _server.getNodeIndex()))
                        .getAbsolutePath());

        Path valuesRemotePath = null;

        if (_config.getIterationNumber() == 0) {
            // fetch values from base values path
            valuesRemotePath = new Path(_config.getInputValuesPath(), PageRankUtils
                    .makeUniqueFileName(Constants.PR_VALUE_FILE_PREFIX, 0, _server.getNodeIndex()));
            LOG.info("Iteration Number is 0. Using Values File:" + valuesRemotePath);
        } else {
            // fetch latest values from job path (hdfs) based on last iteration number ...
            valuesRemotePath = new Path(_config.getJobWorkPath(), PageRankUtils.makeUniqueFileName(
                    Constants.PR_VALUE_FILE_PREFIX, _config.getIterationNumber() - 1, _server.getNodeIndex()));
            LOG.info("Iteration Number is:" + _config.getIterationNumber() + ". Using Values File:"
                    + valuesRemotePath);
        }

        // Disabled: local copies of the range and ids files (same copy-if-missing-or-length-differs
        // scheme as the outlinks copy below).
        /*
        Path localValuesFilePath = new Path(new File(_server.getActiveJobLocalPath(),PageRankUtils.makeUniqueFileName(Constants.PR_VALUE_FILE_PREFIX,_config.getIterationNumber(),_server.getNodeIndex())).getAbsolutePath());


        // copy the files to the local directory ...
        FileStatus rangeFileStatus = fileSystem.getFileStatus(rangeRemotePath);
        File       rangeLocalFile  = new File(rangeLocalPath.toString());

        if (rangeLocalFile.exists() == false || rangeLocalFile.length() != rangeFileStatus.getLen()) {
          rangeLocalFile.delete();
          LOG.info("Copying Range File:" + rangeRemotePath + " to " + rangeLocalPath);
          fileSystem.copyToLocalFile(rangeRemotePath, rangeLocalPath);
        }
        else {
          LOG.info("Skipping Copy of Range File:" + rangeRemotePath + " to " + rangeLocalPath);
        }

        FileStatus idFileStatus = fileSystem.getFileStatus(idsRemotePath);
        File       idLocalFile  = new File(idsLocalPath.toString());

        if (idLocalFile.exists() == false || idLocalFile.length() != idFileStatus.getLen()) {
          LOG.info("Copying Ids File:" + idsRemotePath + " to " + idsLocalPath);
          fileSystem.copyToLocalFile(idsRemotePath, idsLocalPath);
        }
        else {
          LOG.info("Skipping Copying Ids File:" + idsRemotePath + " to " + idsLocalPath);
        }
           */

        // copy the outlinks file locally unless a local file with the same length is already present
        FileStatus outlinksFileStatus = fileSystem.getFileStatus(outlinksFileRemotePath);
        File outlinksLocalFile = new File(outlinksFileLocalPath.toString());

        if (outlinksLocalFile.exists() == false || outlinksLocalFile.length() != outlinksFileStatus.getLen()) {
            LOG.info("Copying outlinks File:" + outlinksFileRemotePath + " to " + outlinksLocalFile);
            fileSystem.copyToLocalFile(outlinksFileRemotePath, outlinksFileLocalPath);
        } else {
            LOG.info("Skipping Copying outlinks File:" + outlinksFileRemotePath + " to " + outlinksLocalFile);
        }

        // Disabled: local copy of the values file (the value map below is opened from the remote path instead).
        /*
        FileStatus valuesFileStatus = fileSystem.getFileStatus(valuesRemotePath);
        File       valuesLocalFile  = new File(localValuesFilePath.toString());

        if (valuesLocalFile.exists() == false || valuesLocalFile.length() != valuesFileStatus.getLen()) {
          LOG.info("Copying values File:" + valuesRemotePath + " to " + valuesLocalFile);
          fileSystem.copyToLocalFile(valuesRemotePath,localValuesFilePath);
        }
        else {
          LOG.info("Skipping Copying values File:" + valuesRemotePath + " to " + valuesLocalFile);
        }
        */
        // now load the values map ...
        result._valueMap = new PageRankUtils.PRValueMap();
        //result._valueMap.open(fileSystem,valuesRemotePath, PageRankUtils.makeRangeFilePath(_server.getActiveJobLocalPath(), _server.getNodeIndex()));

        // NOTE(review): this check reads the iteration number from _server.getActiveJobConfig() while the
        // rest of the method reads it from _config - presumably the same configuration; confirm.
        boolean valuesFileMissing = false;
        if (_server.getActiveJobConfig().getIterationNumber() != 0
                && !_server.getFileSystem().exists(valuesRemotePath)) {
            LOG.error("Values File Missing for Iteration:" + _server.getActiveJobConfig().getIterationNumber());

            valuesFileMissing = true;
            // revert to iteration zero values file ...
            valuesRemotePath = new Path(_config.getInputValuesPath(), PageRankUtils
                    .makeUniqueFileName(Constants.PR_VALUE_FILE_PREFIX, 0, _server.getNodeIndex()));
        }
        result._valueMap.open(fileSystem, valuesRemotePath, rangeRemotePath);

        // ok now if iteration number is non-zero,
        // recalculate rank from previous iteration's data ...
        // (only done when the values file for the previous iteration was missing above)

        if (_config.getIterationNumber() != 0 && valuesFileMissing) {

            // load data from previous iteration ...
            // a value of 0 acts as the "skip recalculation" marker for the check further down
            // NOTE(review): for iteration number 1 this starts out as 0, so the recalculation is
            // skipped as well - confirm that is intended.
            int iterationNumberToLoadFrom = _config.getIterationNumber() - 1;
            // ok figure out what state master is in
            if (_prMasterStatus == PRMasterState.ServerStatus.ITERATING_CALCULATING) {
                // use current iteration number data
                iterationNumberToLoadFrom = 0;
                LOG.info("Master is in CALCULATION PHASE. SKIP LOAD OF VALUEMAP");
            }
            // in the distribution case ... check to see if checkpoint file is present ...
            else if (_prMasterStatus == PRMasterState.ServerStatus.ITERATING_DISTRIBUTING) {

                Path checkpointFilePath = PageRankUtils.getCheckpointFilePath(
                        new Path(_server.getActiveJobConfig().getJobWorkPath()), IterationInfo.Phase.DISTRIBUTE,
                        _server.getActiveJobConfig().getIterationNumber(), _server.getNodeIndex());

                // ok checkpoint file exists, use current iteration number to load data
                if (_server.getFileSystem().exists(checkpointFilePath)) {
                    LOG.info("Checkpoint file exists. SKIP LOAD OF VALUEMAP");
                    iterationNumberToLoadFrom = 0;
                }
            }

            if (iterationNumberToLoadFrom != 0) {
                // load super domain filter
                LOG.info("Initializing SuperDomain Filter");
                SuperDomainFilter superDomainFilter = new SuperDomainFilter();
                superDomainFilter.loadFromPath(_server.getDirectoryServiceAddress(),
                        CrawlEnvironment.ROOT_SUPER_DOMAIN_PATH, false);

                LOG.info("Starting Calculate Task to load value map - Using Iteration Number:"
                        + iterationNumberToLoadFrom);

                // first zero value map values ...
                result._valueMap.zeroValues();

                // the callback records progress on this task and returns the task's cancelled flag
                PageRankUtils.calculateRank(_server.getConfig(), _server.getFileSystem(), result._valueMap,
                        _server.getActiveJobLocalPath(), _server.getActiveJobConfig().getJobWorkPath(),
                        _server.getNodeIndex(), _server.getBaseConfig().getSlaveCount(),
                        iterationNumberToLoadFrom, superDomainFilter,
                        new PageRankUtils.ProgressAndCancelCheckCallback() {

                            @Override
                            public boolean updateProgress(final float percentComplete) {
                                _percentComplete = percentComplete;
                                return BeginPageRankTask.this.isCancelled();
                            }
                        });
            }
        }
    } catch (IOException e) {
        // log the full exception text, then let the caller see the original exception
        LOG.error(CCStringUtils.stringifyException(e));
        throw e;
    }
    return result;
}

From source file:org.commoncrawl.service.queryserver.query.DomainURLListQuery.java

License:Open Source License

/**
 * Called once the remote side of the query has finished. If the remote URL list output for
 * the single expected shard exists, it is copied to the local query results location,
 * replacing any previous local file.
 *
 * @param fileSystem  file system holding the remote output
 * @param conf        configuration used to obtain the local file system
 * @param request     the query request; supplies the local results prefix and sort field
 * @param resultCount not used by this implementation
 * @throws IOException if there is not exactly one shard-to-host mapping, or on I/O failure
 */
@Override
public void remoteDispatchComplete(FileSystem fileSystem, Configuration conf,
        QueryRequest<DomainURLListQueryInfo, URLFPV2, CrawlDatumAndMetadata> request, long resultCount)
        throws IOException {

    int mappingCount = getShardIdToHostMapping().size();
    if (mappingCount != 1) {
        throw new IOException("Excepected One ShardIdToHostMapping. Got:" + mappingCount);
    }

    LOG.info("remoteDispathc Complete Called");
    Path remoteResults = getRemoteOutputFilePath(getClientQueryInfo(),
            getShardIdToHostMapping().get(0).getShardId());

    // nothing to fetch when the remote side produced no output file
    if (!fileSystem.exists(remoteResults)) {
        return;
    }

    LocalFileSystem localFs = FileSystem.getLocal(conf);
    Path localResults = new Path(getLocalQueryResultsPathPrefix(request)
            + getURLOutputFileNameBasedOnSortByField(request.getClientQueryInfo().getSortByField()));

    // drop any previous local result before copying the new one
    localFs.delete(localResults);
    LOG.info("Copying " + remoteResults + " to LocalPath:" + localResults);
    fileSystem.copyToLocalFile(remoteResults, localResults);
}

From source file:org.commoncrawl.service.queryserver.slave.SlaveServer.java

License:Open Source License

/**
 * Ensures a local copy of the given remote file exists under the job-local path and
 * returns it. The local file is placed in a directory named after the remote file's parent
 * directory. The copy is skipped when a local file with the same length already exists.
 *
 * @param remotePath remote file to mirror locally
 * @return the local file (freshly copied or reused)
 * @throws IOException if the remote file status cannot be read, the local directory cannot
 *                     be created, or the copy fails
 */
private File copyAcrossQueryDBFile(Path remotePath) throws IOException {
    FileSystem fileSystem = CrawlEnvironment.getDefaultFileSystem();

    // get the status of the specified file (its length decides whether the local copy is reusable)
    FileStatus fileStatus = fileSystem.getFileStatus(remotePath);

    File localDirectory = new File(getJobLocalPath(), remotePath.getParent().getName());
    // mkdirs() also returns false when the directory already exists (e.g. created concurrently),
    // so re-check before treating a false result as a failure
    if (!localDirectory.isDirectory() && !localDirectory.mkdirs() && !localDirectory.isDirectory()) {
        throw new IOException("Failed to create local directory:" + localDirectory);
    }

    File localFile = new File(localDirectory, remotePath.getName());

    if (!localFile.exists() || localFile.length() != fileStatus.getLen()) {
        // missing or stale: remove whatever is there and fetch a fresh copy
        localFile.delete();
        LOG.info("Copying Remote File:" + remotePath + " to " + localFile);
        fileSystem.copyToLocalFile(remotePath, new Path(localFile.getAbsolutePath()));
    } else {
        LOG.info("Skipping Copy of Remote File:" + remotePath + " to " + localFile);
    }
    return localFile;
}

From source file:org.goldenorb.OrbTracker.java

License:Apache License

/**
 * Copies every HDFS distributed file listed in the job configuration into a job-specific
 * directory below {@code java.io.tmpdir}
 * ({@code <tmpdir>/GoldenOrb/<cluster name>/<job number>/}).
 *
 * <p>The copy is best-effort: an {@link IOException} is logged with its stack trace and
 * not propagated, so files after the failing one are not copied.
 *
 * @param jobConf job configuration naming the files, the cluster and the job number
 * @throws OrbZKFailure declared by the overridden method; not thrown by this body
 */
@Override
public void getRequiredFiles(OrbConfiguration jobConf) throws OrbZKFailure {
    Path[] hdfsPaths = jobConf.getHDFSdistributedFiles();
    // Render the array explicitly: an array passed as the only "{}" argument can bind to the
    // logger's varargs parameter, in which case only its first element would be printed.
    logger.info("jobConf.getHDFSdistributedFiles(): {}", java.util.Arrays.toString(hdfsPaths));
    if (hdfsPaths == null) {
        return;
    }

    try {
        String baseLocalPath = System.getProperty("java.io.tmpdir") + "/GoldenOrb/"
                + jobConf.getOrbClusterName() + "/" + jobConf.getJobNumber() + "/";
        FileSystem fs = FileSystem.get(jobConf);
        for (Path path : hdfsPaths) {
            // Path.getName() yields the final path component without manual splitting
            String localTarget = baseLocalPath + path.getName();
            fs.copyToLocalFile(path, new Path(localTarget));
            logger.info("{} copied from HDFS to local machine at {}", path, localTarget);
        }
    } catch (IOException e) {
        // deliberately not rethrown (best-effort); pass the exception so the stack trace
        // goes through the logger instead of stderr
        logger.error("EXCEPTION occured while copying files from HDFS to local machine : " + e.getMessage(), e);
    }
}

From source file:org.mrgeo.hdfs.utils.HadoopFileUtils.java

License:Apache License

/**
 * Copies {@code fileName} from {@code fromDir} to {@code toDir}, creating {@code toDir}
 * first.
 *
 * @param conf     configuration used to resolve both file systems
 * @param fromDir  directory containing the file to copy
 * @param toDir    directory that receives the copy
 * @param fileName name of the file, relative to both directories
 * @throws IOException if a file system lookup, the directory creation or the copy fails
 */
public static void get(final Configuration conf, final Path fromDir, final Path toDir, final String fileName)
        throws IOException {
    final FileSystem sourceFs = getFileSystem(conf, fromDir);

    // create the destination directory on the destination's own file system
    toDir.getFileSystem(conf).mkdirs(toDir);

    final Path source = new Path(fromDir, fileName);
    final Path target = new Path(toDir, fileName);
    sourceFs.copyToLocalFile(source, target);
}

From source file:org.mrgeo.test.MapOpTestUtils.java

License:Apache License

/**
 * Runs the map algebra expression and, if the resulting pyramid can be opened, copies the
 * output from {@code outputHdfs/testName} to {@code inputLocal/testName}.
 *
 * @param conf     configuration used to run the expression and resolve the file system
 * @param testName name of the test; used as the output and destination name
 * @param ex       map algebra expression to run
 * @throws IOException           if the copy fails
 * @throws JobFailedException    declared by the original signature
 * @throws JobCancelledException declared by the original signature
 * @throws ParserException       declared by the original signature
 */
public void generateBaselinePyramid(final Configuration conf, final String testName, final String ex)
        throws IOException, JobFailedException, JobCancelledException, ParserException {

    runMapAlgebraExpression(conf, testName, ex);

    final Path src = new Path(outputHdfs, testName);
    final MrsPyramid pyramid = MrsPyramid.open(src.toString(), (ProviderProperties) null);
    if (pyramid != null) {
        final Path dst = new Path(inputLocal, testName);
        // copyToLocalFile reads src from the file system it is invoked on, so that file
        // system must be resolved from src rather than from the destination
        final FileSystem srcFs = src.getFileSystem(conf);
        srcFs.copyToLocalFile(src, dst);
    }
}

From source file:org.mrgeo.test.MapOpTestVectorUtils.java

License:Apache License

/**
 * Runs the map algebra expression and, if the output exists at {@code outputHdfs/testName},
 * copies it to {@code inputLocal/testName}.
 *
 * @param conf     configuration used to run the expression and resolve the file system
 * @param testName name of the test; used as the output and destination name
 * @param ex       map algebra expression to run
 * @throws IOException           if the existence check or the copy fails
 * @throws ParserException       declared by the original signature
 * @throws JobFailedException    declared by the original signature
 * @throws JobCancelledException declared by the original signature
 */
public void generateBaselineVector(final Configuration conf, final String testName, final String ex)
        throws IOException, ParserException, JobFailedException, JobCancelledException {
    runMapAlgebraExpression(conf, testName, ex);

    final Path src = new Path(outputHdfs, testName);
    final FileSystem srcfs = src.getFileSystem(conf);
    if (srcfs.exists(src)) {
        final Path dst = new Path(inputLocal, testName);
        // copyToLocalFile reads src from the file system it is invoked on, so use the
        // source's file system (already resolved above) rather than the destination's
        srcfs.copyToLocalFile(src, dst);
    }
}