List of usage examples for org.apache.hadoop.fs.FileSystem.makeQualified
public Path makeQualified(Path path)
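Before the real-world examples below, here is a minimal, self-contained sketch of what makeQualified does: it resolves a path against the file system's scheme, authority, and working directory, returning a fully qualified Path. The fs.defaultFS value and the printed URI are illustrative placeholders, not taken from any of the source files below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeQualifiedSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // hypothetical cluster address, for illustration only
    conf.set("fs.defaultFS", "hdfs://namenode:8020");
    FileSystem fs = FileSystem.get(conf);

    // a scheme-less, relative path ...
    Path relative = new Path("data/input");
    // ... gains scheme, authority, and an absolute base, e.g.
    // hdfs://namenode:8020/user/<current-user>/data/input
    Path qualified = fs.makeQualified(relative);
    System.out.println(qualified);
  }
}

The examples below show the same call used to normalize working directories, job submission paths, output directories, and YARN local resource paths.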
From source file:org.apache.gobblin.data.management.copy.replication.ConfigBasedDataset.java
License:Apache License
@Override
public Collection<? extends CopyEntity> getCopyableFiles(FileSystem targetFs,
    CopyConfiguration copyConfiguration) throws IOException {
  boolean enforceFileSizeMatch = this.rc.getEnforceFileSizeMatchFromConfigStore().isPresent()
      ? this.rc.getEnforceFileSizeMatchFromConfigStore().get()
      : copyConfiguration.isEnforceFileLengthMatch();
  List<CopyEntity> copyableFiles = Lists.newArrayList();
  EndPoint copyFromRaw = copyRoute.getCopyFrom();
  EndPoint copyToRaw = copyRoute.getCopyTo();
  if (!(copyFromRaw instanceof HadoopFsEndPoint && copyToRaw instanceof HadoopFsEndPoint)) {
    log.warn("Currently only handle the Hadoop Fs EndPoint replication");
    return copyableFiles;
  }

  if (!this.srcDataFileVersionStrategy.isPresent() || !this.dstDataFileVersionStrategy.isPresent()) {
    log.warn("Version strategy doesn't exist, cannot handle copy");
    return copyableFiles;
  }
  if (!this.srcDataFileVersionStrategy.get().getClass().getName()
      .equals(this.dstDataFileVersionStrategy.get().getClass().getName())) {
    log.warn("Version strategy src: {} and dst: {} doesn't match, cannot handle copy.",
        this.srcDataFileVersionStrategy.get().getClass().getName(),
        this.dstDataFileVersionStrategy.get().getClass().getName());
    return copyableFiles;
  }

  // For {@link HadoopFsEndPoint}s, set pathFilter and applyFilterToDirectories
  HadoopFsEndPoint copyFrom = (HadoopFsEndPoint) copyFromRaw;
  HadoopFsEndPoint copyTo = (HadoopFsEndPoint) copyToRaw;
  copyFrom.setPathFilter(pathFilter);
  copyFrom.setApplyFilterToDirectories(applyFilterToDirectories);
  copyTo.setPathFilter(pathFilter);
  copyTo.setApplyFilterToDirectories(applyFilterToDirectories);

  if (this.watermarkEnabled) {
    if ((!copyFromRaw.getWatermark().isPresent() && copyToRaw.getWatermark().isPresent())
        || (copyFromRaw.getWatermark().isPresent() && copyToRaw.getWatermark().isPresent()
            && copyFromRaw.getWatermark().get().compareTo(copyToRaw.getWatermark().get()) <= 0)) {
      log.info(
          "No need to copy as destination watermark >= source watermark with source watermark {}, for dataset with metadata {}",
          copyFromRaw.getWatermark().isPresent() ? copyFromRaw.getWatermark().get().toJson() : "N/A",
          this.rc.getMetaData());
      return copyableFiles;
    }
  }

  Configuration conf = HadoopUtils.newConfiguration();
  FileSystem copyFromFs = FileSystem.get(copyFrom.getFsURI(), conf);
  FileSystem copyToFs = FileSystem.get(copyTo.getFsURI(), conf);

  Collection<FileStatus> allFilesInSource = copyFrom.getFiles();
  Collection<FileStatus> allFilesInTarget = copyTo.getFiles();

  Set<FileStatus> copyFromFileStatuses = Sets.newHashSet(allFilesInSource);
  Map<Path, FileStatus> copyToFileMap = Maps.newHashMap();
  for (FileStatus f : allFilesInTarget) {
    copyToFileMap.put(PathUtils.getPathWithoutSchemeAndAuthority(f.getPath()), f);
  }

  Collection<Path> deletedPaths = Lists.newArrayList();

  boolean watermarkMetadataCopied = false;

  boolean deleteTargetIfNotExistOnSource = rc.isDeleteTargetIfNotExistOnSource();

  for (FileStatus originFileStatus : copyFromFileStatuses) {
    Path relative = PathUtils.relativizePath(
        PathUtils.getPathWithoutSchemeAndAuthority(originFileStatus.getPath()),
        PathUtils.getPathWithoutSchemeAndAuthority(copyFrom.getDatasetPath()));
    // construct the new path in the target file system
    Path newPath = new Path(copyTo.getDatasetPath(), relative);

    if (relative.toString().equals(ReplicaHadoopFsEndPoint.WATERMARK_FILE)) {
      watermarkMetadataCopied = true;
    }

    boolean shouldCopy = true;
    if (copyToFileMap.containsKey(newPath)) {
      Comparable srcVer = this.srcDataFileVersionStrategy.get().getVersion(originFileStatus.getPath());
      Comparable dstVer = this.dstDataFileVersionStrategy.get()
          .getVersion(copyToFileMap.get(newPath).getPath());

      // destination has a higher version, skip the copy
      if (srcVer.compareTo(dstVer) <= 0) {
        if (!enforceFileSizeMatch || copyToFileMap.get(newPath).getLen() == originFileStatus.getLen()) {
          log.debug("Copy from src {} (v:{}) to dst {} (v:{}) can be skipped.",
              originFileStatus.getPath(), srcVer, copyToFileMap.get(newPath).getPath(), dstVer);
          shouldCopy = false;
        } else {
          log.debug(
              "Copy from src {} (v:{}) to dst {} (v:{}) can not be skipped due to unmatched file length.",
              originFileStatus.getPath(), srcVer, copyToFileMap.get(newPath).getPath(), dstVer);
        }
      } else {
        log.debug("Copy from src {} (v:{}) to dst {} (v:{}) is needed due to a higher version.",
            originFileStatus.getPath(), srcVer, copyToFileMap.get(newPath).getPath(), dstVer);
      }
    } else {
      log.debug("Copy from src {} to dst {} is needed because dst doesn't contain the file",
          originFileStatus.getPath(), copyToFileMap.get(newPath));
    }

    if (shouldCopy) {
      // need to remove those files in the target file system
      if (copyToFileMap.containsKey(newPath)) {
        deletedPaths.add(newPath);
      }

      CopyableFile copyableFile = CopyableFile
          .fromOriginAndDestination(copyFromFs, originFileStatus, copyToFs.makeQualified(newPath),
              copyConfiguration)
          .fileSet(PathUtils.getPathWithoutSchemeAndAuthority(copyTo.getDatasetPath()).toString())
          .dataFileVersionStrategy(
              this.versionStrategyFromCS.isPresent() ? this.versionStrategyFromCS.get() : null)
          .build();
      copyableFile.setFsDatasets(copyFromFs, copyToFs);
      copyableFiles.add(copyableFile);
    }

    // clean up already checked paths
    copyToFileMap.remove(newPath);
  }

  // delete the paths in the target directory that do NOT exist on the source
  if (deleteTargetIfNotExistOnSource) {
    deletedPaths.addAll(copyToFileMap.keySet());
  }

  // delete old files first
  if (!deletedPaths.isEmpty()) {
    DeleteFileCommitStep deleteCommitStep = DeleteFileCommitStep.fromPaths(copyToFs, deletedPaths, this.props);
    copyableFiles.add(new PrePublishStep(copyTo.getDatasetPath().toString(),
        Maps.<String, String>newHashMap(), deleteCommitStep, 0));
  }

  // generate the watermark file even if watermark checking is disabled,
  // so that it can become functional once desired
  if ((!watermarkMetadataCopied) && copyFrom.getWatermark().isPresent()) {
    copyableFiles.add(new PostPublishStep(copyTo.getDatasetPath().toString(),
        Maps.<String, String>newHashMap(),
        new WatermarkMetadataGenerationCommitStep(copyTo.getFsURI().toString(), copyTo.getDatasetPath(),
            copyFrom.getWatermark().get()),
        1));
  }

  return copyableFiles;
}
From source file:org.apache.hadoop.examples.BaileyBorweinPlouffe.java
License:Apache License
/** Run a map/reduce job to compute Pi. */
private static void compute(int startDigit, int nDigits, int nMaps, String workingDir,
    Configuration conf, PrintStream out) throws IOException {
  final String name = startDigit + "_" + nDigits;

  // set up the working directory
  out.println("Working Directory = " + workingDir);
  out.println();
  final FileSystem fs = FileSystem.get(conf);
  final Path dir = fs.makeQualified(new Path(workingDir));
  if (fs.exists(dir)) {
    throw new IOException("Working directory " + dir + " already exists. Please remove it first.");
  } else if (!fs.mkdirs(dir)) {
    throw new IOException("Cannot create working directory " + dir);
  }

  out.println("Start Digit = " + startDigit);
  out.println("Number of Digits = " + nDigits);
  out.println("Number of Maps = " + nMaps);

  // set up a job
  final Job job = createJob(name, conf);
  final Path hexfile = new Path(dir, "pi_" + name + ".hex");
  FileOutputFormat.setOutputPath(job, new Path(dir, "out"));

  // set up custom properties
  job.getConfiguration().set(WORKING_DIR_PROPERTY, dir.toString());
  job.getConfiguration().set(HEX_FILE_PROPERTY, hexfile.toString());

  job.getConfiguration().setInt(DIGIT_START_PROPERTY, startDigit);
  job.getConfiguration().setInt(DIGIT_SIZE_PROPERTY, nDigits);
  job.getConfiguration().setInt(DIGIT_PARTS_PROPERTY, nMaps);

  // start a map/reduce job
  out.println("\nStarting Job ...");
  final long startTime = Time.monotonicNow();
  try {
    if (!job.waitForCompletion(true)) {
      out.println("Job failed.");
      System.exit(1);
    }
  } catch (Exception e) {
    throw new RuntimeException(e);
  } finally {
    final double duration = (Time.monotonicNow() - startTime) / 1000.0;
    out.println("Duration is " + duration + " seconds.");
  }
  out.println("Output file: " + hexfile);
}
From source file:org.apache.hadoop.examples.pi.DistSum.java
License:Apache License
/** Start a job to compute sigma */
private void compute(final String name, Summation sigma) throws IOException {
  if (sigma.getValue() != null)
    throw new IOException("sigma.getValue() != null, sigma=" + sigma);

  // set up the remote directory
  final FileSystem fs = FileSystem.get(getConf());
  final Path dir = fs.makeQualified(new Path(parameters.remoteDir, name));
  if (!Util.createNonexistingDirectory(fs, dir))
    return;

  // set up a job
  final Job job = createJob(name, sigma);
  final Path outdir = new Path(dir, "out");
  FileOutputFormat.setOutputPath(job, outdir);

  // start a map/reduce job
  final String startmessage = "steps/parts = " + sigma.E.getSteps() + "/" + parameters.nParts
      + " = " + Util.long2string(sigma.E.getSteps() / parameters.nParts);
  Util.runJob(name, job, parameters.machine, startmessage, timer);
  final List<TaskResult> results = Util.readJobOutputs(fs, outdir);
  Util.writeResults(name, results, fs, parameters.remoteDir);
  fs.delete(dir, true);

  // combine results
  final List<TaskResult> combined = Util.combine(results);
  final PrintWriter out = Util.createWriter(parameters.localDir, name);
  try {
    for (TaskResult r : combined) {
      final String s = taskResult2string(name, r);
      out.println(s);
      out.flush();
      Util.out.println(s);
    }
  } finally {
    out.close();
  }

  if (combined.size() == 1) {
    final Summation s = combined.get(0).getElement();
    if (sigma.contains(s) && s.contains(sigma))
      sigma.setValue(s.getValue());
  }
}
From source file:org.apache.hadoop.examples.QuasiMonteCarlo.java
License:Apache License
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimatePi(int numMaps, long numPoints, Path tmpDir, Configuration conf)
    throws IOException, ClassNotFoundException, InterruptedException {
  Job job = Job.getInstance(conf);
  // set up the job conf
  job.setJobName(QuasiMonteCarlo.class.getSimpleName());
  job.setJarByClass(QuasiMonteCarlo.class);

  job.setInputFormatClass(SequenceFileInputFormat.class);

  job.setOutputKeyClass(BooleanWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);

  job.setMapperClass(QmcMapper.class);

  job.setReducerClass(QmcReducer.class);
  job.setNumReduceTasks(1);

  // turn off speculative execution, because DFS doesn't handle
  // multiple writers to the same file.
  job.setSpeculativeExecution(false);

  // set up the input/output directories
  final Path inDir = new Path(tmpDir, "in");
  final Path outDir = new Path(tmpDir, "out");
  FileInputFormat.setInputPaths(job, inDir);
  FileOutputFormat.setOutputPath(job, outDir);

  final FileSystem fs = FileSystem.get(conf);
  if (fs.exists(tmpDir)) {
    throw new IOException(
        "Tmp directory " + fs.makeQualified(tmpDir) + " already exists. Please remove it first.");
  }
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Cannot create input directory " + inDir);
  }

  try {
    // generate an input file for each map task
    for (int i = 0; i < numMaps; ++i) {
      final Path file = new Path(inDir, "part" + i);
      final LongWritable offset = new LongWritable(i * numPoints);
      final LongWritable size = new LongWritable(numPoints);
      final SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file,
          LongWritable.class, LongWritable.class, CompressionType.NONE);
      try {
        writer.append(offset, size);
      } finally {
        writer.close();
      }
      System.out.println("Wrote input for Map #" + i);
    }

    // start a map/reduce job
    System.out.println("Starting Job");
    final long startTime = Time.monotonicNow();
    job.waitForCompletion(true);
    if (!job.isSuccessful()) {
      System.out.println("Job " + job.getJobID() + " failed!");
      System.exit(1);
    }
    final double duration = (Time.monotonicNow() - startTime) / 1000.0;
    System.out.println("Job Finished in " + duration + " seconds");

    // read outputs
    Path inFile = new Path(outDir, "reduce-out");
    LongWritable numInside = new LongWritable();
    LongWritable numOutside = new LongWritable();
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf);
    try {
      reader.next(numInside, numOutside);
    } finally {
      reader.close();
    }

    // compute the estimated value
    final BigDecimal numTotal = BigDecimal.valueOf(numMaps).multiply(BigDecimal.valueOf(numPoints));
    return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
        .divide(numTotal, RoundingMode.HALF_UP);
  } finally {
    fs.delete(tmpDir, true);
  }
}
From source file:org.apache.hama.bsp.BSPJobClient.java
License:Apache License
public RunningJob submitJobInternal(BSPJob pJob, BSPJobID jobId) throws IOException {
  BSPJob job = pJob;
  job.setJobID(jobId);

  int maxTasks;
  int configured = job.getConfiguration().getInt(Constants.MAX_TASKS_PER_JOB, job.getNumBspTask());

  ClusterStatus clusterStatus = getClusterStatus(true);
  // Re-adjust maxTasks based on the cluster status.
  if (clusterStatus != null) {
    maxTasks = clusterStatus.getMaxTasks() - clusterStatus.getTasks();

    if (configured > maxTasks) {
      LOG.warn("The configured number of tasks has exceeded the maximum allowed. Job will run with "
          + (maxTasks) + " tasks.");
      job.setNumBspTask(maxTasks);
    }
  } else {
    maxTasks = configured;
  }

  Path submitJobDir = new Path(getSystemDir(), "submit_" + Integer.toString(Math.abs(r.nextInt()), 36));
  Path submitSplitFile = new Path(submitJobDir, "job.split");
  Path submitJarFile = new Path(submitJobDir, "job.jar");
  Path submitJobFile = new Path(submitJobDir, "job.xml");
  LOG.debug("BSPJobClient.submitJobDir: " + submitJobDir);

  FileSystem fs = getFs();
  // Create a number of filenames in the BSPMaster's fs namespace
  fs.delete(submitJobDir, true);
  submitJobDir = fs.makeQualified(submitJobDir);
  submitJobDir = new Path(submitJobDir.toUri().getPath());
  FsPermission bspSysPerms = new FsPermission(JOB_DIR_PERMISSION);
  FileSystem.mkdirs(fs, submitJobDir, bspSysPerms);
  fs.mkdirs(submitJobDir);
  short replication = (short) job.getInt("bsp.submit.replication", 10);

  // only create the splits if we have an input
  if ((job.get(Constants.JOB_INPUT_DIR) != null) || (job.get("bsp.join.expr") != null)) {
    // Create the splits for the job
    LOG.debug("Creating splits at " + fs.makeQualified(submitSplitFile));

    InputSplit[] splits = job.getInputFormat().getSplits(job,
        (maxTasks > configured) ? configured : maxTasks);

    if (job.getConfiguration().getBoolean(Constants.ENABLE_RUNTIME_PARTITIONING, false)) {
      LOG.info("Run pre-partitioning job");
      job = partition(job, splits, maxTasks);
      maxTasks = job.getInt("hama.partition.count", maxTasks);
    }

    if (job.getBoolean("input.has.partitioned", false)) {
      splits = job.getInputFormat().getSplits(job, maxTasks);
    }

    if (maxTasks < splits.length) {
      throw new IOException(
          "Job failed! The number of splits has exceeded the number of max tasks. The number of splits: "
              + splits.length + ", The number of max tasks: " + maxTasks);
    }

    int numOfSplits = writeSplits(job, splits, submitSplitFile, maxTasks);

    if (numOfSplits > configured
        || !job.getConfiguration().getBoolean(Constants.FORCE_SET_BSP_TASKS, false)) {
      job.setNumBspTask(numOfSplits);
    }

    job.set("bsp.job.split.file", submitSplitFile.toString());
  }

  String originalJarPath = job.getJar();

  if (originalJarPath != null) {
    // copy the jar to the BSPMaster's fs;
    // use the jar name if the job is not named.
    if ("".equals(job.getJobName())) {
      job.setJobName(new Path(originalJarPath).getName());
    }
    job.setJar(submitJarFile.toString());
    fs.copyFromLocalFile(new Path(originalJarPath), submitJarFile);

    fs.setReplication(submitJarFile, replication);
    fs.setPermission(submitJarFile, new FsPermission(JOB_FILE_PERMISSION));
  } else {
    LOG.warn("No job jar file set. User classes may not be found. "
        + "See BSPJob#setJar(String) or check your jar file.");
  }

  // Set the user's name and working directory
  job.setUser(getUnixUserName());
  job.set("group.name", getUnixUserGroupName(job.getUser()));
  if (job.getWorkingDirectory() == null) {
    job.setWorkingDirectory(fs.getWorkingDirectory());
  }

  // Write the job file to the BSPMaster's fs
  FSDataOutputStream out = FileSystem.create(fs, submitJobFile, new FsPermission(JOB_FILE_PERMISSION));
  try {
    job.writeXml(out);
  } finally {
    out.close();
  }

  return launchJob(jobId, job, submitJobFile, fs);
}
From source file:org.apache.hama.bsp.FileOutputFormat.java
License:Apache License
@Override
public void checkOutputSpecs(FileSystem ignored, BSPJob job)
    throws FileAlreadyExistsException, InvalidJobConfException, IOException {
  // Ensure that the output directory is set and not already there
  Path outDir = getOutputPath(job);
  if (outDir == null && job.getNumBspTask() != 0) {
    throw new InvalidJobConfException("Output directory not set in JobConf.");
  }
  if (outDir != null) {
    FileSystem fs = outDir.getFileSystem(job.getConfiguration());
    // normalize the output directory
    outDir = fs.makeQualified(outDir);
    setOutputPath(job, outDir);
    // check its existence
    if (fs.exists(outDir)) {
      throw new FileAlreadyExistsException("Output directory " + outDir + " already exists");
    }
  }
}
From source file:org.apache.hama.bsp.YARNBSPJobClient.java
License:Apache License
@Override
protected RunningJob launchJob(BSPJobID jobId, BSPJob normalJob, Path submitJobFile, FileSystem pFs)
    throws IOException {
  YARNBSPJob job = (YARNBSPJob) normalJob;

  LOG.info("Submitting job...");
  if (getConf().get("bsp.child.mem.in.mb") == null) {
    LOG.warn("BSP Child memory has not been set, YARN will guess your needs or use default values.");
  }

  FileSystem fs = pFs;
  if (fs == null) {
    fs = FileSystem.get(getConf());
  }

  if (getConf().get("bsp.user.name") == null) {
    String s = getUnixUserName();
    getConf().set("bsp.user.name", s);
    LOG.debug("Retrieved username: " + s);
  }

  yarnClient.start();
  try {
    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers="
        + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
      LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress"
          + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers"
          + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo("default");
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
        + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
        + ", queueApplicationCount=" + queueInfo.getApplications().size()
        + ", queueChildQueueCount=" + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
      for (QueueACL userAcl : aclInfo.getUserAcls()) {
        LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
            + userAcl.name());
      }
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();

    // Create a new ApplicationSubmissionContext
    //ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class);
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    id = appContext.getApplicationId();

    // set the application name
    appContext.setApplicationName(job.getJobName());

    // Create a new container launch context for the AM's container
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    // Define the local resources required
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
    // Let's assume the jar we need for our ApplicationMaster is available in
    // HDFS at a certain known path to us and we want to make it available to
    // the ApplicationMaster in the launched container
    if (job.getJar() == null) {
      throw new IllegalArgumentException("Jar must be set in order to run the application!");
    }

    Path jarPath = new Path(job.getJar());
    jarPath = fs.makeQualified(jarPath);
    getConf().set("bsp.jar", jarPath.makeQualified(fs.getUri(), jarPath).toString());

    FileStatus jarStatus = fs.getFileStatus(jarPath);
    LocalResource amJarRsrc = Records.newRecord(LocalResource.class);
    amJarRsrc.setType(LocalResourceType.FILE);
    amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
    amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(jarPath));
    amJarRsrc.setTimestamp(jarStatus.getModificationTime());
    amJarRsrc.setSize(jarStatus.getLen());

    // this creates a symlink in the working directory
    localResources.put(YARNBSPConstants.APP_MASTER_JAR_PATH, amJarRsrc);

    // add Hama-related jar files to the local resources for the container
    List<File> hamaJars;
    if (System.getProperty("hama.home.dir") != null)
      hamaJars = localJarfromPath(System.getProperty("hama.home.dir"));
    else
      hamaJars = localJarfromPath(getConf().get("hama.home.dir"));
    String hamaPath = getSystemDir() + "/hama";
    for (File fileEntry : hamaJars) {
      addToLocalResources(fs, fileEntry.getCanonicalPath(), hamaPath, fileEntry.getName(),
          localResources);
    }

    // Set the local resources into the launch context
    amContainer.setLocalResources(localResources);

    // Set up the environment needed for the launch context
    Map<String, String> env = new HashMap<String, String>();
    // Assuming our classes or jars are available as local resources in the
    // working directory from which the command will be run, we need to append
    // "." to the path.
    // By default, all the hadoop-specific classpaths will already be available
    // in $CLASSPATH, so we should be careful not to overwrite it.
    StringBuilder classPathEnv = new StringBuilder(ApplicationConstants.Environment.CLASSPATH.$())
        .append(File.pathSeparatorChar).append("./*");
    for (String c : yarnConf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
        YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
      classPathEnv.append(File.pathSeparatorChar);
      classPathEnv.append(c.trim());
    }

    env.put(YARNBSPConstants.HAMA_YARN_LOCATION, jarPath.toUri().toString());
    env.put(YARNBSPConstants.HAMA_YARN_SIZE, Long.toString(jarStatus.getLen()));
    env.put(YARNBSPConstants.HAMA_YARN_TIMESTAMP, Long.toString(jarStatus.getModificationTime()));

    env.put(YARNBSPConstants.HAMA_LOCATION, hamaPath);
    env.put("CLASSPATH", classPathEnv.toString());
    amContainer.setEnvironment(env);

    // Set the necessary command to execute on the allocated container
    Vector<CharSequence> vargs = new Vector<CharSequence>(5);
    vargs.add("${JAVA_HOME}/bin/java");
    vargs.add("-cp " + classPathEnv + "");
    vargs.add(ApplicationMaster.class.getCanonicalName());
    vargs.add(submitJobFile.makeQualified(fs.getUri(), fs.getWorkingDirectory()).toString());
    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/hama-appmaster.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/hama-appmaster.stderr");

    // Get the final command
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
      command.append(str).append(" ");
    }

    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());
    amContainer.setCommands(commands);

    LOG.debug("Start command: " + command);

    Resource capability = Records.newRecord(Resource.class);
    // we have at least 3 threads per BSP task, which consume 1 MB each, plus
    // a base usage of 100 MB
    capability.setMemory(3 * job.getNumBspTask() + getConf().getInt("hama.appmaster.memory.mb", 100));
    LOG.info("Set memory for the application master to " + capability.getMemory() + "mb!");

    // Set the container launch content into the ApplicationSubmissionContext
    appContext.setResource(capability);

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
      // Note: the Credentials class is marked as LimitedPrivate for HDFS and MapReduce
      Credentials credentials = new Credentials();
      String tokenRenewer = yarnConf.get(YarnConfiguration.RM_PRINCIPAL);
      if (tokenRenewer == null || tokenRenewer.length() == 0) {
        throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
      }

      // For now, only getting tokens for the default file-system.
      final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
      if (tokens != null) {
        for (Token<?> token : tokens) {
          LOG.info("Got dt for " + fs.getUri() + "; " + token);
        }
      }
      DataOutputBuffer dob = new DataOutputBuffer();
      credentials.writeTokenStorageToStream(dob);
      ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
      amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Create the request to send to the ApplicationsManager
    ApplicationId appId = appContext.getApplicationId();
    yarnClient.submitApplication(appContext);

    return monitorApplication(appId) ? new NetworkedJob() : null;
  } catch (YarnException e) {
    e.printStackTrace();
    return null;
  }
}
From source file:org.apache.hama.bsp.YARNBSPJobClient.java
License:Apache License
private void addToLocalResources(FileSystem fs, String fileSrcPath, String fileDstPath,
    String fileName, Map<String, LocalResource> localResources) throws IOException {
  Path dstPath = new Path(fileDstPath, fileName);
  dstPath = fs.makeQualified(dstPath);
  fs.copyFromLocalFile(false, true, new Path(fileSrcPath), dstPath);
  FileStatus fileStatus = fs.getFileStatus(dstPath);
  LocalResource localRsrc = LocalResource.newInstance(
      ConverterUtils.getYarnUrlFromURI(dstPath.toUri()), LocalResourceType.FILE,
      LocalResourceVisibility.APPLICATION, fileStatus.getLen(), fileStatus.getModificationTime());
  localResources.put(fileName, localRsrc);
}
From source file:org.apache.hama.util.GenericOptionsParser.java
License:Apache License
/**
 * Modify the configuration according to user-specified generic options.
 *
 * @param conf Configuration to be modified
 * @param line User-specified generic options
 */
private void processGeneralOptions(HamaConfiguration conf, CommandLine line) throws IOException {
  if (line.hasOption("conf")) {
    String[] values = line.getOptionValues("conf");
    for (String value : values) {
      conf.addResource(new Path(value));
    }
  }
  if (line.hasOption("libjars")) {
    conf.set("tmpjars", validateFiles(line.getOptionValue("libjars"), conf));
    // setting libjars in the client classpath
    URL[] libjars = getLibJars(conf);
    if (libjars != null && libjars.length > 0) {
      conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader()));
      Thread.currentThread().setContextClassLoader(
          new URLClassLoader(libjars, Thread.currentThread().getContextClassLoader()));
    }
  }
  if (line.hasOption("files")) {
    conf.set("tmpfiles", validateFiles(line.getOptionValue("files"), conf));
  }
  if (line.hasOption("archives")) {
    conf.set("tmparchives", validateFiles(line.getOptionValue("archives"), conf));
  }
  if (line.hasOption('D')) {
    String[] property = line.getOptionValues('D');
    for (String prop : property) {
      String[] keyval = prop.split("=", 2);
      if (keyval.length == 2) {
        conf.set(keyval[0], keyval[1]);
      }
    }
  }
  conf.setBoolean("hama.used.genericoptionsparser", true);

  // tokensFile
  if (line.hasOption("tokenCacheFile")) {
    String fileName = line.getOptionValue("tokenCacheFile");
    // check if the local file exists
    try {
      FileSystem localFs = FileSystem.getLocal(conf);
      Path p = new Path(fileName);
      if (!localFs.exists(p)) {
        throw new FileNotFoundException("File " + fileName + " does not exist.");
      }
      LOG.debug("setting conf tokensFile: " + fileName);
      conf.set("hama.job.credentials.json", localFs.makeQualified(p).toString());
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }
}
From source file:org.apache.hcatalog.mapreduce.HCatBaseInputFormat.java
License:Apache License
private void setInputPath(JobConf jobConf, String location) throws IOException {
  // ideally we should just call FileInputFormat.setInputPaths() here - but
  // that won't work since FileInputFormat.setInputPaths() needs
  // a Job object instead of a JobContext which we are handed here

  int length = location.length();
  int curlyOpen = 0;
  int pathStart = 0;
  boolean globPattern = false;
  List<String> pathStrings = new ArrayList<String>();

  for (int i = 0; i < length; i++) {
    char ch = location.charAt(i);
    switch (ch) {
    case '{': {
      curlyOpen++;
      if (!globPattern) {
        globPattern = true;
      }
      break;
    }
    case '}': {
      curlyOpen--;
      if (curlyOpen == 0 && globPattern) {
        globPattern = false;
      }
      break;
    }
    case ',': {
      if (!globPattern) {
        pathStrings.add(location.substring(pathStart, i));
        pathStart = i + 1;
      }
      break;
    }
    }
  }
  pathStrings.add(location.substring(pathStart, length));

  Path[] paths = StringUtils.stringToPath(pathStrings.toArray(new String[0]));

  String separator = "";
  StringBuilder str = new StringBuilder();
  for (Path path : paths) {
    FileSystem fs = path.getFileSystem(jobConf);
    final String qualifiedPath = fs.makeQualified(path).toString();
    str.append(separator).append(StringUtils.escapeString(qualifiedPath));
    separator = StringUtils.COMMA_STR;
  }

  jobConf.set("mapred.input.dir", str.toString());
}