Example usage for org.apache.hadoop.fs FileSystem getUri

List of usage examples for org.apache.hadoop.fs FileSystem getUri

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem getUri.

Prototype

public abstract URI getUri();

Document

Returns a URI which identifies this FileSystem.
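
Before the project-specific snippets under Usage, here is a minimal, self-contained sketch of a typical getUri() call. It is an illustrative class of our own (GetUriExample is not taken from any of the projects below) and assumes only a default Hadoop Configuration, so the resolved file system may simply be the local one:

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetUriExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // The returned URI identifies this FileSystem instance,
        // e.g. hdfs://namenode:8020 or file:///
        URI fsUri = fs.getUri();
        System.out.println("FileSystem URI: " + fsUri);
        System.out.println("Scheme:         " + fsUri.getScheme());
        System.out.println("Authority:      " + fsUri.getAuthority());

        // A pattern that recurs in the examples below: qualify a relative path
        // against the file system's URI and its working directory.
        Path relative = new Path("data/input.txt");
        Path qualified = relative.makeQualified(fs.getUri(), fs.getWorkingDirectory());
        System.out.println("Qualified path: " + qualified);
    }
}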

Usage

From source file: org.apache.gobblin.data.management.copy.splitter.DistcpFileSplitterTest.java

License: Apache License

private Collection<WorkUnit> createMockSplitWorkUnits(FileSystem fs, long fileLen, long blockSize,
        long maxSplitSize) throws Exception {
    FileStatus file = mock(FileStatus.class);
    when(file.getLen()).thenReturn(fileLen);
    when(file.getBlockSize()).thenReturn(blockSize);

    URI uri = new URI("hdfs", "dummyhost", "/test", "test");
    Path path = new Path(uri);
    when(fs.getUri()).thenReturn(uri);

    CopyableDatasetMetadata cdm = new CopyableDatasetMetadata(new TestCopyableDataset(path));

    CopyableFile cf = CopyableFileUtils.getTestCopyableFile();
    CopyableFile spy = spy(cf);
    doReturn(file).when(spy).getFileStatus();
    doReturn(blockSize).when(spy).getBlockSize(any(FileSystem.class));
    doReturn(path).when(spy).getDestination();

    WorkUnit wu = WorkUnit.createEmpty();
    wu.setProp(DistcpFileSplitter.MAX_SPLIT_SIZE_KEY, maxSplitSize);
    wu.setProp(ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_OUTPUT_DIR, 1, 0),
            path.toString());
    CopySource.setWorkUnitGuid(wu, Guid.fromStrings(wu.toString()));
    CopySource.serializeCopyEntity(wu, cf);
    CopySource.serializeCopyableDataset(wu, cdm);

    return DistcpFileSplitter.splitFile(spy, wu, fs);
}

From source file: org.apache.gobblin.data.management.copy.writer.FileAwareInputStreamDataWriter.java

License: Apache License

/**
 * Write the contents of input stream into staging path.
 *
 * <p>
 *   writeAt indicates the path where the contents of the input stream should be written. When this method is called,
 *   the path writeAt.getParent() will already exist, but the path writeAt will not. When this method returns,
 *   the path writeAt must exist. Any data written to any location other than writeAt or a descendant of writeAt
 *   will be ignored.
 * </p>
 *
 * @param inputStream {@link FSDataInputStream} whose contents should be written to staging path.
 * @param writeAt {@link Path} at which contents should be written.
 * @param copyableFile {@link org.apache.gobblin.data.management.copy.CopyEntity} that generated this copy operation.
 * @param record The actual {@link FileAwareInputStream} passed to the write method.
 * @throws IOException
 */
protected void writeImpl(InputStream inputStream, Path writeAt, CopyableFile copyableFile,
        FileAwareInputStream record) throws IOException {

    final short replication = this.state.getPropAsShort(ConfigurationKeys.WRITER_FILE_REPLICATION_FACTOR,
            copyableFile.getReplication(this.fs));
    final long blockSize = copyableFile.getBlockSize(this.fs);
    final long fileSize = copyableFile.getFileStatus().getLen();

    long expectedBytes = fileSize;
    Long maxBytes = null;
    // Whether writer must write EXACTLY maxBytes.
    boolean mustMatchMaxBytes = false;

    if (record.getSplit().isPresent()) {
        maxBytes = record.getSplit().get().getHighPosition() - record.getSplit().get().getLowPosition();
        if (record.getSplit().get().isLastSplit()) {
            expectedBytes = fileSize % blockSize;
            mustMatchMaxBytes = false;
        } else {
            expectedBytes = maxBytes;
            mustMatchMaxBytes = true;
        }
    }

    Predicate<FileStatus> fileStatusAttributesFilter = new Predicate<FileStatus>() {
        @Override
        public boolean apply(FileStatus input) {
            return input.getReplication() == replication && input.getBlockSize() == blockSize;
        }
    };
    Optional<FileStatus> persistedFile = this.recoveryHelper.findPersistedFile(this.state, copyableFile,
            fileStatusAttributesFilter);

    if (persistedFile.isPresent()) {
        log.info(String.format("Recovering persisted file %s to %s.", persistedFile.get().getPath(), writeAt));
        this.fs.rename(persistedFile.get().getPath(), writeAt);
    } else {
        // Copy empty directories
        if (copyableFile.getFileStatus().isDirectory()) {
            this.fs.mkdirs(writeAt);
            return;
        }

        OutputStream os = this.fs.create(writeAt, true, this.fs.getConf().getInt("io.file.buffer.size", 4096),
                replication, blockSize);
        if (encryptionConfig != null) {
            os = EncryptionFactory.buildStreamCryptoProvider(encryptionConfig).encodeOutputStream(os);
        }
        try {
            FileSystem defaultFS = FileSystem.get(new Configuration());
            StreamThrottler<GobblinScopeTypes> throttler = this.taskBroker
                    .getSharedResource(new StreamThrottler.Factory<GobblinScopeTypes>(), new EmptyKey());
            ThrottledInputStream throttledInputStream = throttler.throttleInputStream().inputStream(inputStream)
                    .sourceURI(copyableFile.getOrigin().getPath()
                            .makeQualified(defaultFS.getUri(), defaultFS.getWorkingDirectory()).toUri())
                    .targetURI(this.fs.makeQualified(writeAt).toUri()).build();
            StreamCopier copier = new StreamCopier(throttledInputStream, os, maxBytes)
                    .withBufferSize(this.bufferSize);

            log.info("File {}: Starting copy", copyableFile.getOrigin().getPath());

            if (isInstrumentationEnabled()) {
                copier.withCopySpeedMeter(this.copySpeedMeter);
            }
            long numBytes = copier.copy();
            if ((this.checkFileSize || mustMatchMaxBytes) && numBytes != expectedBytes) {
                throw new IOException(String.format("Incomplete write: expected %d, wrote %d bytes.",
                        expectedBytes, numBytes));
            }
            this.bytesWritten.addAndGet(numBytes);
            if (isInstrumentationEnabled()) {
                log.info("File {}: copied {} bytes, average rate: {} B/s", copyableFile.getOrigin().getPath(),
                        this.copySpeedMeter.getCount(), this.copySpeedMeter.getMeanRate());
            } else {
                log.info("File {} copied.", copyableFile.getOrigin().getPath());
            }
        } catch (NotConfiguredException nce) {
            log.warn("Broker error. Some features of stream copier may not be available.", nce);
        } finally {
            os.close();
            inputStream.close();
        }
    }
}

From source file: org.apache.gobblin.filesystem.MetricsFileSystemInstrumentation.java

License: Apache License

public MetricsFileSystemInstrumentation(FileSystem underlying) {
    super(underlying);
    this.closer = Closer.create();
    this.metricContext = new MetricContext.Builder(underlying.getUri() + "_metrics").build();
    this.metricContext = this.closer.register(metricContext);

    this.listStatusTimer = this.metricContext.timer("listStatus");
    this.listFilesTimer = this.metricContext.timer("listFiles");
    this.globStatusTimer = this.metricContext.timer("globStatus");
    this.mkdirTimer = this.metricContext.timer("mkdirs");
    this.renameTimer = this.metricContext.timer("rename");
    this.deleteTimer = this.metricContext.timer("delete");
    this.createTimer = this.metricContext.timer("create");
    this.openTimer = this.metricContext.timer("open");
    this.setOwnerTimer = this.metricContext.timer("setOwner");
    this.getFileStatusTimer = this.metricContext.timer("getFileStatus");
    this.setPermissionTimer = this.metricContext.timer("setPermission");
    this.setTimesTimer = this.metricContext.timer("setTimes");
    this.appendTimer = this.metricContext.timer("append");
    this.concatTimer = this.metricContext.timer("concat");

    this.allTimers = ImmutableList.<ContextAwareTimer>builder()
            .add(this.listStatusTimer, this.listFilesTimer, this.globStatusTimer, this.mkdirTimer,
                    this.renameTimer, this.deleteTimer, this.createTimer, this.openTimer, this.setOwnerTimer,
                    this.getFileStatusTimer, this.setPermissionTimer, this.setTimesTimer, this.appendTimer,
                    this.concatTimer)
            .build();
}

From source file: org.apache.gobblin.publisher.BaseDataPublisher.java

License: Apache License

/**
 * Create destination dataset descriptor
 */
protected DatasetDescriptor createDestinationDescriptor(WorkUnitState state, int branchId) {
    Path publisherOutputDir = getPublisherOutputDir(state, branchId);
    FileSystem fs = this.publisherFileSystemByBranches.get(branchId);
    DatasetDescriptor destination = new DatasetDescriptor(fs.getScheme(), publisherOutputDir.toString());
    destination.addMetadata(DatasetConstants.FS_URI, fs.getUri().toString());
    destination.addMetadata(DatasetConstants.BRANCH, String.valueOf(branchId));
    return destination;
}

From source file: org.apache.hadoop.examples.Sort.java

License: Apache License

/**
 * The main driver for sort program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there are communication problems with the 
 *                     job tracker.
 */
public int run(String[] args) throws Exception {

    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = conf.get(REDUCES_PER_HOST);
    if (sort_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    }
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = BytesWritable.class;
    List<String> otherArgs = new ArrayList<String>();
    InputSampler.Sampler<K, V> sampler = null;
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-totalOrder".equals(args[i])) {
                double pcnt = Double.parseDouble(args[++i]);
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new InputSampler.RandomSampler<K, V>(pcnt, numSamples, maxSplits);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }
    // Set user-supplied (possibly default) job configs
    job = Job.getInstance(conf);
    job.setJobName("sorter");
    job.setJarByClass(Sort.class);

    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);

    job.setNumReduceTasks(num_reduces);

    job.setInputFormatClass(inputFormatClass);
    job.setOutputFormatClass(outputFormatClass);

    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);

    // Make sure there are exactly 2 parameters left.
    if (otherArgs.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 2.");
        return printUsage();
    }
    FileInputFormat.setInputPaths(job, otherArgs.get(0));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(1)));

    if (sampler != null) {
        System.out.println("Sampling input to effect total-order sort...");
        job.setPartitionerClass(TotalOrderPartitioner.class);
        Path inputDir = FileInputFormat.getInputPaths(job)[0];
        FileSystem fs = inputDir.getFileSystem(conf);
        inputDir = inputDir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
        Path partitionFile = new Path(inputDir, "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(conf, partitionFile);
        InputSampler.<K, V>writePartitionFile(job, sampler);
        URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
        job.addCacheFile(partitionUri);
    }

    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from "
            + FileInputFormat.getInputPaths(job)[0] + " into " + FileOutputFormat.getOutputPath(job) + " with "
            + num_reduces + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return ret;
}

From source file: org.apache.hama.bsp.BSPTaskLauncher.java

License: Apache License

private GetContainerStatusesRequest setupContainer(Container allocatedContainer, ContainerManagementProtocol cm,
        String user, int id) throws IOException, YarnException {
    LOG.info("Setting up a container for user " + user + " with id of " + id + " and containerID of "
            + allocatedContainer.getId() + " as " + user);
    // Now we setup a ContainerLaunchContext
    ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class);

    // Set the local resources
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
    LocalResource packageResource = Records.newRecord(LocalResource.class);
    FileSystem fs = FileSystem.get(conf);
    Path packageFile = new Path(System.getenv(YARNBSPConstants.HAMA_YARN_LOCATION));
    URL packageUrl = ConverterUtils
            .getYarnUrlFromPath(packageFile.makeQualified(fs.getUri(), fs.getWorkingDirectory()));
    LOG.info("PackageURL has been composed to " + packageUrl.toString());
    try {
        LOG.info("Reverting packageURL to path: " + ConverterUtils.getPathFromYarnURL(packageUrl));
    } catch (URISyntaxException e) {
        LOG.fatal("If you see this error the workarround does not work", e);
    }

    packageResource.setResource(packageUrl);
    packageResource.setSize(Long.parseLong(System.getenv(YARNBSPConstants.HAMA_YARN_SIZE)));
    packageResource.setTimestamp(Long.parseLong(System.getenv(YARNBSPConstants.HAMA_YARN_TIMESTAMP)));
    packageResource.setType(LocalResourceType.FILE);
    packageResource.setVisibility(LocalResourceVisibility.APPLICATION);

    localResources.put(YARNBSPConstants.APP_MASTER_JAR_PATH, packageResource);

    Path hamaReleaseFile = new Path(System.getenv(YARNBSPConstants.HAMA_RELEASE_LOCATION));
    URL hamaReleaseUrl = ConverterUtils
            .getYarnUrlFromPath(hamaReleaseFile.makeQualified(fs.getUri(), fs.getWorkingDirectory()));
    LOG.info("Hama release URL has been composed to " + hamaReleaseUrl.toString());

    LocalResource hamaReleaseRsrc = Records.newRecord(LocalResource.class);
    hamaReleaseRsrc.setResource(hamaReleaseUrl);
    hamaReleaseRsrc.setSize(Long.parseLong(System.getenv(YARNBSPConstants.HAMA_RELEASE_SIZE)));
    hamaReleaseRsrc.setTimestamp(Long.parseLong(System.getenv(YARNBSPConstants.HAMA_RELEASE_TIMESTAMP)));
    hamaReleaseRsrc.setType(LocalResourceType.ARCHIVE);
    hamaReleaseRsrc.setVisibility(LocalResourceVisibility.APPLICATION);

    localResources.put(YARNBSPConstants.HAMA_SYMLINK, hamaReleaseRsrc);

    ctx.setLocalResources(localResources);

    /*
     * TODO Package classpath seems not to work if you're in pseudo distributed
     * mode, because the resource must not be moved, it will never be unpacked.
     * So we will check if our jar file has the file:// prefix and put it into
     * the CP directly
     */

    StringBuilder classPathEnv = new StringBuilder(ApplicationConstants.Environment.CLASSPATH.$())
            .append(File.pathSeparatorChar).append("./*");
    for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
        classPathEnv.append(File.pathSeparatorChar);
        classPathEnv.append(c.trim());
    }

    classPathEnv.append(File.pathSeparator);
    classPathEnv
            .append("./" + YARNBSPConstants.HAMA_SYMLINK + "/" + YARNBSPConstants.HAMA_RELEASE_VERSION + "/*");
    classPathEnv.append(File.pathSeparator);
    classPathEnv.append(
            "./" + YARNBSPConstants.HAMA_SYMLINK + "/" + YARNBSPConstants.HAMA_RELEASE_VERSION + "/lib/*");

    Vector<CharSequence> vargs = new Vector<CharSequence>();
    vargs.add("${JAVA_HOME}/bin/java");
    vargs.add("-cp " + classPathEnv + "");
    vargs.add(BSPRunner.class.getCanonicalName());

    vargs.add(jobId.getJtIdentifier());
    vargs.add(Integer.toString(id));
    vargs.add(this.jobFile.makeQualified(fs.getUri(), fs.getWorkingDirectory()).toString());

    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/bsp.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/bsp.stderr");

    // Get final command
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
        command.append(str).append(" ");
    }

    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());

    ctx.setCommands(commands);
    LOG.info("Starting command: " + commands);

    StartContainerRequest startReq = Records.newRecord(StartContainerRequest.class);
    startReq.setContainerLaunchContext(ctx);
    startReq.setContainerToken(allocatedContainer.getContainerToken());

    List<StartContainerRequest> list = new ArrayList<StartContainerRequest>();
    list.add(startReq);
    StartContainersRequest requestList = StartContainersRequest.newInstance(list);
    cm.startContainers(requestList);

    GetContainerStatusesRequest statusReq = Records.newRecord(GetContainerStatusesRequest.class);
    List<ContainerId> containerIds = new ArrayList<ContainerId>();
    containerIds.add(allocatedContainer.getId());
    statusReq.setContainerIds(containerIds);
    return statusReq;
}

From source file: org.apache.hama.bsp.YARNBSPJobClient.java

License: Apache License

@Override
protected RunningJob launchJob(BSPJobID jobId, BSPJob normalJob, Path submitJobFile, FileSystem pFs)
        throws IOException {
    YARNBSPJob job = (YARNBSPJob) normalJob;

    LOG.info("Submitting job...");
    if (getConf().get("bsp.child.mem.in.mb") == null) {
        LOG.warn("BSP Child memory has not been set, YARN will guess your needs or use default values.");
    }

    FileSystem fs = pFs;
    if (fs == null) {
        fs = FileSystem.get(getConf());
    }

    if (getConf().get("bsp.user.name") == null) {
        String s = getUnixUserName();
        getConf().set("bsp.user.name", s);
        LOG.debug("Retrieved username: " + s);
    }

    yarnClient.start();
    try {
        YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
        LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers="
                + clusterMetrics.getNumNodeManagers());

        List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
        LOG.info("Got Cluster node info from ASM");
        for (NodeReport node : clusterNodeReports) {
            LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress"
                    + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers"
                    + node.getNumContainers());
        }

        QueueInfo queueInfo = yarnClient.getQueueInfo("default");
        LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
                + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
                + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
                + queueInfo.getChildQueues().size());

        List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
        for (QueueUserACLInfo aclInfo : listAclInfo) {
            for (QueueACL userAcl : aclInfo.getUserAcls()) {
                LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                        + userAcl.name());
            }
        }

        // Get a new application id
        YarnClientApplication app = yarnClient.createApplication();

        // Create a new ApplicationSubmissionContext
        //ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class);
        ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();

        id = appContext.getApplicationId();

        // set the application name
        appContext.setApplicationName(job.getJobName());

        // Create a new container launch context for the AM's container
        ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

        // Define the local resources required
        Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
        // Lets assume the jar we need for our ApplicationMaster is available in
        // HDFS at a certain known path to us and we want to make it available to
        // the ApplicationMaster in the launched container
        if (job.getJar() == null) {
            throw new IllegalArgumentException("Jar must be set in order to run the application!");
        }

        Path jarPath = new Path(job.getJar());
        jarPath = fs.makeQualified(jarPath);
        getConf().set("bsp.jar", jarPath.makeQualified(fs.getUri(), jarPath).toString());

        FileStatus jarStatus = fs.getFileStatus(jarPath);
        LocalResource amJarRsrc = Records.newRecord(LocalResource.class);
        amJarRsrc.setType(LocalResourceType.FILE);
        amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
        amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(jarPath));
        amJarRsrc.setTimestamp(jarStatus.getModificationTime());
        amJarRsrc.setSize(jarStatus.getLen());

        // this creates a symlink in the working directory
        localResources.put(YARNBSPConstants.APP_MASTER_JAR_PATH, amJarRsrc);

        // add hama related jar files to localresources for container
        List<File> hamaJars;
        if (System.getProperty("hama.home.dir") != null)
            hamaJars = localJarfromPath(System.getProperty("hama.home.dir"));
        else
            hamaJars = localJarfromPath(getConf().get("hama.home.dir"));
        String hamaPath = getSystemDir() + "/hama";
        for (File fileEntry : hamaJars) {
            addToLocalResources(fs, fileEntry.getCanonicalPath(), hamaPath, fileEntry.getName(),
                    localResources);
        }

        // Set the local resources into the launch context
        amContainer.setLocalResources(localResources);

        // Set up the environment needed for the launch context
        Map<String, String> env = new HashMap<String, String>();
        // Assuming our classes or jars are available as local resources in the
        // working directory from which the command will be run, we need to append
        // "." to the path.
        // By default, all the hadoop specific classpaths will already be available
        // in $CLASSPATH, so we should be careful not to overwrite it.
        StringBuilder classPathEnv = new StringBuilder(ApplicationConstants.Environment.CLASSPATH.$())
                .append(File.pathSeparatorChar).append("./*");
        for (String c : yarnConf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
                YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
            classPathEnv.append(File.pathSeparatorChar);
            classPathEnv.append(c.trim());
        }

        env.put(YARNBSPConstants.HAMA_YARN_LOCATION, jarPath.toUri().toString());
        env.put(YARNBSPConstants.HAMA_YARN_SIZE, Long.toString(jarStatus.getLen()));
        env.put(YARNBSPConstants.HAMA_YARN_TIMESTAMP, Long.toString(jarStatus.getModificationTime()));

        env.put(YARNBSPConstants.HAMA_LOCATION, hamaPath);
        env.put("CLASSPATH", classPathEnv.toString());
        amContainer.setEnvironment(env);

        // Set the necessary command to execute on the allocated container
        Vector<CharSequence> vargs = new Vector<CharSequence>(5);
        vargs.add("${JAVA_HOME}/bin/java");
        vargs.add("-cp " + classPathEnv + "");
        vargs.add(ApplicationMaster.class.getCanonicalName());
        vargs.add(submitJobFile.makeQualified(fs.getUri(), fs.getWorkingDirectory()).toString());

        vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/hama-appmaster.stdout");
        vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/hama-appmaster.stderr");

        // Get final command
        StringBuilder command = new StringBuilder();
        for (CharSequence str : vargs) {
            command.append(str).append(" ");
        }

        List<String> commands = new ArrayList<String>();
        commands.add(command.toString());
        amContainer.setCommands(commands);

        LOG.debug("Start command: " + command);

        Resource capability = Records.newRecord(Resource.class);
        // we have at least 3 threads, which consume 1mb each, for each bsp task, and
        // a base usage of 100mb
        capability.setMemory(3 * job.getNumBspTask() + getConf().getInt("hama.appmaster.memory.mb", 100));
        LOG.info("Set memory for the application master to " + capability.getMemory() + "mb!");

        // Set the container launch content into the ApplicationSubmissionContext
        appContext.setResource(capability);

        // Setup security tokens
        if (UserGroupInformation.isSecurityEnabled()) {
            // Note: Credentials class is marked as LimitedPrivate for HDFS and MapReduce
            Credentials credentials = new Credentials();
            String tokenRenewer = yarnConf.get(YarnConfiguration.RM_PRINCIPAL);
            if (tokenRenewer == null || tokenRenewer.length() == 0) {
                throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
            }

            // For now, only getting tokens for the default file-system.
            final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
            if (tokens != null) {
                for (Token<?> token : tokens) {
                    LOG.info("Got dt for " + fs.getUri() + "; " + token);
                }
            }
            DataOutputBuffer dob = new DataOutputBuffer();
            credentials.writeTokenStorageToStream(dob);
            ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
            amContainer.setTokens(fsTokens);
        }

        appContext.setAMContainerSpec(amContainer);

        // Create the request to send to the ApplicationsManager
        ApplicationId appId = appContext.getApplicationId();
        yarnClient.submitApplication(appContext);

        return monitorApplication(appId) ? new NetworkedJob() : null;
    } catch (YarnException e) {
        e.printStackTrace();
        return null;
    }
}

From source file: org.apache.hama.HamaClusterTestCase.java

License: Apache License

@Override
protected void setUp() throws Exception {
    try {
        if (this.startDfs) {
            // This spews a bunch of warnings about missing scheme. TODO: fix.
            this.dfsCluster = new MiniDFSCluster(0, this.conf, 2, true, true, true, null, null, null, null);

            // mangle the conf so that the fs parameter points to the minidfs we
            // just started up
            FileSystem filesystem = dfsCluster.getFileSystem();
            conf.set("fs.defaultFS", filesystem.getUri().toString());
            Path parentdir = filesystem.getHomeDirectory();

            filesystem.mkdirs(parentdir);
        }

        // do the super setup now. if we had done it first, then we would have
        // gotten our conf all mangled and a local fs started up.
        super.setUp();

        // start the instance
        hamaClusterSetup();
    } catch (Exception e) {
        if (zooKeeperCluster != null) {
            zooKeeperCluster.shutdown();
        }
        if (dfsCluster != null) {
            shutdownDfs(dfsCluster);
        }
        throw e;
    }
}

From source file: org.apache.hcatalog.mapreduce.TestHCatMultiOutputFormat.java

License: Apache License

@BeforeClass
public static void setup() throws Exception {
    String testDir = System.getProperty("test.data.dir", "./");
    testDir = testDir + "/test_multitable_" + Math.abs(new Random().nextLong()) + "/";
    workDir = new File(new File(testDir).getCanonicalPath());
    FileUtil.fullyDelete(workDir);
    workDir.mkdirs();

    warehousedir = new Path(workDir + "/warehouse");

    // Run hive metastore server
    t = new Thread(new RunMS());
    t.start();

    // LocalJobRunner does not work with mapreduce OutputCommitter. So need
    // to use MiniMRCluster. MAPREDUCE-2350
    Configuration conf = new Configuration(true);
    conf.set("yarn.scheduler.capacity.root.queues", "default");
    conf.set("yarn.scheduler.capacity.root.default.capacity", "100");

    FileSystem fs = FileSystem.get(conf);
    System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath());
    mrCluster = new MiniMRCluster(1, fs.getUri().toString(), 1, null, null, new JobConf(conf));
    mrConf = mrCluster.createJobConf();
    fs.mkdirs(warehousedir);

    initializeSetup();
}

From source file: org.apache.hcatalog.shims.HCatHadoopShims20S.java

License: Apache License

@Override
public boolean isFileInHDFS(FileSystem fs, Path path) throws IOException {
    // In hadoop 1.x.x the file system URI is sufficient to determine the uri of the file
    return "hdfs".equals(fs.getUri().getScheme());
}