Example usage for org.apache.hadoop.fs FileSystem getUri

Introduction

This page lists example usages of the org.apache.hadoop.fs.FileSystem.getUri() method, collected from open-source projects.

Prototype

public abstract URI getUri();

Document

Returns a URI which identifies this FileSystem.
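
A minimal sketch of calling getUri() on the default filesystem (it assumes only a stock Hadoop Configuration; the class name GetUriExample is illustrative):

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class GetUriExample {
    public static void main(String[] args) throws Exception {
        // Load the default configuration (picks up core-site.xml if it is on the classpath).
        Configuration conf = new Configuration();

        // Obtain the filesystem configured as fs.defaultFS.
        FileSystem fs = FileSystem.get(conf);

        // getUri() returns the URI that identifies this FileSystem,
        // e.g. hdfs://namenode:8020 for HDFS or file:/// for the local filesystem.
        URI uri = fs.getUri();
        System.out.println("FileSystem URI: " + uri);

        fs.close();
    }
}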

Usage

From source file: org.springframework.xd.shell.command.AbstractJobCommandsWithHadoopTests.java

License: Apache License

@Test
public void testLaunchFtpHadoopJob() throws Throwable {
    logger.info("Launch FTP->HDFS batch job");
    TestFtpServer server = new TestFtpServer("FtpHadoop");
    server.before();

    // clean up from old tests
    FileSystem fs = FileSystem.get(configuration);

    Path p1 = new Path("foo/ftpSource/ftpSource1.txt");
    fs.delete(p1, true);
    Path p2 = new Path("foo/ftpSource/ftpSource2.txt");
    fs.delete(p2, true);
    assertFalse(fs.exists(p1));
    assertFalse(fs.exists(p2));

    try {
        int port = server.getPort();
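        // fs.getUri() supplies the filesystem URI for the job's --fsUri option, so the module targets the same HDFS used by this test.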
        executeJobCreate("myftphdfs", "ftphdfs --partitionResultsTimeout=120000 --port=" + port + " --fsUri="
                + fs.getUri().toString());
        checkForJobInList("myftphdfs", "ftphdfs --partitionResultsTimeout=120000 --port=" + port + " --fsUri="
                + fs.getUri().toString(), true);
        executeJobLaunch("myftphdfs", "{\"-remoteDirectory\":\"ftpSource\",\"hdfsDirectory\":\"foo\"}");

        Table jobExecutions = listJobExecutions();
        int n = 0;
        while (!"COMPLETED".equals(jobExecutions.getRows().get(0).getValue(5))) {
            Thread.sleep(100);
            assertTrue(n++ < 100);
            jobExecutions = listJobExecutions();
        }

        assertTrue(fs.exists(p1));
        assertTrue(fs.exists(p2));

        FSDataInputStream stream = fs.open(p1);
        byte[] out = new byte[7];
        stream.readFully(out);
        stream.close();
        assertEquals("source1", new String(out));

        stream = fs.open(p2);
        stream.readFully(out);
        stream.close();
        assertEquals("source2", new String(out));
    } finally {
        server.after();
    }
}

From source file: org.starschema.hadoop.yarn.applications.distributedshell.Client.java

License: Apache License

/**
 * Main run function for the client
 * @return true if application completed successfully
 * @throws IOException
 * @throws YarnException
 */
public boolean run() throws IOException, YarnException {

    LOG.info("Running Client");
    yarnClient.start();

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    List<NodeReport> clusterNodeReports = yarnClient.getNodeReports(NodeState.RUNNING);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodeReports) {
        LOG.info("Got node report from ASM for" + ", nodeId=" + node.getNodeId() + ", nodeAddress"
                + node.getHttpAddress() + ", nodeRackName" + node.getRackName() + ", nodeNumContainers"
                + node.getNumContainers());
    }

    QueueInfo queueInfo = yarnClient.getQueueInfo(this.amQueue);
    LOG.info("Queue info" + ", queueName=" + queueInfo.getQueueName() + ", queueCurrentCapacity="
            + queueInfo.getCurrentCapacity() + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
            + ", queueApplicationCount=" + queueInfo.getApplications().size() + ", queueChildQueueCount="
            + queueInfo.getChildQueues().size());

    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    if (domainId != null && domainId.length() > 0 && toCreateDomain) {
        prepareTimelineDomain();
    }

    // Get a new application id
    YarnClientApplication app = yarnClient.createApplication();
    GetNewApplicationResponse appResponse = app.getNewApplicationResponse();
    // TODO get min/max resource capabilities from RM and change memory ask if needed
    // If we do not have min/max, we may not be able to correctly request 
    // the required resources from the RM for the app master
    // Memory ask has to be a multiple of min and less than max. 
    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = appResponse.getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capability of resources in this cluster " + maxMem);

    // A resource ask cannot exceed the max. 
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    int maxVCores = appResponse.getMaximumResourceCapability().getVirtualCores();
    LOG.info("Max virtual cores capability of resources in this cluster " + maxVCores);

    if (amVCores > maxVCores) {
        LOG.info("AM virtual cores specified above max threshold of cluster. " + "Using max value."
                + ", specified=" + amVCores + ", max=" + maxVCores);
        amVCores = maxVCores;
    }

    // set the application name
    ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
    ApplicationId appId = appContext.getApplicationId();

    appContext.setKeepContainersAcrossApplicationAttempts(keepContainers);
    appContext.setApplicationName(appName);

    if (attemptFailuresValidityInterval >= 0) {
        appContext.setAttemptFailuresValidityInterval(attemptFailuresValidityInterval);
    }

    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of the local resources         
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    LOG.info("Copy App Master jar from local filesystem and add to local environment");
    // Copy the application master jar to the filesystem 
    // Create a local resource to point to the destination jar path 
    FileSystem fs = FileSystem.get(conf);
    addToLocalResources(fs, appMasterJar, appMasterJarPath, appId.toString(), localResources, null);

    // Set the log4j properties if needed 
    if (!log4jPropFile.isEmpty()) {
        addToLocalResources(fs, log4jPropFile, log4jPath, appId.toString(), localResources, null);
    }

    // The shell script has to be made available on the final container(s)
    // where it will be executed. 
    // To do this, we need to first copy into the filesystem that is visible 
    // to the yarn framework. 
    // We do not need to set this as a local resource for the application 
    // master as the application master does not need it.       
    String hdfsShellScriptLocation = "";
    long hdfsShellScriptLen = 0;
    long hdfsShellScriptTimestamp = 0;
    if (!shellScriptPath.isEmpty()) {
        Path shellSrc = new Path(shellScriptPath);
        String shellPathSuffix = appName + "/" + appId.toString() + "/" + SCRIPT_PATH;
        Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix);
        fs.copyFromLocalFile(false, true, shellSrc, shellDst);
        hdfsShellScriptLocation = shellDst.toUri().toString();
        FileStatus shellFileStatus = fs.getFileStatus(shellDst);
        hdfsShellScriptLen = shellFileStatus.getLen();
        hdfsShellScriptTimestamp = shellFileStatus.getModificationTime();
    }

    LOG.info("Copy Hazelcast zip from local filesystem and add to local environment");
    String hdfsHazelLocation = "";
    long hdfsHazelLen = 0;
    long hdfsHazelTimestamp = 0;
    if (!hazelcastZip.isEmpty()) {
        Path hazelSrc = new Path(hazelcastZip);
        String hazelPathSuffix = appName + "/" + appId.toString() + "/" + HAZELCAST_PATH;
        Path hazelDst = new Path(fs.getHomeDirectory(), hazelPathSuffix);
        fs.copyFromLocalFile(false, true, hazelSrc, hazelDst);
        hdfsHazelLocation = hazelDst.toUri().toString();
        LOG.info("Hazelcast zip location: " + hdfsHazelLocation);
        FileStatus hazelFileStatus = fs.getFileStatus(hazelDst);
        hdfsHazelLen = hazelFileStatus.getLen();
        hdfsHazelTimestamp = hazelFileStatus.getModificationTime();
    }

    if (!shellCommand.isEmpty()) {
        addToLocalResources(fs, null, shellCommandPath, appId.toString(), localResources, shellCommand);
    }

    if (shellArgs.length > 0) {
        addToLocalResources(fs, null, shellArgsPath, appId.toString(), localResources,
                StringUtils.join(shellArgs, " "));
    }

    // Set the necessary security tokens as needed
    //amContainer.setContainerTokens(containerToken);

    // Set the env variables to be setup in the env where the application master will be run
    LOG.info("Set the environment for the application master");
    Map<String, String> env = new HashMap<String, String>();

    // put location of shell script into env
    // using the env info, the application master will create the correct local resource for the 
    // eventual containers that will be launched to execute the shell scripts
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation);
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp));
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen));
    env.put(DSConstants.HAZELLOCATION, hdfsHazelLocation);
    env.put(DSConstants.HAZELTIMESTAMP, Long.toString(hdfsHazelTimestamp));
    env.put(DSConstants.HAZELLEN, Long.toString(hdfsHazelLen));
    if (domainId != null && domainId.length() > 0) {
        env.put(DSConstants.DISTRIBUTEDSHELLTIMELINEDOMAIN, domainId);
    }

    // Add AppMaster.jar location to classpath       
    // At some point we should not be required to add 
    // the hadoop specific classpaths to the env. 
    // It should be provided out of the box. 
    // For now setting all required classpaths including
    // the classpath to "." for the application jar
    StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$$())
            .append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./*");
    for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
            YarnConfiguration.DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH)) {
        classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR);
        classPathEnv.append(c.trim());
    }
    classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./log4j.properties");

    // add the runtime classpath needed for tests to work
    if (conf.getBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, false)) {
        classPathEnv.append(':');
        classPathEnv.append(System.getProperty("java.class.path"));
    }

    env.put("CLASSPATH", classPathEnv.toString());

    // Set the necessary command to execute the application master 
    Vector<CharSequence> vargs = new Vector<CharSequence>(30);

    // Set java executable command 
    LOG.info("Setting up app master command");
    vargs.add(Environment.JAVA_HOME.$$() + "/bin/java");
    // Set Xmx based on am memory size
    vargs.add("-Xmx" + amMemory + "m");
    // Set class name 
    vargs.add(appMasterMainClass);
    // Set params for Application Master
    vargs.add("--container_memory " + String.valueOf(containerMemory));
    vargs.add("--container_vcores " + String.valueOf(containerVirtualCores));
    vargs.add("--num_containers " + String.valueOf(numContainers));
    if (null != nodeLabelExpression) {
        appContext.setNodeLabelExpression(nodeLabelExpression);
    }
    vargs.add("--priority " + String.valueOf(shellCmdPriority));

    for (Map.Entry<String, String> entry : shellEnv.entrySet()) {
        vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue());
    }
    if (debugFlag) {
        vargs.add("--debug");
    }

    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

    // Get final command
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
        command.append(str).append(" ");
    }

    LOG.info("Completed setting up app master command " + command.toString());
    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance(localResources, env, commands, null,
            null, null);

    // Set up resource type requirements
    // For now, both memory and vcores are supported, so we set memory and 
    // vcores requirements
    Resource capability = Resource.newInstance(amMemory, amVCores);
    appContext.setResource(capability);

    // Service data is a binary blob that can be passed to the application
    // Not needed in this scenario
    // amContainer.setServiceData(serviceData);

    // Setup security tokens
    if (UserGroupInformation.isSecurityEnabled()) {
        // Note: Credentials class is marked as LimitedPrivate for HDFS and MapReduce
        Credentials credentials = new Credentials();
        String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
        if (tokens != null) {
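            // fs.getUri() identifies which filesystem each delegation token was obtained for.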
            for (Token<?> token : tokens) {
                LOG.info("Got dt for " + fs.getUri() + "; " + token);
            }
        }
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    // TODO - what is the range for priority? how to decide? 
    Priority pri = Priority.newInstance(amPriority);
    appContext.setPriority(pri);

    // Set the queue to which this application is to be submitted in the RM
    appContext.setQueue(amQueue);

    // Submit the application to the applications manager
    // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest);
    // Ignore the response as either a valid response object is returned on success 
    // or an exception thrown to denote some form of a failure
    LOG.info("Submitting application to ASM");

    yarnClient.submitApplication(appContext);

    // TODO
    // Try submitting the same request again
    // app submission failure?

    // Monitor the application
    return monitorApplication(appId);

}

From source file: org.talend.components.simplefileio.runtime.SparkSimpleFileIOInputRuntimeTestIT.java

License: Open Source License

/**
 * Basic unit test using all default values (except for the path) on an in-memory DFS cluster.
 */
@Category(ValidatesRunner.class)
@Test
public void testBasicDefaults() throws IOException {
    FileSystem fs = FileSystem.get(spark.createHadoopConfiguration());
    String inputFile = writeRandomCsvFile(fs, "/tmp/test/input.csv", 0, 0, 10, 10, 6, ";", "\n");
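    // Resolve the path against the filesystem's URI to obtain a fully qualified file spec.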
    String fileSpec = fs.getUri().resolve("/tmp/test/input.csv").toString();

    // Configure the component.
    SimpleFileIOInputProperties inputProps = SimpleFileIOInputRuntimeTest.createInputComponentProperties();
    inputProps.getDatasetProperties().path.setValue(fileSpec);

    // Create the runtime.
    SimpleFileIOInputRuntime runtime = new SimpleFileIOInputRuntime();
    runtime.initialize(null, inputProps);

    // Use the runtime in a direct pipeline to test.
    final Pipeline p = spark.createPipeline();
    PCollection<IndexedRecord> readLines = p.apply(runtime);

    // Check the expected values.
    List<IndexedRecord> expected = new ArrayList<>();
    for (String record : inputFile.split("\n")) {
        expected.add(ConvertToIndexedRecord.convertToAvro(record.split(";")));
    }
    PAssert.that(readLines).containsInAnyOrder(expected);

    // And run the test.
    p.run().waitUntilFinish();
}

From source file: org.talend.components.simplefileio.runtime.SparkSimpleFileIOOutputRuntimeTestIT.java

License: Open Source License

/**
 * Basic unit test using all default values (except for the path) on an in-memory DFS cluster.
 */
@Category(ValidatesRunner.class)
@Ignore("BEAM-1206")
@Test
public void testBasicDefaults() throws IOException {
    FileSystem fs = FileSystem.get(spark.createHadoopConfiguration());
    String fileSpec = fs.getUri().resolve(new Path(tmp.getRoot().toString(), "basic").toUri()).toString();

    // Configure the component.
    SimpleFileIOOutputProperties props = SimpleFileIOOutputRuntimeTest.createOutputComponentProperties();
    props.getDatasetProperties().path.setValue(fileSpec);
    props.getDatasetProperties().format.setValue(SimpleFileIOFormat.AVRO);

    // Create the runtime.
    SimpleFileIOOutputRuntime runtime = new SimpleFileIOOutputRuntime();
    runtime.initialize(null, props);

    // Use the runtime in a Spark pipeline to test.
    final Pipeline p = spark.createPipeline();
    PCollection<IndexedRecord> input = p.apply( //
            Create.of(ConvertToIndexedRecord.convertToAvro(new String[] { "1", "one" }), //
                    ConvertToIndexedRecord.convertToAvro(new String[] { "2", "two" }))); //
    input.apply(runtime);

    // And run the test.
    p.run().waitUntilFinish();

    // Check the expected values.
    MiniDfsResource.assertReadFile(fs, fileSpec, "1;one", "2;two");
}

From source file: org.talend.components.simplefileio.runtime.SparkSimpleFileIOOutputRuntimeTestIT.java

License: Open Source License

@Test
public void testCsv_merge() throws IOException {
    FileSystem fs = FileSystem.get(spark.createHadoopConfiguration());
    String fileSpec = fs.getUri().resolve(new Path(tmp.getRoot().toString(), "output.csv").toUri()).toString();

    // Configure the component.
    SimpleFileIOOutputProperties props = SimpleFileIOOutputRuntimeTest.createOutputComponentProperties();
    props.getDatasetProperties().path.setValue(fileSpec);
    props.getDatasetProperties().format.setValue(SimpleFileIOFormat.CSV);
    props.mergeOutput.setValue(true);

    // Create the runtime.
    SimpleFileIOOutputRuntime runtime = new SimpleFileIOOutputRuntime();
    runtime.initialize(null, props);

    // Use the runtime in a Spark pipeline to test.
    final Pipeline p = spark.createPipeline();
    PCollection<IndexedRecord> input = p.apply( //
            Create.of(ConvertToIndexedRecord.convertToAvro(new String[] { "1", "one" }), //
                    ConvertToIndexedRecord.convertToAvro(new String[] { "2", "two" }))); //
    input.apply(runtime);

    // And run the test.
    p.run().waitUntilFinish();

    // Check the expected values.
    MiniDfsResource.assertReadFile(fs, fileSpec, "1;one", "2;two");
    MiniDfsResource.assertFileNumber(fs, fileSpec, 1);
}

From source file: org.talend.components.simplefileio.runtime.SparkSimpleFileIOOutputRuntimeTestIT.java

License: Open Source License

@Test
public void testAvro_merge() throws IOException {
    FileSystem fs = FileSystem.get(spark.createHadoopConfiguration());
    String fileSpec = fs.getUri().resolve(new Path(tmp.getRoot().toString(), "output.avro").toUri()).toString();

    // Configure the component.
    SimpleFileIOOutputProperties props = SimpleFileIOOutputRuntimeTest.createOutputComponentProperties();
    props.getDatasetProperties().path.setValue(fileSpec);
    props.getDatasetProperties().format.setValue(SimpleFileIOFormat.AVRO);
    props.mergeOutput.setValue(true);

    // Create the runtime.
    SimpleFileIOOutputRuntime runtime = new SimpleFileIOOutputRuntime();
    runtime.initialize(null, props);

    // Use the runtime in a Spark pipeline to test.
    final Pipeline p = spark.createPipeline();
    PCollection<IndexedRecord> input = p.apply( //
            Create.of(ConvertToIndexedRecord.convertToAvro(new String[] { "1", "one" }), //
                    ConvertToIndexedRecord.convertToAvro(new String[] { "2", "two" }))); //
    input.apply(runtime);

    // And run the test.
    p.run().waitUntilFinish();

    // Check the expected values.

    MiniDfsResource.assertReadAvroFile(fs, fileSpec,
            new HashSet<IndexedRecord>(
                    Arrays.asList(ConvertToIndexedRecord.convertToAvro(new String[] { "1", "one" }), //
                            ConvertToIndexedRecord.convertToAvro(new String[] { "2", "two" }))),
            false);
    MiniDfsResource.assertFileNumber(fs, fileSpec, 1);
}

From source file: org.talend.components.simplefileio.runtime.SparkSimpleFileIOOutputRuntimeTestIT.java

License: Open Source License

@Test
public void testParquet_merge() throws IOException {
    FileSystem fs = FileSystem.get(spark.createHadoopConfiguration());
    String fileSpec = fs.getUri().resolve(new Path(tmp.getRoot().toString(), "output.parquet").toUri())
            .toString();

    // Configure the component.
    SimpleFileIOOutputProperties props = SimpleFileIOOutputRuntimeTest.createOutputComponentProperties();
    props.getDatasetProperties().path.setValue(fileSpec);
    props.getDatasetProperties().format.setValue(SimpleFileIOFormat.PARQUET);
    props.mergeOutput.setValue(true);

    // Create the runtime.
    SimpleFileIOOutputRuntime runtime = new SimpleFileIOOutputRuntime();
    runtime.initialize(null, props);

    // Use the runtime in a Spark pipeline to test.
    final Pipeline p = spark.createPipeline();
    PCollection<IndexedRecord> input = p.apply( //
            Create.of(ConvertToIndexedRecord.convertToAvro(new String[] { "1", "one" }), //
                    ConvertToIndexedRecord.convertToAvro(new String[] { "2", "two" }))); //
    input.apply(runtime);

    // And run the test.
    p.run().waitUntilFinish();

    // Check the expected values.
    MiniDfsResource.assertReadParquetFile(fs, fileSpec,
            new HashSet<IndexedRecord>(
                    Arrays.asList(ConvertToIndexedRecord.convertToAvro(new String[] { "1", "one" }), //
                            ConvertToIndexedRecord.convertToAvro(new String[] { "2", "two" }))),
            false);
    MiniDfsResource.assertFileNumber(fs, fileSpec, 1);
}

From source file: org.terrier.utility.io.HadoopUtility.java

License: Mozilla Public License

protected static void saveApplicationSetupToJob(JobConf jobConf, boolean getFreshProperties) throws Exception {
    // Do we load a fresh properties File?
    //TODO fix, if necessary
    //if (getFreshProperties)
    //   loadApplicationSetup(new Path(ApplicationSetup.TERRIER_HOME));

    FileSystem remoteFS = FileSystem.get(jobConf);
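    // Keep the remote filesystem's URI so relative paths can be made fully qualified below.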
    URI remoteFSURI = remoteFS.getUri();
    //make a copy of the current application setup properties, these may be amended
    //as some files are more globally accessible
    final Properties propertiesDuringJob = new Properties();
    Properties appProperties = ApplicationSetup.getProperties();
    for (Object _key : appProperties.keySet()) {
        String key = (String) _key;
        propertiesDuringJob.put(key, appProperties.get(key));
    }

    //the share folder is needed during indexing, save this on DFS
    if (Files.getFileSystemName(ApplicationSetup.TERRIER_SHARE).equals("local")) {
        Path tempTRShare = makeTemporaryFile(jobConf, "terrier.share");
        propertiesDuringJob.setProperty("terrier.share", remoteFSURI.resolve(tempTRShare.toUri()).toString());
        if (Files.exists(ApplicationSetup.TERRIER_SHARE)) {
            jobConf.set("terrier.share.copied", remoteFSURI.resolve(tempTRShare.toUri()).toString());
            logger.info("Copying terrier share/ directory (" + ApplicationSetup.TERRIER_SHARE
                    + ") to shared storage area (" + remoteFSURI.resolve(tempTRShare.toUri()).toString() + ")");
            FileUtil.copy(FileSystem.getLocal(jobConf), new Path(ApplicationSetup.TERRIER_SHARE), remoteFS,
                    tempTRShare, false, false, jobConf);
        } else {
            logger.warn(
                    "No terrier.share folder found at " + ApplicationSetup.TERRIER_SHARE + ", copying skipped");
        }
    }

    //copy the terrier.properties content over
    Path tempTRProperties = makeTemporaryFile(jobConf, "terrier.properties");
    logger.debug("Writing terrier properties out to DFS " + tempTRProperties.toString());
    OutputStream out = remoteFS.create(tempTRProperties);
    remoteFS.deleteOnExit(tempTRProperties);
    propertiesDuringJob.store(out, "Automatically generated by HadoopUtility.saveApplicationSetupToJob()");
    out.close();
    out = null;
    DistributedCache.addCacheFile(tempTRProperties.toUri().resolve(new URI("#terrier.properties")), jobConf);
    DistributedCache.createSymlink(jobConf);

    //copy the non-JVM system properties over as well
    Path tempSysProperties = makeTemporaryFile(jobConf, "system.properties");
    DataOutputStream dos = FileSystem.get(jobConf).create(tempSysProperties);
    logger.debug("Writing system properties out to DFS " + tempSysProperties.toString());
    for (Object _propertyKey : System.getProperties().keySet()) {
        String propertyKey = (String) _propertyKey;
        if (!startsWithAny(propertyKey, checkSystemProperties)) {
            dos.writeUTF(propertyKey);
            dos.writeUTF(System.getProperty(propertyKey));
        }
    }
    dos.writeUTF("FIN");
    dos.close();
    dos = null;
    DistributedCache.addCacheFile(tempSysProperties.toUri().resolve(new URI("#system.properties")), jobConf);
}

From source file: org.trustedanalytics.scheduler.filesystem.HdfsOrgSpecificSpace.java

License: Apache License

public HdfsOrgSpecificSpace(FileSystem fileSystem, String orgId, TokenProvider tokenProvider) {
    Objects.requireNonNull(fileSystem);
    Objects.requireNonNull(orgId);

    this.fileSystem = fileSystem;
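    // Prefix the org-specific root path with the filesystem's URI (scheme and authority).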
    this.root = new Path(String.format("%s/org/%s/", fileSystem.getUri(), orgId));
    this.tokenProvider = tokenProvider;
}

From source file: org.warcbase.index.IndexerRunner.java

License: Apache License

private void cacheSolrHome(JobConf conf, String solrHomeZipName) throws IOException {
    File tmpSolrHomeDir = new File("src/main/solr").getAbsoluteFile();

    // Create a ZIP file.
    File solrHomeLocalZip = File.createTempFile("tmp-", solrHomeZipName);
    Zipper.zipDir(tmpSolrHomeDir, solrHomeLocalZip);

    // Add to HDFS.
    FileSystem fs = FileSystem.get(conf);
    String hdfsSolrHomeDir = fs.getHomeDirectory() + "/solr/tempHome/" + solrHomeZipName;
    fs.copyFromLocalFile(new Path(solrHomeLocalZip.toString()), new Path(hdfsSolrHomeDir));

    final URI baseZipUrl = fs.getUri().resolve(hdfsSolrHomeDir + '#' + solrHomeZipName);

    // Cache it.
    DistributedCache.addCacheArchive(baseZipUrl, conf);
}