Example usage for org.apache.hadoop.fs Path toUri

Introduction

This page collects example usages of org.apache.hadoop.fs.Path#toUri from open-source projects.

Prototype

public URI toUri() 

Documentation

Convert this Path to a URI.
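
A minimal sketch of the call before the real-world examples below (the hdfs:// location and file name are hypothetical):

Path p = new Path("hdfs://namenode:8020/user/alice/data.txt");
URI uri = p.toUri();
// uri.getScheme()    -> "hdfs"
// uri.getAuthority() -> "namenode:8020"
// uri.getPath()      -> "/user/alice/data.txt"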

Usage

From source file:com.datatorrent.lib.util.WindowDataManagerTest.java

License:Apache License

@Test
public void testDelete() throws IOException {
    Map<Integer, String> dataOf1 = Maps.newHashMap();
    dataOf1.put(1, "one");
    dataOf1.put(2, "two");
    dataOf1.put(3, "three");

    Map<Integer, String> dataOf2 = Maps.newHashMap();
    dataOf2.put(4, "four");
    dataOf2.put(5, "five");
    dataOf2.put(6, "six");

    Map<Integer, String> dataOf3 = Maps.newHashMap();
    dataOf3.put(7, "seven");
    dataOf3.put(8, "eight");
    dataOf3.put(9, "nine");

    for (int i = 1; i <= 9; ++i) {
        testMeta.storageManager.save(dataOf1, 1, i);
    }

    testMeta.storageManager.save(dataOf2, 2, 1);
    testMeta.storageManager.save(dataOf3, 3, 1);

    testMeta.storageManager.partitioned(Lists.<WindowDataManager>newArrayList(testMeta.storageManager),
            Sets.newHashSet(2, 3));
    testMeta.storageManager.setup(testMeta.context);
    testMeta.storageManager.deleteUpTo(1, 6);

    Path appPath = new Path(testMeta.applicationPath + '/' + testMeta.storageManager.getRecoveryPath());
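    // the path's URI (scheme + authority) selects the proper FileSystem implementation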
    FileSystem fs = FileSystem.newInstance(appPath.toUri(), new Configuration());
    FileStatus[] fileStatuses = fs.listStatus(new Path(appPath, Integer.toString(1)));
    Assert.assertEquals("number of windows for 1", 3, fileStatuses.length);
    TreeSet<String> windows = Sets.newTreeSet();
    for (FileStatus fileStatus : fileStatuses) {
        windows.add(fileStatus.getPath().getName());
    }
    Assert.assertEquals("window list for 1", Sets.newLinkedHashSet(Arrays.asList("7", "8", "9")), windows);
    Assert.assertEquals("no data for 2", false, fs.exists(new Path(appPath, Integer.toString(2))));
    Assert.assertEquals("no data for 3", false, fs.exists(new Path(appPath, Integer.toString(3))));
}

From source file:com.datatorrent.stram.cli.ApexCli.java

License:Apache License

private File copyToLocal(String[] files) throws IOException {
    File tmpDir = new File(System.getProperty("java.io.tmpdir") + "/datatorrent/"
            + ManagementFactory.getRuntimeMXBean().getName());
    tmpDir.mkdirs();
    for (int i = 0; i < files.length; i++) {
        try {
            URI uri = new URI(files[i]);
            String scheme = uri.getScheme();
            if (scheme == null || scheme.equals("file")) {
                files[i] = uri.getPath();
            } else {
                try (FileSystem tmpFs = FileSystem.newInstance(uri, conf)) {
                    Path srcPath = new Path(uri.getPath());
                    Path dstPath = new Path(tmpDir.getAbsolutePath(), String.valueOf(i) + srcPath.getName());
                    tmpFs.copyToLocalFile(srcPath, dstPath);
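                    // toUri().getPath() drops the scheme/authority, leaving a plain local path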
                    files[i] = dstPath.toUri().getPath();
                }
            }
        } catch (URISyntaxException ex) {
            throw new RuntimeException(ex);
        }
    }

    return tmpDir;
}

From source file:com.datatorrent.stram.cli.DTCli.java

License:Apache License

private File copyToLocal(String[] files) throws IOException {
    File tmpDir = new File("/tmp/datatorrent/" + ManagementFactory.getRuntimeMXBean().getName());
    tmpDir.mkdirs();
    for (int i = 0; i < files.length; i++) {
        try {
            URI uri = new URI(files[i]);
            String scheme = uri.getScheme();
            if (scheme == null || scheme.equals("file")) {
                files[i] = uri.getPath();
            } else {
                FileSystem tmpFs = FileSystem.newInstance(uri, conf);
                try {
                    Path srcPath = new Path(uri.getPath());
                    Path dstPath = new Path(tmpDir.getAbsolutePath(), String.valueOf(i) + srcPath.getName());
                    tmpFs.copyToLocalFile(srcPath, dstPath);
                    files[i] = dstPath.toUri().getPath();
                } finally {
                    tmpFs.close();
                }
            }
        } catch (URISyntaxException ex) {
            throw new RuntimeException(ex);
        }
    }

    return tmpDir;
}

From source file:com.datatorrent.stram.StramClient.java

License:Apache License

public void copyInitialState(Path origAppDir) throws IOException {
    // locate previous snapshot
    String newAppDir = this.dag.assertAppPath();

    FSRecoveryHandler recoveryHandler = new FSRecoveryHandler(origAppDir.toString(), conf);
    // read snapshot against new dependencies
    Object snapshot = recoveryHandler.restore();
    if (snapshot == null) {
        throw new IllegalArgumentException("No previous application state found in " + origAppDir);
    }
    InputStream logIs = recoveryHandler.getLog();

    // modify snapshot state to switch app id
    ((StreamingContainerManager.CheckpointState) snapshot).setApplicationId(this.dag, conf);
    Path checkpointPath = new Path(newAppDir, LogicalPlan.SUBDIR_CHECKPOINTS);

    FileSystem fs = FileSystem.newInstance(origAppDir.toUri(), conf);
    // remove the path that was created by the storage agent during deserialization and replacement
    fs.delete(checkpointPath, true);

    // write snapshot to new location
    recoveryHandler = new FSRecoveryHandler(newAppDir, conf);
    recoveryHandler.save(snapshot);
    OutputStream logOs = recoveryHandler.rotateLog();
    IOUtils.copy(logIs, logOs);
    logOs.flush();
    logOs.close();
    logIs.close();

    // copy sub directories that are not present in target
    FileStatus[] lFiles = fs.listStatus(origAppDir);
    for (FileStatus f : lFiles) {
        if (f.isDirectory()) {
            String targetPath = f.getPath().toString().replace(origAppDir.toString(), newAppDir);
            if (!fs.exists(new Path(targetPath))) {
                LOG.debug("Copying {} to {}", f.getPath(), targetPath);
                FileUtil.copy(fs, f.getPath(), fs, new Path(targetPath), false, conf);
                //FSUtil.copy(fs, f, fs, new Path(targetPath), false, false, conf);
            } else {
                LOG.debug("Ignoring {} as it already exists under {}", f.getPath(), targetPath);
                //FSUtil.setPermission(fs, new Path(targetPath), new FsPermission((short)0777));
            }
        }
    }

}

From source file:com.datatorrent.stram.StramClient.java

License:Apache License

/**
 * Launch application for the dag represented by this client.
 *
 * @throws YarnException
 * @throws IOException
 */
public void startApplication() throws YarnException, IOException {
    Class<?>[] defaultClasses;

    if (applicationType.equals(YARN_APPLICATION_TYPE)) {
        //TODO restrict the security check to only check if security is enabled for webservices.
        if (UserGroupInformation.isSecurityEnabled()) {
            defaultClasses = DATATORRENT_SECURITY_CLASSES;
        } else {
            defaultClasses = DATATORRENT_CLASSES;
        }
    } else {
        throw new IllegalStateException(applicationType + " is not a valid application type.");
    }

    LinkedHashSet<String> localJarFiles = findJars(dag, defaultClasses);

    if (resources != null) {
        localJarFiles.addAll(resources);
    }

    YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
    LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers());

    //GetClusterNodesRequest clusterNodesReq = Records.newRecord(GetClusterNodesRequest.class);
    //GetClusterNodesResponse clusterNodesResp = rmClient.clientRM.getClusterNodes(clusterNodesReq);
    //LOG.info("Got Cluster node info from ASM");
    //for (NodeReport node : clusterNodesResp.getNodeReports()) {
    //  LOG.info("Got node report from ASM for"
    //           + ", nodeId=" + node.getNodeId()
    //           + ", nodeAddress" + node.getHttpAddress()
    //           + ", nodeRackName" + node.getRackName()
    //           + ", nodeNumContainers" + node.getNumContainers()
    //           + ", nodeHealthStatus" + node.getHealthReport());
    //}
    List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
        for (QueueACL userAcl : aclInfo.getUserAcls()) {
            LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl="
                    + userAcl.name());
        }
    }

    // Get a new application id
    YarnClientApplication newApp = yarnClient.createApplication();
    appId = newApp.getNewApplicationResponse().getApplicationId();

    // Dump out information about cluster capability as seen by the resource manager
    int maxMem = newApp.getNewApplicationResponse().getMaximumResourceCapability().getMemory();
    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);
    int amMemory = dag.getMasterMemoryMB();
    if (amMemory > maxMem) {
        LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified="
                + amMemory + ", max=" + maxMem);
        amMemory = maxMem;
    }

    if (dag.getAttributes().get(LogicalPlan.APPLICATION_ID) == null) {
        dag.setAttribute(LogicalPlan.APPLICATION_ID, appId.toString());
    }

    // Create launch context for app master
    LOG.info("Setting up application submission context for ASM");
    ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class);

    // set the application id
    appContext.setApplicationId(appId);
    // set the application name
    appContext.setApplicationName(dag.getValue(LogicalPlan.APPLICATION_NAME));
    appContext.setApplicationType(this.applicationType);
    if (YARN_APPLICATION_TYPE.equals(this.applicationType)) {
        //appContext.setMaxAppAttempts(1); // no retries until Stram is HA
    }

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    // Setup security tokens
    // If security is enabled get ResourceManager and NameNode delegation tokens.
    // Set these tokens on the container so that they are sent as part of application submission.
    // This also sets them up for renewal by ResourceManager. The NameNode delegation rmToken
    // is also used by ResourceManager to fetch the jars from HDFS and set them up for the
    // application master launch.
    if (UserGroupInformation.isSecurityEnabled()) {
        Credentials credentials = new Credentials();
        String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL);
        if (tokenRenewer == null || tokenRenewer.length() == 0) {
            throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        FileSystem fs = StramClientUtils.newFileSystemInstance(conf);
        try {
            final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials);
            if (tokens != null) {
                for (Token<?> token : tokens) {
                    LOG.info("Got dt for " + fs.getUri() + "; " + token);
                }
            }
        } finally {
            fs.close();
        }

        addRMDelegationToken(tokenRenewer, credentials);

        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        amContainer.setTokens(fsTokens);
    }

    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of the local resources
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    // copy required jar files to dfs, to be localized for containers
    FileSystem fs = StramClientUtils.newFileSystemInstance(conf);
    try {
        Path appsBasePath = new Path(StramClientUtils.getDTDFSRootDir(fs, conf), StramClientUtils.SUBDIR_APPS);
        Path appPath = new Path(appsBasePath, appId.toString());

        String libJarsCsv = copyFromLocal(fs, appPath, localJarFiles.toArray(new String[] {}));

        LOG.info("libjars: {}", libJarsCsv);
        dag.getAttributes().put(LogicalPlan.LIBRARY_JARS, libJarsCsv);
        LaunchContainerRunnable.addFilesToLocalResources(LocalResourceType.FILE, libJarsCsv, localResources,
                fs);

        if (archives != null) {
            String[] localFiles = archives.split(",");
            String archivesCsv = copyFromLocal(fs, appPath, localFiles);
            LOG.info("archives: {}", archivesCsv);
            dag.getAttributes().put(LogicalPlan.ARCHIVES, archivesCsv);
            LaunchContainerRunnable.addFilesToLocalResources(LocalResourceType.ARCHIVE, archivesCsv,
                    localResources, fs);
        }

        if (files != null) {
            String[] localFiles = files.split(",");
            String filesCsv = copyFromLocal(fs, appPath, localFiles);
            LOG.info("files: {}", filesCsv);
            dag.getAttributes().put(LogicalPlan.FILES, filesCsv);
            LaunchContainerRunnable.addFilesToLocalResources(LocalResourceType.FILE, filesCsv, localResources,
                    fs);
        }

        dag.getAttributes().put(LogicalPlan.APPLICATION_PATH, appPath.toString());
        if (dag.getAttributes()
                .get(OperatorContext.STORAGE_AGENT) == null) { /* which would be the most likely case */
            Path checkpointPath = new Path(appPath, LogicalPlan.SUBDIR_CHECKPOINTS);
            // use conf client side to pickup any proxy settings from dt-site.xml
            dag.setAttribute(OperatorContext.STORAGE_AGENT,
                    new FSStorageAgent(checkpointPath.toString(), conf));
        }
        if (dag.getAttributes().get(LogicalPlan.CONTAINER_OPTS_CONFIGURATOR) == null) {
            dag.setAttribute(LogicalPlan.CONTAINER_OPTS_CONFIGURATOR, new BasicContainerOptConfigurator());
        }

        // Set the log4j properties if needed
        if (!log4jPropFile.isEmpty()) {
            Path log4jSrc = new Path(log4jPropFile);
            Path log4jDst = new Path(appPath, "log4j.props");
            fs.copyFromLocalFile(false, true, log4jSrc, log4jDst);
            FileStatus log4jFileStatus = fs.getFileStatus(log4jDst);
            LocalResource log4jRsrc = Records.newRecord(LocalResource.class);
            log4jRsrc.setType(LocalResourceType.FILE);
            log4jRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
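            // Path.toUri() bridges to the java.net.URI form that ConverterUtils expects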
            log4jRsrc.setResource(ConverterUtils.getYarnUrlFromURI(log4jDst.toUri()));
            log4jRsrc.setTimestamp(log4jFileStatus.getModificationTime());
            log4jRsrc.setSize(log4jFileStatus.getLen());
            localResources.put("log4j.properties", log4jRsrc);
        }

        if (originalAppId != null) {
            Path origAppPath = new Path(appsBasePath, this.originalAppId);
            LOG.info("Restart from {}", origAppPath);
            copyInitialState(origAppPath);
        }

        // push logical plan to DFS location
        Path cfgDst = new Path(appPath, LogicalPlan.SER_FILE_NAME);
        FSDataOutputStream outStream = fs.create(cfgDst, true);
        LogicalPlan.write(this.dag, outStream);
        outStream.close();

        Path launchConfigDst = new Path(appPath, LogicalPlan.LAUNCH_CONFIG_FILE_NAME);
        outStream = fs.create(launchConfigDst, true);
        conf.writeXml(outStream);
        outStream.close();

        FileStatus topologyFileStatus = fs.getFileStatus(cfgDst);
        LocalResource topologyRsrc = Records.newRecord(LocalResource.class);
        topologyRsrc.setType(LocalResourceType.FILE);
        topologyRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
        topologyRsrc.setResource(ConverterUtils.getYarnUrlFromURI(cfgDst.toUri()));
        topologyRsrc.setTimestamp(topologyFileStatus.getModificationTime());
        topologyRsrc.setSize(topologyFileStatus.getLen());
        localResources.put(LogicalPlan.SER_FILE_NAME, topologyRsrc);

        // Set local resource info into app master container launch context
        amContainer.setLocalResources(localResources);

        // Set the necessary security tokens as needed
        //amContainer.setContainerTokens(containerToken);
        // Set the env variables to be setup in the env where the application master will be run
        LOG.info("Set the environment for the application master");
        Map<String, String> env = new HashMap<String, String>();

        // Add application jar(s) location to classpath
        // At some point we should not be required to add
        // the hadoop specific classpaths to the env.
        // It should be provided out of the box.
        // For now setting all required classpaths including
        // the classpath to "." for the application jar(s)
        // including ${CLASSPATH} will duplicate the class path in app master, removing it for now
        //StringBuilder classPathEnv = new StringBuilder("${CLASSPATH}:./*");
        StringBuilder classPathEnv = new StringBuilder("./*");
        String classpath = conf.get(YarnConfiguration.YARN_APPLICATION_CLASSPATH);
        for (String c : StringUtils.isBlank(classpath) ? YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH
                : classpath.split(",")) {
            if (c.equals("$HADOOP_CLIENT_CONF_DIR")) {
                // SPOI-2501
                continue;
            }
            classPathEnv.append(':');
            classPathEnv.append(c.trim());
        }
        env.put("CLASSPATH", classPathEnv.toString());
        // propagate to replace node managers user name (effective in non-secure mode)
        env.put("HADOOP_USER_NAME", UserGroupInformation.getLoginUser().getUserName());

        amContainer.setEnvironment(env);

        // Set the necessary command to execute the application master
        ArrayList<CharSequence> vargs = new ArrayList<CharSequence>(30);

        // Set java executable command
        LOG.info("Setting up app master command");
        vargs.add(javaCmd);
        if (dag.isDebug()) {
            vargs.add("-agentlib:jdwp=transport=dt_socket,server=y,suspend=n");
        }
        // Set Xmx based on am memory size
        // default heap size 75% of total memory
        if (dag.getMasterJVMOptions() != null) {
            vargs.add(dag.getMasterJVMOptions());
        }
        vargs.add("-Xmx" + (amMemory * 3 / 4) + "m");
        vargs.add("-XX:+HeapDumpOnOutOfMemoryError");
        vargs.add("-XX:HeapDumpPath=/tmp/dt-heap-" + appId.getId() + ".bin");
        vargs.add("-Dhadoop.root.logger=" + (dag.isDebug() ? "DEBUG" : "INFO") + ",RFA");
        vargs.add("-Dhadoop.log.dir=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR);
        vargs.add(String.format("-D%s=%s", StreamingContainer.PROP_APP_PATH, dag.assertAppPath()));
        if (dag.isDebug()) {
            vargs.add("-Dlog4j.debug=true");
        }

        String loggersLevel = conf.get(DTLoggerFactory.DT_LOGGERS_LEVEL);
        if (loggersLevel != null) {
            vargs.add(String.format("-D%s=%s", DTLoggerFactory.DT_LOGGERS_LEVEL, loggersLevel));
        }
        vargs.add(StreamingAppMaster.class.getName());
        vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
        vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

        // Get final command
        StringBuilder command = new StringBuilder(9 * vargs.size());
        for (CharSequence str : vargs) {
            command.append(str).append(" ");
        }

        LOG.info("Completed setting up app master command " + command.toString());
        List<String> commands = new ArrayList<String>();
        commands.add(command.toString());
        amContainer.setCommands(commands);

        // Set up resource type requirements
        // For now, only memory is supported so we set memory requirements
        Resource capability = Records.newRecord(Resource.class);
        capability.setMemory(amMemory);
        appContext.setResource(capability);

        // Service data is a binary blob that can be passed to the application
        // Not needed in this scenario
        // amContainer.setServiceData(serviceData);
        appContext.setAMContainerSpec(amContainer);

        // Set the priority for the application master
        Priority pri = Records.newRecord(Priority.class);
        pri.setPriority(amPriority);
        appContext.setPriority(pri);
        // Set the queue to which this application is to be submitted in the RM
        appContext.setQueue(queueName);

        // Submit the application to the applications manager
        // SubmitApplicationResponse submitResp = rmClient.submitApplication(appRequest);
        // Ignore the response as either a valid response object is returned on success
        // or an exception thrown to denote some form of a failure
        String specStr = Objects.toStringHelper("Submitting application: ")
                .add("name", appContext.getApplicationName()).add("queue", appContext.getQueue())
                .add("user", UserGroupInformation.getLoginUser()).add("resource", appContext.getResource())
                .toString();
        LOG.info(specStr);
        if (dag.isDebug()) {
            //LOG.info("Full submission context: " + appContext);
        }
        yarnClient.submitApplication(appContext);
    } finally {
        fs.close();
    }
}

From source file:com.datatorrent.stram.StreamingContainerManager.java

License:Apache License

/**
 * This method is for saving meta information about this application in HDFS -- the meta information that generally
 * does not change across multiple attempts
 */
private void saveMetaInfo() throws IOException {
    Path path = new Path(this.vars.appPath, APP_META_FILENAME + "." + System.nanoTime());
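    // newInstance (unlike FileSystem.get) returns a non-cached instance, so closing it below is safe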
    FileSystem fs = FileSystem.newInstance(path.toUri(), new Configuration());
    try {
        FSDataOutputStream os = fs.create(path);
        try {
            JSONObject top = new JSONObject();
            JSONObject attributes = new JSONObject();
            for (Map.Entry<Attribute<?>, Object> entry : this.plan.getLogicalPlan().getAttributes()
                    .entrySet()) {
                attributes.put(entry.getKey().getSimpleName(), entry.getValue());
            }
            JSONObject customMetrics = new JSONObject();
            for (Map.Entry<String, Map<String, Object>> entry : latestLogicalMetrics.entrySet()) {
                customMetrics.put(entry.getKey(), new JSONArray(entry.getValue().keySet()));
            }
            top.put(APP_META_KEY_ATTRIBUTES, attributes);
            top.put(APP_META_KEY_CUSTOM_METRICS, customMetrics);
            os.write(top.toString().getBytes());
        } catch (JSONException ex) {
            throw new RuntimeException(ex);
        } finally {
            os.close();
        }
        Path origPath = new Path(this.vars.appPath, APP_META_FILENAME);
        fs.rename(path, origPath);
    } finally {
        fs.close();
    }
}

From source file:com.datatorrent.stram.util.FSJsonLineFile.java

License:Apache License

public FSJsonLineFile(Path path, FsPermission permission) throws IOException {
    fs = FileSystem.newInstance(path.toUri(), new Configuration());
    FSDataOutputStream myos;
    if (fs.exists(path)) {
        try {
            // happens if not the first application attempt
            myos = fs.append(path);
        } catch (IOException ex) {
            LOG.warn("Caught exception (OK during unit test): {}", ex.getMessage());
            myos = FileSystem.create(fs, path, permission);
        }
    } else {
        myos = FileSystem.create(fs, path, permission);
    }
    os = myos;
    this.objectMapper = (new JSONSerializationProvider()).getContext(null);
}

From source file:com.digitalpebble.behemoth.ClassifierJob.java

License:Apache License

@Override
public void configure(JobConf job) {
    super.configure(job);
    filter = DocumentFilter.getFilters(job);
    lowerCase = job.getBoolean("classification.tokenize", false);
    docFeaturename = job.get("classification.doc.feature.name", "label");

    String modelPath = job.get(ClassifierJob.modelNameParam);

    // optimisation for jvm reuse
    // do not reload the model
    if (classifier != null) {
        LOG.info("Reusing existing classifier [" + classifier.toString() + "]");
        return;
    }

    long start = System.currentTimeMillis();
    File modelFile = null;
    try {
        String modelCacheName = new Path(modelPath).getName();
        Path[] cacheFiles = DistributedCache.getLocalCacheArchives(job);
        if (null != cacheFiles && cacheFiles.length > 0) {
            for (Path cachePath : cacheFiles) {
                LOG.info("LocalCache : " + cachePath.toUri());
                LOG.info("modelCacheName : " + modelCacheName);
                if (cachePath.toUri().toString().endsWith(modelCacheName)) {
                    String parent = new File(cachePath.toUri().getPath()).toString();
                    modelFile = new File(parent, modelCacheName.replace(".zip", ""));
                    LOG.info("Unzipped ? " + modelFile.getAbsolutePath());
                    boolean doesExist = modelFile.exists();
                    LOG.info("modelFile exists " + doesExist);
                    // if it does not exist it must have been unpacked at
                    // the parent level
                    if (!doesExist) {
                        modelFile = new File(parent);
                    }
                    break;
                }
            }
        }
    } catch (IOException ioe) {
        throw new RuntimeException("Impossible to retrieve model from distributed cache", ioe);
    }

    try {
        classifier = classifier.getClassifier(modelFile);
    } catch (Exception e) {
        throw new RuntimeException("Impossible to load model from " + modelFile, e);
    }
    long end = System.currentTimeMillis();
    LOG.info("Model loaded in " + (end - start) + " msec");
}

From source file:com.digitalpebble.behemoth.gate.AbstractGATEMapper.java

License:Apache License

public void configure(JobConf job) {
    super.configure(job);
    config = job;

    // we try to load the gate application
    // using the gate.app file
    String application_path = job.get("gate.application.path");
    String gapp_file = job.get("gate.application.descriptor", "application.xgapp");

    URL applicationDescriptorURL = null;

    // the application will have been unzipped and put on the distributed
    // cache
    try {
        String applicationName = new File(application_path).getCanonicalFile().getName();
        // trim the zip
        if (applicationName.endsWith(".zip"))
            applicationName = applicationName.replace(".zip", "");

        Path[] localArchives = DistributedCache.getLocalCacheArchives(job);
        // identify the right archive
        for (Path la : localArchives) {
            String localPath = la.toUri().toString();
            LOG.info("LocalCache : " + localPath);
            if (!localPath.endsWith(application_path))
                continue;
            // see if the gapp file is directly under the dir
            applicationDescriptorURL = new URL("file://" + localPath + "/" + gapp_file);
            File f = new File(applicationDescriptorURL.getFile());
            if (f.exists())
                break;
            // or for older versions of the zipped pipelines
            applicationDescriptorURL = new URL("file://" + localPath + "/" + applicationName + "/" + gapp_file);
            break;
        }
    } catch (Exception e) {
        throw new RuntimeException("Impossible to retrieve gate application from distributed cache", e);
    }

    if (applicationDescriptorURL == null)
        throw new RuntimeException("GATE app " + application_path + "not available in distributed cache");

    processor = new GATEProcessor(applicationDescriptorURL);
    processor.setConf(config);
}

From source file:com.digitalpebble.behemoth.languageidentification.LanguageIdDriver.java

License:Apache License

public int run(String[] args) throws Exception {

    final FileSystem fs = FileSystem.get(getConf());

    Options options = new Options();
    // automatically generate the help statement
    HelpFormatter formatter = new HelpFormatter();
    // create the parser
    CommandLineParser parser = new GnuParser();

    options.addOption("h", "help", false, "print this message");
    options.addOption("i", "input", true, "input file or directory");
    options.addOption("o", "output", true, "output Behemoth corpus");
    options.addOption("w", "overwrite", false, "overwrite the output");

    Path inputPath = null;
    Path outputPath = null;

    boolean overWrite = false;

    // parse the command line arguments
    CommandLine cmdLine = null;
    try {
        cmdLine = parser.parse(options, args);
        String input = cmdLine.getOptionValue("i");
        String output = cmdLine.getOptionValue("o");
        if (cmdLine.hasOption("help")) {
            formatter.printHelp("LanguageIdDriver", options);
            return 0;
        }
        if (input == null || output == null) {
            formatter.printHelp("LanguageIdDriver", options);
            return -1;
        }
        inputPath = new Path(input);
        outputPath = new Path(output);
        if (cmdLine.hasOption("overwrite")) {
            overWrite = true;
        }
    } catch (ParseException e) {
        formatter.printHelp("LanguageIdDriver", options);
        return -1;
    }

    // check whether the output needs overwriting
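    // resolve the FileSystem from the output path's URI, in case it uses a non-default scheme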
    if (FileSystem.get(outputPath.toUri(), getConf()).exists(outputPath)) {
        if (!overWrite) {
            System.out.println("Output path " + outputPath + " already exists. Use option -w to overwrite.");
            return 0;
        } else
            fs.delete(outputPath, true);
    }

    JobConf job = new JobConf(getConf());
    job.setJarByClass(this.getClass());

    job.setJobName("Processing with Language Identifier");

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(BehemothDocument.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BehemothDocument.class);

    job.setMapperClass(LanguageIdMapper.class);

    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    try {
        long start = System.currentTimeMillis();
        JobClient.runJob(job);
        long finish = System.currentTimeMillis();
        if (log.isInfoEnabled()) {
            log.info("LanguagedIdDriver completed. Timing: " + (finish - start) + " ms");
        }
    } catch (Exception e) {
        log.error(e.getMessage(), e);
        fs.delete(outputPath, true);
        return -1;
    }

    return 0;
}