List of usage examples for org.apache.hadoop.fs Path toUri
public URI toUri()
From source file:com.datatorrent.lib.util.WindowDataManagerTest.java
License:Apache License
@Test public void testDelete() throws IOException { Map<Integer, String> dataOf1 = Maps.newHashMap(); dataOf1.put(1, "one"); dataOf1.put(2, "two"); dataOf1.put(3, "three"); Map<Integer, String> dataOf2 = Maps.newHashMap(); dataOf2.put(4, "four"); dataOf2.put(5, "five"); dataOf2.put(6, "six"); Map<Integer, String> dataOf3 = Maps.newHashMap(); dataOf2.put(7, "seven"); dataOf2.put(8, "eight"); dataOf2.put(9, "nine"); for (int i = 1; i <= 9; ++i) { testMeta.storageManager.save(dataOf1, 1, i); }/*from www . j a v a 2s . c om*/ testMeta.storageManager.save(dataOf2, 2, 1); testMeta.storageManager.save(dataOf3, 3, 1); testMeta.storageManager.partitioned(Lists.<WindowDataManager>newArrayList(testMeta.storageManager), Sets.newHashSet(2, 3)); testMeta.storageManager.setup(testMeta.context); testMeta.storageManager.deleteUpTo(1, 6); Path appPath = new Path(testMeta.applicationPath + '/' + testMeta.storageManager.getRecoveryPath()); FileSystem fs = FileSystem.newInstance(appPath.toUri(), new Configuration()); FileStatus[] fileStatuses = fs.listStatus(new Path(appPath, Integer.toString(1))); Assert.assertEquals("number of windows for 1", 3, fileStatuses.length); TreeSet<String> windows = Sets.newTreeSet(); for (FileStatus fileStatus : fileStatuses) { windows.add(fileStatus.getPath().getName()); } Assert.assertEquals("window list for 1", Sets.newLinkedHashSet(Arrays.asList("7", "8", "9")), windows); Assert.assertEquals("no data for 2", false, fs.exists(new Path(appPath, Integer.toString(2)))); Assert.assertEquals("no data for 3", false, fs.exists(new Path(appPath, Integer.toString(3)))); }
From source file:com.datatorrent.stram.cli.ApexCli.java
License:Apache License
private File copyToLocal(String[] files) throws IOException { File tmpDir = new File(System.getProperty("java.io.tmpdir") + "/datatorrent/" + ManagementFactory.getRuntimeMXBean().getName()); tmpDir.mkdirs();//from w w w . jav a 2 s . c o m for (int i = 0; i < files.length; i++) { try { URI uri = new URI(files[i]); String scheme = uri.getScheme(); if (scheme == null || scheme.equals("file")) { files[i] = uri.getPath(); } else { try (FileSystem tmpFs = FileSystem.newInstance(uri, conf)) { Path srcPath = new Path(uri.getPath()); Path dstPath = new Path(tmpDir.getAbsolutePath(), String.valueOf(i) + srcPath.getName()); tmpFs.copyToLocalFile(srcPath, dstPath); files[i] = dstPath.toUri().getPath(); } } } catch (URISyntaxException ex) { throw new RuntimeException(ex); } } return tmpDir; }
From source file:com.datatorrent.stram.cli.DTCli.java
License:Apache License
private File copyToLocal(String[] files) throws IOException { File tmpDir = new File("/tmp/datatorrent/" + ManagementFactory.getRuntimeMXBean().getName()); tmpDir.mkdirs();//from w w w . ja v a 2 s .c o m for (int i = 0; i < files.length; i++) { try { URI uri = new URI(files[i]); String scheme = uri.getScheme(); if (scheme == null || scheme.equals("file")) { files[i] = uri.getPath(); } else { FileSystem tmpFs = FileSystem.newInstance(uri, conf); try { Path srcPath = new Path(uri.getPath()); Path dstPath = new Path(tmpDir.getAbsolutePath(), String.valueOf(i) + srcPath.getName()); tmpFs.copyToLocalFile(srcPath, dstPath); files[i] = dstPath.toUri().getPath(); } finally { tmpFs.close(); } } } catch (URISyntaxException ex) { throw new RuntimeException(ex); } } return tmpDir; }
From source file:com.datatorrent.stram.StramClient.java
License:Apache License
public void copyInitialState(Path origAppDir) throws IOException { // locate previous snapshot String newAppDir = this.dag.assertAppPath(); FSRecoveryHandler recoveryHandler = new FSRecoveryHandler(origAppDir.toString(), conf); // read snapshot against new dependencies Object snapshot = recoveryHandler.restore(); if (snapshot == null) { throw new IllegalArgumentException("No previous application state found in " + origAppDir); }//from w ww .j av a 2s . co m InputStream logIs = recoveryHandler.getLog(); // modify snapshot state to switch app id ((StreamingContainerManager.CheckpointState) snapshot).setApplicationId(this.dag, conf); Path checkpointPath = new Path(newAppDir, LogicalPlan.SUBDIR_CHECKPOINTS); FileSystem fs = FileSystem.newInstance(origAppDir.toUri(), conf); // remove the path that was created by the storage agent during deserialization and replacement fs.delete(checkpointPath, true); // write snapshot to new location recoveryHandler = new FSRecoveryHandler(newAppDir, conf); recoveryHandler.save(snapshot); OutputStream logOs = recoveryHandler.rotateLog(); IOUtils.copy(logIs, logOs); logOs.flush(); logOs.close(); logIs.close(); // copy sub directories that are not present in target FileStatus[] lFiles = fs.listStatus(origAppDir); for (FileStatus f : lFiles) { if (f.isDirectory()) { String targetPath = f.getPath().toString().replace(origAppDir.toString(), newAppDir); if (!fs.exists(new Path(targetPath))) { LOG.debug("Copying {} to {}", f.getPath(), targetPath); FileUtil.copy(fs, f.getPath(), fs, new Path(targetPath), false, conf); //FSUtil.copy(fs, f, fs, new Path(targetPath), false, false, conf); } else { LOG.debug("Ignoring {} as it already exists under {}", f.getPath(), targetPath); //FSUtil.setPermission(fs, new Path(targetPath), new FsPermission((short)0777)); } } } }
From source file:com.datatorrent.stram.StramClient.java
License:Apache License
/** * Launch application for the dag represented by this client. * * @throws YarnException//w w w . j ava 2s . com * @throws IOException */ public void startApplication() throws YarnException, IOException { Class<?>[] defaultClasses; if (applicationType.equals(YARN_APPLICATION_TYPE)) { //TODO restrict the security check to only check if security is enabled for webservices. if (UserGroupInformation.isSecurityEnabled()) { defaultClasses = DATATORRENT_SECURITY_CLASSES; } else { defaultClasses = DATATORRENT_CLASSES; } } else { throw new IllegalStateException(applicationType + " is not a valid application type."); } LinkedHashSet<String> localJarFiles = findJars(dag, defaultClasses); if (resources != null) { localJarFiles.addAll(resources); } YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics(); LOG.info("Got Cluster metric info from ASM" + ", numNodeManagers=" + clusterMetrics.getNumNodeManagers()); //GetClusterNodesRequest clusterNodesReq = Records.newRecord(GetClusterNodesRequest.class); //GetClusterNodesResponse clusterNodesResp = rmClient.clientRM.getClusterNodes(clusterNodesReq); //LOG.info("Got Cluster node info from ASM"); //for (NodeReport node : clusterNodesResp.getNodeReports()) { // LOG.info("Got node report from ASM for" // + ", nodeId=" + node.getNodeId() // + ", nodeAddress" + node.getHttpAddress() // + ", nodeRackName" + node.getRackName() // + ", nodeNumContainers" + node.getNumContainers() // + ", nodeHealthStatus" + node.getHealthReport()); //} List<QueueUserACLInfo> listAclInfo = yarnClient.getQueueAclsInfo(); for (QueueUserACLInfo aclInfo : listAclInfo) { for (QueueACL userAcl : aclInfo.getUserAcls()) { LOG.info("User ACL Info for Queue" + ", queueName=" + aclInfo.getQueueName() + ", userAcl=" + userAcl.name()); } } // Get a new application id YarnClientApplication newApp = yarnClient.createApplication(); appId = newApp.getNewApplicationResponse().getApplicationId(); // Dump out information about cluster capability as seen by the resource manager int maxMem = newApp.getNewApplicationResponse().getMaximumResourceCapability().getMemory(); LOG.info("Max mem capabililty of resources in this cluster " + maxMem); int amMemory = dag.getMasterMemoryMB(); if (amMemory > maxMem) { LOG.info("AM memory specified above max threshold of cluster. Using max value." + ", specified=" + amMemory + ", max=" + maxMem); amMemory = maxMem; } if (dag.getAttributes().get(LogicalPlan.APPLICATION_ID) == null) { dag.setAttribute(LogicalPlan.APPLICATION_ID, appId.toString()); } // Create launch context for app master LOG.info("Setting up application submission context for ASM"); ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class); // set the application id appContext.setApplicationId(appId); // set the application name appContext.setApplicationName(dag.getValue(LogicalPlan.APPLICATION_NAME)); appContext.setApplicationType(this.applicationType); if (YARN_APPLICATION_TYPE.equals(this.applicationType)) { //appContext.setMaxAppAttempts(1); // no retries until Stram is HA } // Set up the container launch context for the application master ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class); // Setup security tokens // If security is enabled get ResourceManager and NameNode delegation tokens. // Set these tokens on the container so that they are sent as part of application submission. // This also sets them up for renewal by ResourceManager. The NameNode delegation rmToken // is also used by ResourceManager to fetch the jars from HDFS and set them up for the // application master launch. if (UserGroupInformation.isSecurityEnabled()) { Credentials credentials = new Credentials(); String tokenRenewer = conf.get(YarnConfiguration.RM_PRINCIPAL); if (tokenRenewer == null || tokenRenewer.length() == 0) { throw new IOException("Can't get Master Kerberos principal for the RM to use as renewer"); } // For now, only getting tokens for the default file-system. FileSystem fs = StramClientUtils.newFileSystemInstance(conf); try { final Token<?> tokens[] = fs.addDelegationTokens(tokenRenewer, credentials); if (tokens != null) { for (Token<?> token : tokens) { LOG.info("Got dt for " + fs.getUri() + "; " + token); } } } finally { fs.close(); } addRMDelegationToken(tokenRenewer, credentials); DataOutputBuffer dob = new DataOutputBuffer(); credentials.writeTokenStorageToStream(dob); ByteBuffer fsTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); amContainer.setTokens(fsTokens); } // set local resources for the application master // local files or archives as needed // In this scenario, the jar file for the application master is part of the local resources Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(); // copy required jar files to dfs, to be localized for containers FileSystem fs = StramClientUtils.newFileSystemInstance(conf); try { Path appsBasePath = new Path(StramClientUtils.getDTDFSRootDir(fs, conf), StramClientUtils.SUBDIR_APPS); Path appPath = new Path(appsBasePath, appId.toString()); String libJarsCsv = copyFromLocal(fs, appPath, localJarFiles.toArray(new String[] {})); LOG.info("libjars: {}", libJarsCsv); dag.getAttributes().put(LogicalPlan.LIBRARY_JARS, libJarsCsv); LaunchContainerRunnable.addFilesToLocalResources(LocalResourceType.FILE, libJarsCsv, localResources, fs); if (archives != null) { String[] localFiles = archives.split(","); String archivesCsv = copyFromLocal(fs, appPath, localFiles); LOG.info("archives: {}", archivesCsv); dag.getAttributes().put(LogicalPlan.ARCHIVES, archivesCsv); LaunchContainerRunnable.addFilesToLocalResources(LocalResourceType.ARCHIVE, archivesCsv, localResources, fs); } if (files != null) { String[] localFiles = files.split(","); String filesCsv = copyFromLocal(fs, appPath, localFiles); LOG.info("files: {}", filesCsv); dag.getAttributes().put(LogicalPlan.FILES, filesCsv); LaunchContainerRunnable.addFilesToLocalResources(LocalResourceType.FILE, filesCsv, localResources, fs); } dag.getAttributes().put(LogicalPlan.APPLICATION_PATH, appPath.toString()); if (dag.getAttributes() .get(OperatorContext.STORAGE_AGENT) == null) { /* which would be the most likely case */ Path checkpointPath = new Path(appPath, LogicalPlan.SUBDIR_CHECKPOINTS); // use conf client side to pickup any proxy settings from dt-site.xml dag.setAttribute(OperatorContext.STORAGE_AGENT, new FSStorageAgent(checkpointPath.toString(), conf)); } if (dag.getAttributes().get(LogicalPlan.CONTAINER_OPTS_CONFIGURATOR) == null) { dag.setAttribute(LogicalPlan.CONTAINER_OPTS_CONFIGURATOR, new BasicContainerOptConfigurator()); } // Set the log4j properties if needed if (!log4jPropFile.isEmpty()) { Path log4jSrc = new Path(log4jPropFile); Path log4jDst = new Path(appPath, "log4j.props"); fs.copyFromLocalFile(false, true, log4jSrc, log4jDst); FileStatus log4jFileStatus = fs.getFileStatus(log4jDst); LocalResource log4jRsrc = Records.newRecord(LocalResource.class); log4jRsrc.setType(LocalResourceType.FILE); log4jRsrc.setVisibility(LocalResourceVisibility.APPLICATION); log4jRsrc.setResource(ConverterUtils.getYarnUrlFromURI(log4jDst.toUri())); log4jRsrc.setTimestamp(log4jFileStatus.getModificationTime()); log4jRsrc.setSize(log4jFileStatus.getLen()); localResources.put("log4j.properties", log4jRsrc); } if (originalAppId != null) { Path origAppPath = new Path(appsBasePath, this.originalAppId); LOG.info("Restart from {}", origAppPath); copyInitialState(origAppPath); } // push logical plan to DFS location Path cfgDst = new Path(appPath, LogicalPlan.SER_FILE_NAME); FSDataOutputStream outStream = fs.create(cfgDst, true); LogicalPlan.write(this.dag, outStream); outStream.close(); Path launchConfigDst = new Path(appPath, LogicalPlan.LAUNCH_CONFIG_FILE_NAME); outStream = fs.create(launchConfigDst, true); conf.writeXml(outStream); outStream.close(); FileStatus topologyFileStatus = fs.getFileStatus(cfgDst); LocalResource topologyRsrc = Records.newRecord(LocalResource.class); topologyRsrc.setType(LocalResourceType.FILE); topologyRsrc.setVisibility(LocalResourceVisibility.APPLICATION); topologyRsrc.setResource(ConverterUtils.getYarnUrlFromURI(cfgDst.toUri())); topologyRsrc.setTimestamp(topologyFileStatus.getModificationTime()); topologyRsrc.setSize(topologyFileStatus.getLen()); localResources.put(LogicalPlan.SER_FILE_NAME, topologyRsrc); // Set local resource info into app master container launch context amContainer.setLocalResources(localResources); // Set the necessary security tokens as needed //amContainer.setContainerTokens(containerToken); // Set the env variables to be setup in the env where the application master will be run LOG.info("Set the environment for the application master"); Map<String, String> env = new HashMap<String, String>(); // Add application jar(s) location to classpath // At some point we should not be required to add // the hadoop specific classpaths to the env. // It should be provided out of the box. // For now setting all required classpaths including // the classpath to "." for the application jar(s) // including ${CLASSPATH} will duplicate the class path in app master, removing it for now //StringBuilder classPathEnv = new StringBuilder("${CLASSPATH}:./*"); StringBuilder classPathEnv = new StringBuilder("./*"); String classpath = conf.get(YarnConfiguration.YARN_APPLICATION_CLASSPATH); for (String c : StringUtils.isBlank(classpath) ? YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH : classpath.split(",")) { if (c.equals("$HADOOP_CLIENT_CONF_DIR")) { // SPOI-2501 continue; } classPathEnv.append(':'); classPathEnv.append(c.trim()); } env.put("CLASSPATH", classPathEnv.toString()); // propagate to replace node managers user name (effective in non-secure mode) env.put("HADOOP_USER_NAME", UserGroupInformation.getLoginUser().getUserName()); amContainer.setEnvironment(env); // Set the necessary command to execute the application master ArrayList<CharSequence> vargs = new ArrayList<CharSequence>(30); // Set java executable command LOG.info("Setting up app master command"); vargs.add(javaCmd); if (dag.isDebug()) { vargs.add("-agentlib:jdwp=transport=dt_socket,server=y,suspend=n"); } // Set Xmx based on am memory size // default heap size 75% of total memory if (dag.getMasterJVMOptions() != null) { vargs.add(dag.getMasterJVMOptions()); } vargs.add("-Xmx" + (amMemory * 3 / 4) + "m"); vargs.add("-XX:+HeapDumpOnOutOfMemoryError"); vargs.add("-XX:HeapDumpPath=/tmp/dt-heap-" + appId.getId() + ".bin"); vargs.add("-Dhadoop.root.logger=" + (dag.isDebug() ? "DEBUG" : "INFO") + ",RFA"); vargs.add("-Dhadoop.log.dir=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR); vargs.add(String.format("-D%s=%s", StreamingContainer.PROP_APP_PATH, dag.assertAppPath())); if (dag.isDebug()) { vargs.add("-Dlog4j.debug=true"); } String loggersLevel = conf.get(DTLoggerFactory.DT_LOGGERS_LEVEL); if (loggersLevel != null) { vargs.add(String.format("-D%s=%s", DTLoggerFactory.DT_LOGGERS_LEVEL, loggersLevel)); } vargs.add(StreamingAppMaster.class.getName()); vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout"); vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr"); // Get final command StringBuilder command = new StringBuilder(9 * vargs.size()); for (CharSequence str : vargs) { command.append(str).append(" "); } LOG.info("Completed setting up app master command " + command.toString()); List<String> commands = new ArrayList<String>(); commands.add(command.toString()); amContainer.setCommands(commands); // Set up resource type requirements // For now, only memory is supported so we set memory requirements Resource capability = Records.newRecord(Resource.class); capability.setMemory(amMemory); appContext.setResource(capability); // Service data is a binary blob that can be passed to the application // Not needed in this scenario // amContainer.setServiceData(serviceData); appContext.setAMContainerSpec(amContainer); // Set the priority for the application master Priority pri = Records.newRecord(Priority.class); pri.setPriority(amPriority); appContext.setPriority(pri); // Set the queue to which this application is to be submitted in the RM appContext.setQueue(queueName); // Submit the application to the applications manager // SubmitApplicationResponse submitResp = rmClient.submitApplication(appRequest); // Ignore the response as either a valid response object is returned on success // or an exception thrown to denote some form of a failure String specStr = Objects.toStringHelper("Submitting application: ") .add("name", appContext.getApplicationName()).add("queue", appContext.getQueue()) .add("user", UserGroupInformation.getLoginUser()).add("resource", appContext.getResource()) .toString(); LOG.info(specStr); if (dag.isDebug()) { //LOG.info("Full submission context: " + appContext); } yarnClient.submitApplication(appContext); } finally { fs.close(); } }
From source file:com.datatorrent.stram.StreamingContainerManager.java
License:Apache License
/** * This method is for saving meta information about this application in HDFS -- the meta information that generally * does not change across multiple attempts *//*from w ww . j a v a 2s . c o m*/ private void saveMetaInfo() throws IOException { Path path = new Path(this.vars.appPath, APP_META_FILENAME + "." + System.nanoTime()); FileSystem fs = FileSystem.newInstance(path.toUri(), new Configuration()); try { FSDataOutputStream os = fs.create(path); try { JSONObject top = new JSONObject(); JSONObject attributes = new JSONObject(); for (Map.Entry<Attribute<?>, Object> entry : this.plan.getLogicalPlan().getAttributes() .entrySet()) { attributes.put(entry.getKey().getSimpleName(), entry.getValue()); } JSONObject customMetrics = new JSONObject(); for (Map.Entry<String, Map<String, Object>> entry : latestLogicalMetrics.entrySet()) { customMetrics.put(entry.getKey(), new JSONArray(entry.getValue().keySet())); } top.put(APP_META_KEY_ATTRIBUTES, attributes); top.put(APP_META_KEY_CUSTOM_METRICS, customMetrics); os.write(top.toString().getBytes()); } catch (JSONException ex) { throw new RuntimeException(ex); } finally { os.close(); } Path origPath = new Path(this.vars.appPath, APP_META_FILENAME); fs.rename(path, origPath); } finally { fs.close(); } }
From source file:com.datatorrent.stram.util.FSJsonLineFile.java
License:Apache License
public FSJsonLineFile(Path path, FsPermission permission) throws IOException { fs = FileSystem.newInstance(path.toUri(), new Configuration()); FSDataOutputStream myos;/*from w ww .j a v a2 s . c o m*/ if (fs.exists(path)) { try { // happens if not the first application attempt myos = fs.append(path); } catch (IOException ex) { LOG.warn("Caught exception (OK during unit test): {}", ex.getMessage()); myos = FileSystem.create(fs, path, permission); } } else { myos = FileSystem.create(fs, path, permission); } os = myos; this.objectMapper = (new JSONSerializationProvider()).getContext(null); }
From source file:com.digitalpebble.behemoth.ClassifierJob.java
License:Apache License
@Override public void configure(JobConf job) { super.configure(job); filter = DocumentFilter.getFilters(job); lowerCase = job.getBoolean("classification.tokenize", false); docFeaturename = job.get("classification.doc.feature.name", "label"); String modelPath = job.get(ClassifierJob.modelNameParam); // optimisation for jvm reuse // do not reload the model if (classifier != null) { LOG.info("Reusing existing classifier [" + classifier.toString() + "]"); return;/* www . ja v a 2 s.c om*/ } long start = System.currentTimeMillis(); File modelFile = null; try { String modelCacheName = new Path(modelPath).getName(); Path[] cacheFiles = DistributedCache.getLocalCacheArchives(job); if (null != cacheFiles && cacheFiles.length > 0) { for (Path cachePath : cacheFiles) { LOG.info("LocalCache : " + cachePath.toUri()); LOG.info("modelCacheName : " + modelCacheName); if (cachePath.toUri().toString().endsWith(modelCacheName)) { String parent = new File(cachePath.toUri().getPath()).toString(); modelFile = new File(parent, modelCacheName.replaceAll(".zip", "")); LOG.info("Unzipped ? " + modelFile.getAbsolutePath()); boolean doesExist = modelFile.exists(); LOG.info("modelFile exists " + doesExist); // if it does not exist it must have been unpacked at // the parent level if (!doesExist) { modelFile = new File(parent); } break; } } } } catch (IOException ioe) { throw new RuntimeException("Impossible to retrieve model from distributed cache", ioe); } try { classifier = classifier.getClassifier(modelFile); } catch (Exception e) { throw new RuntimeException("Impossible to load model from " + modelFile, e); } long end = System.currentTimeMillis(); LOG.info("Model loaded in " + (end - start) + " msec"); }
From source file:com.digitalpebble.behemoth.gate.AbstractGATEMapper.java
License:Apache License
public void configure(JobConf job) { super.configure(job); config = job;// w w w. j a v a2s.c o m // we try to load the gate application // using the gate.app file String application_path = job.get("gate.application.path"); String gapp_file = job.get("gate.application.descriptor", "application.xgapp"); URL applicationDescriptorURL = null; // the application will have been unzipped and put on the distributed // cache try { String applicationName = new File(application_path).getCanonicalFile().getName(); // trim the zip if (applicationName.endsWith(".zip")) applicationName = applicationName.replaceAll(".zip", ""); Path[] localArchives = DistributedCache.getLocalCacheArchives(job); // identify the right archive for (Path la : localArchives) { String localPath = la.toUri().toString(); LOG.info("LocalCache : " + localPath); if (!localPath.endsWith(application_path)) continue; // see if the gapp file is directly under the dir applicationDescriptorURL = new URL("file://" + localPath + "/" + gapp_file); File f = new File(applicationDescriptorURL.getFile()); if (f.exists()) break; // or for older versions of the zipped pipelines applicationDescriptorURL = new URL("file://" + localPath + "/" + applicationName + "/" + gapp_file); break; } } catch (Exception e) { throw new RuntimeException("Impossible to retrieve gate application from distributed cache", e); } if (applicationDescriptorURL == null) throw new RuntimeException("GATE app " + application_path + "not available in distributed cache"); processor = new GATEProcessor(applicationDescriptorURL); processor.setConf(config); }
From source file:com.digitalpebble.behemoth.languageidentification.LanguageIdDriver.java
License:Apache License
public int run(String[] args) throws Exception { final FileSystem fs = FileSystem.get(getConf()); Options options = new Options(); // automatically generate the help statement HelpFormatter formatter = new HelpFormatter(); // create the parser CommandLineParser parser = new GnuParser(); options.addOption("h", "help", false, "print this message"); options.addOption("i", "input", true, "input file or directory"); options.addOption("o", "output", true, "output Behemoth corpus"); options.addOption("w", "overwrite", false, "overwrite the output"); Path inputPath = null;/*from www. j a va2 s .c o m*/ Path outputPath = null; boolean overWrite = false; // parse the command line arguments CommandLine cmdLine = null; try { cmdLine = parser.parse(options, args); String input = cmdLine.getOptionValue("i"); String output = cmdLine.getOptionValue("o"); if (cmdLine.hasOption("help")) { formatter.printHelp("LanguageIdDriver", options); return 0; } if (input == null | output == null) { formatter.printHelp("LanguageIdDriver", options); return -1; } inputPath = new Path(input); outputPath = new Path(output); if (cmdLine.hasOption("overwrite")) { overWrite = true; } } catch (ParseException e) { formatter.printHelp("LanguageIdDriver", options); } // check whether needs overwriting if (FileSystem.get(outputPath.toUri(), getConf()).exists(outputPath)) { if (!overWrite) { System.out.println("Output path " + outputPath + " already exists. Use option -w to overwrite."); return 0; } else fs.delete(outputPath, true); } JobConf job = new JobConf(getConf()); job.setJarByClass(this.getClass()); job.setJobName("Processing with Language Identifier"); job.setInputFormat(SequenceFileInputFormat.class); job.setOutputFormat(SequenceFileOutputFormat.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(BehemothDocument.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(BehemothDocument.class); job.setMapperClass(LanguageIdMapper.class); job.setNumReduceTasks(0); FileInputFormat.addInputPath(job, inputPath); FileOutputFormat.setOutputPath(job, outputPath); try { long start = System.currentTimeMillis(); JobClient.runJob(job); long finish = System.currentTimeMillis(); if (log.isInfoEnabled()) { log.info("LanguagedIdDriver completed. Timing: " + (finish - start) + " ms"); } } catch (Exception e) { log.error(e.getMessage(), e); fs.delete(outputPath, true); return -1; } finally { } return 0; }