List of usage examples for org.apache.hadoop.mapreduce.filecache.DistributedCache#getLocalCacheFiles
@Deprecated public static Path[] getLocalCacheFiles(Configuration conf) throws IOException
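Before the collected examples below, a minimal sketch (not taken from any of the listed sources) of how this deprecated method is typically used: a mapper's setup() retrieves the task-local copies of files that the driver registered with the DistributedCache. The class name is a hypothetical placeholder; on the submission side, files are usually registered with Job.addCacheFile(URI) or the deprecated DistributedCache.addCacheFile(URI, Configuration).

import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.filecache.DistributedCache;

public class CacheAwareMapper extends Mapper<LongWritable, Text, Text, Text> {

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Deprecated API: returns task-local copies of all files registered with
        // the DistributedCache, or null if no files were cached.
        Path[] localFiles = DistributedCache.getLocalCacheFiles(context.getConfiguration());
        if (localFiles != null) {
            for (Path p : localFiles) {
                // open p with a local FileSystem and load lookup data,
                // as the examples below do
            }
        }
    }
}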
From source file:de.tudarmstadt.lt.n2n.hadoop.GoogleSyntacticsJobDkbd.java
License:Apache License
@Override
public AnalysisEngineDescription buildMapperEngine(Configuration conf) throws ResourceInitializationException {
    try {
        String extractorConfigurationFiles = conf.get(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS);
        String[] extractorConfigurationFilesArr = extractorConfigurationFiles.split(",");
        for (int i = 0; i < extractorConfigurationFilesArr.length; i++) {
            String extractorConfigurationFileName = new File(extractorConfigurationFilesArr[i]).getName();
            for (Path p : DistributedCache.getLocalCacheFiles(conf))
                if (p.getName().contains(extractorConfigurationFileName))
                    extractorConfigurationFilesArr[i] = p.toString();
        }

        int maxlength = conf.getInt(SHARED_CONSTANTS.PARAM_MAXIMUM_PATHLENGTH, -1);

        AggregateBuilder builder = new AggregateBuilder();
        // builder.add(AnalysisEngineFactory.createEngineDescription(MetaDataAnnotator.class));
        builder.add(AnalysisEngineFactory.createEngineDescription(JoBimRelationPipeline
                .createGoogleSyntacticsRelationEngine(true /* create_tokens */,
                        true /* create_sentences */,
                        true /* create_dependencies */,
                        true /* create_new_relations */,
                        true /* create_dependency_path */,
                        false /* ignore_nn_relations */,
                        maxlength /* dependecy_path_maxlength */,
                        false /* create_detailed_output */,
                        extractorConfigurationFilesArr /* extractor_configuration */,
                        SHARED_CONSTANTS.HADOOP_CAS_CONSUMER_OUTPUT_FILENAME /* output_destination */)));
        return builder.createAggregateDescription();
    } catch (IOException e) {
        throw new ResourceInitializationException(e);
    }
}
From source file:de.tudarmstadt.lt.n2n.hadoop.PreparsedJob.java
License:Apache License
@Override
public AnalysisEngineDescription buildMapperEngine(Configuration conf) throws ResourceInitializationException {
    try {
        String extractorConfigurationFiles = conf.get(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS);
        String[] extractorConfigurationFilesArr = extractorConfigurationFiles.split(",");
        for (int i = 0; i < extractorConfigurationFilesArr.length; i++) {
            String extractorConfigurationFileName = new File(extractorConfigurationFilesArr[i]).getName();
            for (Path p : DistributedCache.getLocalCacheFiles(conf))
                if (p.getName().contains(extractorConfigurationFileName))
                    extractorConfigurationFilesArr[i] = p.toString();
        }

        AggregateBuilder builder = new AggregateBuilder();
        // builder.add(AnalysisEngineFactory.createEngineDescription(MetaDataAnnotator.class));
        builder.add(AnalysisEngineFactory.createEngineDescription(JoBimRelationPipeline
                .createPreparsedRelationEngine(true /* create_tokens */,
                        true /* create_sentences */,
                        true /* create_dependencies */,
                        true /* create_new_relations */,
                        true /* create_dependency_path */,
                        false /* ignore_nn_relations */,
                        5 /* dependecy_path_maxlength */,
                        false /* create_detailed_output */,
                        extractorConfigurationFilesArr /* extractor_configuration */,
                        SHARED_CONSTANTS.HADOOP_CAS_CONSUMER_OUTPUT_FILENAME /* output_destination */)));
        return builder.createAggregateDescription();
    } catch (IOException e) {
        throw new ResourceInitializationException(e);
    }
}
From source file:it.crs4.pydoop.pipes.Application.java
License:Apache License
/**
 * Start the child process to handle the task for us.
 * @param conf the task's configuration
 * @param recordReader the fake record reader to update progress with
 * @param output the collector to send output to
 * @param reporter the reporter for the task
 * @param outputKeyClass the class of the output keys
 * @param outputValueClass the class of the output values
 * @throws IOException
 * @throws InterruptedException
 */
Application(JobConf conf, RecordReader<FloatWritable, NullWritable> recordReader,
        OutputCollector<K2, V2> output, Reporter reporter, Class<? extends K2> outputKeyClass,
        Class<? extends V2> outputValueClass) throws IOException, InterruptedException {
    serverSocket = new ServerSocket(0);
    Map<String, String> env = new HashMap<String, String>();
    // add TMPDIR environment variable with the value of java.io.tmpdir
    env.put("TMPDIR", System.getProperty("java.io.tmpdir"));
    env.put(Submitter.PORT, Integer.toString(serverSocket.getLocalPort()));

    TaskAttemptID taskid = TaskAttemptID.forName(conf.get(MRJobConfig.TASK_ATTEMPT_ID));

    // get the task's working directory
    String workDir = LocalJobRunner.getLocalTaskDir(conf.getUser(), taskid.getJobID().toString(),
            taskid.getTaskID().toString(), false);

    // Add token to the environment if security is enabled
    Token<JobTokenIdentifier> jobToken = TokenCache.getJobToken(conf.getCredentials());
    // This password is used as shared secret key between this application and
    // child pipes process
    byte[] password = jobToken.getPassword();
    String localPasswordFile = new File(workDir, "jobTokenPassword").getAbsolutePath();
    writePasswordToLocalFile(localPasswordFile, password, conf);
    env.put("hadoop.pipes.shared.secret.location", localPasswordFile);

    List<String> cmd = new ArrayList<String>();
    String interpretor = conf.get(Submitter.INTERPRETOR);
    if (interpretor != null) {
        cmd.add(interpretor);
    }
    String executable = DistributedCache.getLocalCacheFiles(conf)[0].toString();
    if (!(new File(executable).canExecute())) {
        // LinuxTaskController sets +x permissions on all distcache files already.
        // In case of DefaultTaskController, set permissions here.
        FileUtil.chmod(executable, "u+x");
    }
    cmd.add(executable);

    // wrap the command in a stdout/stderr capture
    // we are starting map/reduce task of the pipes job. this is not a cleanup
    // attempt.
    File stdout = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDOUT);
    File stderr = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDERR);
    long logLength = TaskLog.getTaskLogLength(conf);
    cmd = TaskLog.captureOutAndError(null, cmd, stdout, stderr, logLength, false);

    process = runClient(cmd, env);
    clientSocket = serverSocket.accept();

    String challenge = getSecurityChallenge();
    String digestToSend = createDigest(password, challenge);
    String digestExpected = createDigest(password, digestToSend);

    handler = new OutputHandler<K2, V2>(output, reporter, recordReader, digestExpected);
    K2 outputKey = (K2) ReflectionUtils.newInstance(outputKeyClass, conf);
    V2 outputValue = (V2) ReflectionUtils.newInstance(outputValueClass, conf);
    downlink = new BinaryProtocol<K1, V1, K2, V2>(clientSocket, handler, outputKey, outputValue, conf);

    downlink.authenticate(digestToSend, challenge);
    waitForAuthentication();
    LOG.debug("Authentication succeeded");
    downlink.start();
    downlink.setJobConf(conf);
}
From source file:nl.sanoma.hdt.report.generator.ReportGeneratorMapper.java
License:Open Source License
/**
 * Creating a reader for the metadata MapFile.
 *
 * @param context
 * @throws IOException
 * @throws InterruptedException
 */
@Override
protected void setup(Mapper.Context context) throws IOException, InterruptedException {
    Path[] cacheFilesLocal = DistributedCache.getLocalCacheFiles(context.getConfiguration()); // caching only one file
    FileSystem dfs = FileSystem.get(context.getConfiguration());
    metadataMapReader = new MapFile.Reader(dfs, cacheFilesLocal[0].toString() + "/part-m-00000/",
            context.getConfiguration());
}
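The source above shows only the read side; how the metadata MapFile directory reaches the cache is not part of the listing. A minimal driver-side sketch under that assumption, with a hypothetical HDFS path and driver class name:

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.filecache.DistributedCache;

public class ReportGeneratorDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "report-generator");
        // Register the MapFile directory produced by an earlier job; the path is a
        // hypothetical placeholder. job.addCacheFile(URI) is the non-deprecated
        // equivalent of this call.
        DistributedCache.addCacheFile(new URI("/user/example/metadata"), job.getConfiguration());
        // ... mapper class, input/output paths and the rest of the job setup ...
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}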
From source file:org.apache.sysml.runtime.matrix.mapred.MRBaseForCommonInstructions.java
License:Apache License
protected void setupDistCacheFiles(JobConf job) throws IOException {
    if (MRJobConfiguration.getDistCacheInputIndices(job) == null)
        return;

    //boolean isJobLocal = false;
    isJobLocal = InfrastructureAnalyzer.isLocalMode(job);

    String[] inputIndices = MRJobConfiguration.getInputPaths(job);
    String[] dcIndices = MRJobConfiguration.getDistCacheInputIndices(job).split(Instruction.INSTRUCTION_DELIM);
    Path[] dcFiles = DistributedCache.getLocalCacheFiles(job);
    PDataPartitionFormat[] inputPartitionFormats = MRJobConfiguration.getInputPartitionFormats(job);

    DistributedCacheInput[] dcInputs = new DistributedCacheInput[dcIndices.length];
    for (int i = 0; i < dcIndices.length; i++) {
        byte inputIndex = Byte.parseByte(dcIndices[i]);

        // load if not already present (jvm reuse)
        if (!dcValues.containsKey(inputIndex)) {
            // When the job is in local mode, files can be read from HDFS directly -- use
            // input paths as opposed to "local" paths prepared by DistributedCache.
            Path p = null;
            if (isJobLocal)
                p = new Path(inputIndices[Byte.parseByte(dcIndices[i])]);
            else
                p = dcFiles[i];

            dcInputs[i] = new DistributedCacheInput(p,
                    MRJobConfiguration.getNumRows(job, inputIndex), //rlens[inputIndex]
                    MRJobConfiguration.getNumColumns(job, inputIndex), //clens[inputIndex]
                    MRJobConfiguration.getNumRowsPerBlock(job, inputIndex), //brlens[inputIndex]
                    MRJobConfiguration.getNumColumnsPerBlock(job, inputIndex), //bclens[inputIndex]
                    inputPartitionFormats[inputIndex]);
            dcValues.put(inputIndex, dcInputs[i]);
        }
    }
}
From source file:org.apache.tez.mapreduce.hadoop.mapreduce.JobContextImpl.java
License:Apache License
/**
 * Return the path array of the localized files
 * @return A path array of localized files
 * @throws IOException
 */
public Path[] getLocalCacheFiles() throws IOException {
    return DistributedCache.getLocalCacheFiles(conf);
}