Example usage for org.apache.hadoop.mapreduce.filecache DistributedCache getLocalCacheFiles

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce.filecache.DistributedCache.getLocalCacheFiles.

Prototype

@Deprecated
public static Path[] getLocalCacheFiles(Configuration conf) throws IOException 

Document

Return the path array of the localized files.
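
The method is typically called from a task's setup(), after files have been added to the job with Job.addCacheFile (or the older DistributedCache.addCacheFile). Below is a minimal sketch, not taken from the examples that follow, showing how a mapper might resolve the localized path of a cached file; the class name CacheAwareMapper and the file name stopwords.txt are illustrative assumptions. Since the method is deprecated, newer code generally prefers JobContext.getCacheFiles() where it is available.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.filecache.DistributedCache;

public class CacheAwareMapper extends Mapper<LongWritable, Text, Text, Text> {

    private Path stopwordsPath; // localized copy of the cached file

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();
        // getLocalCacheFiles returns the task-local paths of the cached files;
        // it may return null if nothing was cached.
        Path[] localFiles = DistributedCache.getLocalCacheFiles(conf);
        if (localFiles != null) {
            for (Path p : localFiles) {
                if ("stopwords.txt".equals(p.getName())) { // hypothetical file name
                    stopwordsPath = p;
                }
            }
        }
    }
}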

Usage

From source file: de.tudarmstadt.lt.n2n.hadoop.GoogleSyntacticsJobDkbd.java

License: Apache License

@Override
public AnalysisEngineDescription buildMapperEngine(Configuration conf) throws ResourceInitializationException {
    try {
        String extractorConfigurationFiles = conf.get(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS);
        String[] extractorConfigurationFilesArr = extractorConfigurationFiles.split(",");
        // replace each configured extractor file name with its localized path from the distributed cache
        for (int i = 0; i < extractorConfigurationFilesArr.length; i++) {
            String extractorConfigurationFileName = new File(extractorConfigurationFilesArr[i]).getName();
            for (Path p : DistributedCache.getLocalCacheFiles(conf))
                if (p.getName().contains(extractorConfigurationFileName))
                    extractorConfigurationFilesArr[i] = p.toString();
        }

        int maxlength = conf.getInt(SHARED_CONSTANTS.PARAM_MAXIMUM_PATHLENGTH, -1);

        AggregateBuilder builder = new AggregateBuilder();
        // builder.add(AnalysisEngineFactory.createEngineDescription(MetaDataAnnotator.class));
        builder.add(AnalysisEngineFactory.createEngineDescription(JoBimRelationPipeline
                .createGoogleSyntacticsRelationEngine(true/* create_tokens */, true/* create_sentences */,
                        true/* create_dependencies */, true/* create_new_relations */,
                        true/* create_dependency_path */, false/*ignore_nn_relations*/,
                        maxlength/* dependecy_path_maxlength */, false/* create_detailed_output */,
                        extractorConfigurationFilesArr/* extractor_configuration */,
                        SHARED_CONSTANTS.HADOOP_CAS_CONSUMER_OUTPUT_FILENAME/* output_destination */)));
        return builder.createAggregateDescription();

    } catch (IOException e) {
        throw new ResourceInitializationException(e);
    }

}

From source file: de.tudarmstadt.lt.n2n.hadoop.PreparsedJob.java

License: Apache License

@Override
public AnalysisEngineDescription buildMapperEngine(Configuration conf) throws ResourceInitializationException {
    try {
        String extractorConfigurationFiles = conf.get(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS);
        String[] extractorConfigurationFilesArr = extractorConfigurationFiles.split(",");
        for (int i = 0; i < extractorConfigurationFilesArr.length; i++) {
            String extractorConfigurationFileName = new File(extractorConfigurationFilesArr[i]).getName();
            for (Path p : DistributedCache.getLocalCacheFiles(conf))
                if (p.getName().contains(extractorConfigurationFileName))
                    extractorConfigurationFilesArr[i] = p.toString();
        }

        AggregateBuilder builder = new AggregateBuilder();
        // builder.add(AnalysisEngineFactory.createEngineDescription(MetaDataAnnotator.class));
        builder.add(AnalysisEngineFactory.createEngineDescription(JoBimRelationPipeline
                .createPreparsedRelationEngine(true/* create_tokens */, true/* create_sentences */,
                        true/* create_dependencies */, true/* create_new_relations */,
                        true/* create_dependency_path */, false/*ignore_nn_relations*/,
                        5/* dependecy_path_maxlength */, false/* create_detailed_output */,
                        extractorConfigurationFilesArr/* extractor_configuration */,
                        SHARED_CONSTANTS.HADOOP_CAS_CONSUMER_OUTPUT_FILENAME/* output_destination */)));
        return builder.createAggregateDescription();

    } catch (IOException e) {
        throw new ResourceInitializationException(e);
    }

}

From source file: it.crs4.pydoop.pipes.Application.java

License: Apache License

/**
 * Start the child process to handle the task for us.
 * @param conf the task's configuration
 * @param recordReader the fake record reader to update progress with
 * @param output the collector to send output to
 * @param reporter the reporter for the task
 * @param outputKeyClass the class of the output keys
 * @param outputValueClass the class of the output values
 * @throws IOException
 * @throws InterruptedException
 */
Application(JobConf conf, RecordReader<FloatWritable, NullWritable> recordReader,
        OutputCollector<K2, V2> output, Reporter reporter, Class<? extends K2> outputKeyClass,
        Class<? extends V2> outputValueClass) throws IOException, InterruptedException {
    serverSocket = new ServerSocket(0);
    Map<String, String> env = new HashMap<String, String>();
    // add TMPDIR environment variable with the value of java.io.tmpdir
    env.put("TMPDIR", System.getProperty("java.io.tmpdir"));
    env.put(Submitter.PORT, Integer.toString(serverSocket.getLocalPort()));

    TaskAttemptID taskid = TaskAttemptID.forName(conf.get(MRJobConfig.TASK_ATTEMPT_ID));

    // get the task's working directory
    String workDir = LocalJobRunner.getLocalTaskDir(conf.getUser(), taskid.getJobID().toString(),
            taskid.getTaskID().toString(), false);

    //Add token to the environment if security is enabled
    Token<JobTokenIdentifier> jobToken = TokenCache.getJobToken(conf.getCredentials());
    // This password is used as shared secret key between this application and
    // child pipes process
    byte[] password = jobToken.getPassword();

    String localPasswordFile = new File(workDir, "jobTokenPassword").getAbsolutePath();
    writePasswordToLocalFile(localPasswordFile, password, conf);
    env.put("hadoop.pipes.shared.secret.location", localPasswordFile);

    List<String> cmd = new ArrayList<String>();
    String interpretor = conf.get(Submitter.INTERPRETOR);
    if (interpretor != null) {
        cmd.add(interpretor);
    }
    String executable = DistributedCache.getLocalCacheFiles(conf)[0].toString();
    if (!(new File(executable).canExecute())) {
        // LinuxTaskController sets +x permissions on all distcache files already.
        // In case of DefaultTaskController, set permissions here.
        FileUtil.chmod(executable, "u+x");
    }
    cmd.add(executable);
    // wrap the command in a stdout/stderr capture
    // we are starting a map/reduce task of the pipes job; this is not a cleanup
    // attempt.
    File stdout = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDOUT);
    File stderr = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDERR);
    long logLength = TaskLog.getTaskLogLength(conf);
    cmd = TaskLog.captureOutAndError(null, cmd, stdout, stderr, logLength, false);

    process = runClient(cmd, env);
    clientSocket = serverSocket.accept();

    String challenge = getSecurityChallenge();
    String digestToSend = createDigest(password, challenge);
    String digestExpected = createDigest(password, digestToSend);

    handler = new OutputHandler<K2, V2>(output, reporter, recordReader, digestExpected);
    K2 outputKey = (K2) ReflectionUtils.newInstance(outputKeyClass, conf);
    V2 outputValue = (V2) ReflectionUtils.newInstance(outputValueClass, conf);
    downlink = new BinaryProtocol<K1, V1, K2, V2>(clientSocket, handler, outputKey, outputValue, conf);

    downlink.authenticate(digestToSend, challenge);
    waitForAuthentication();
    LOG.debug("Authentication succeeded");
    downlink.start();
    downlink.setJobConf(conf);
}

From source file: nl.sanoma.hdt.report.generator.ReportGeneratorMapper.java

License: Open Source License

/**
 * Creating a reader for the metadata MapFile.
 *
 * @param context
 * @throws IOException
 * @throws InterruptedException
 */
@Override
protected void setup(Mapper.Context context) throws IOException, InterruptedException {

    Path[] cacheFilesLocal = DistributedCache.getLocalCacheFiles(context.getConfiguration());
    // only one file is cached, so index 0 points at the metadata MapFile location
    FileSystem dfs = FileSystem.get(context.getConfiguration());
    metadataMapReader = new MapFile.Reader(dfs, cacheFilesLocal[0].toString() + "/part-m-00000/",
            context.getConfiguration());
}

From source file: org.apache.sysml.runtime.matrix.mapred.MRBaseForCommonInstructions.java

License: Apache License

protected void setupDistCacheFiles(JobConf job) throws IOException {

    if (MRJobConfiguration.getDistCacheInputIndices(job) == null)
        return;

    //boolean isJobLocal = false;
    isJobLocal = InfrastructureAnalyzer.isLocalMode(job);

    String[] inputIndices = MRJobConfiguration.getInputPaths(job);
    String[] dcIndices = MRJobConfiguration.getDistCacheInputIndices(job).split(Instruction.INSTRUCTION_DELIM);
    Path[] dcFiles = DistributedCache.getLocalCacheFiles(job);
    PDataPartitionFormat[] inputPartitionFormats = MRJobConfiguration.getInputPartitionFormats(job);

    DistributedCacheInput[] dcInputs = new DistributedCacheInput[dcIndices.length];
    for (int i = 0; i < dcIndices.length; i++) {
        byte inputIndex = Byte.parseByte(dcIndices[i]);

        //load if not already present (jvm reuse)
        if (!dcValues.containsKey(inputIndex)) {
            // When the job is in local mode, files can be read from HDFS directly -- use 
            // input paths as opposed to "local" paths prepared by DistributedCache. 
            Path p = null;
            if (isJobLocal)
                p = new Path(inputIndices[Byte.parseByte(dcIndices[i])]);
            else
                p = dcFiles[i];

            dcInputs[i] = new DistributedCacheInput(p, MRJobConfiguration.getNumRows(job, inputIndex), //rlens[inputIndex],
                    MRJobConfiguration.getNumColumns(job, inputIndex), //clens[inputIndex],
                    MRJobConfiguration.getNumRowsPerBlock(job, inputIndex), //brlens[inputIndex],
                    MRJobConfiguration.getNumColumnsPerBlock(job, inputIndex), //bclens[inputIndex],
                    inputPartitionFormats[inputIndex]);
            dcValues.put(inputIndex, dcInputs[i]);
        }
    }
}

From source file: org.apache.tez.mapreduce.hadoop.mapreduce.JobContextImpl.java

License: Apache License

/**
 * Return the path array of the localized files
 * @return A path array of localized files
 * @throws IOException
 */
public Path[] getLocalCacheFiles() throws IOException {
    return DistributedCache.getLocalCacheFiles(conf);
}