List of usage examples for org.apache.hadoop.mapred TaskAttemptID forName
public static TaskAttemptID forName(String str) throws IllegalArgumentException
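Before the examples from real projects below, a minimal self-contained sketch of how forName is typically called (the attempt-ID string and the class name ForNameExample are illustrative, not taken from any of the sources listed): parse the string form of a task attempt ID, then navigate to its task and job IDs.

import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.TaskAttemptID;

public class ForNameExample {
    public static void main(String[] args) {
        // Attempt IDs have the form attempt_<jtIdentifier>_<jobId>_<m|r>_<taskId>_<attemptId>;
        // this particular value is made up for illustration.
        String str = "attempt_200707121733_0003_m_000005_0";

        // forName parses the string back into a TaskAttemptID,
        // throwing IllegalArgumentException if the string is malformed.
        TaskAttemptID attempt = TaskAttemptID.forName(str);

        // The parsed object gives access to the enclosing task and job IDs,
        // and toString() reproduces the original string form.
        JobID jobId = attempt.getJobID();
        System.out.println(attempt.getTaskID() + " belongs to job " + jobId);
        System.out.println(attempt.toString().equals(str)); // true
    }
}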
From source file:com.digitalpebble.behemoth.uima.UIMAMapper.java
License:Apache License
public void configure(JobConf conf) {
    this.config = conf;
    storeshortnames = config.getBoolean("uima.store.short.names", true);
    File pearpath = new File(conf.get("uima.pear.path"));
    String pearname = pearpath.getName();
    URL urlPEAR = null;
    try {
        Path[] localArchives = DistributedCache.getLocalCacheFiles(conf);
        // identify the right archive
        for (Path la : localArchives) {
            String localPath = la.toUri().toString();
            LOG.info("Inspecting local paths " + localPath);
            if (!localPath.endsWith(pearname))
                continue;
            urlPEAR = new URL("file://" + localPath);
            break;
        }
    } catch (IOException e) {
        throw new RuntimeException("Impossible to retrieve gate application from distributed cache", e);
    }
    if (urlPEAR == null)
        throw new RuntimeException("UIMA pear " + pearpath + " not available in distributed cache");
    File pearFile = new File(urlPEAR.getPath());
    // should check whether a different mapper has already unpacked it
    // but for now we just unpack in a different location for every mapper
    TaskAttemptID attempt = TaskAttemptID.forName(conf.get("mapred.task.id"));
    installDir = new File(pearFile.getParentFile(), attempt.toString());
    PackageBrowser instPear = PackageInstaller.installPackage(installDir, pearFile, true);
    // get the resources required for the AnalysisEngine
    org.apache.uima.resource.ResourceManager rsrcMgr = UIMAFramework.newDefaultResourceManager();
    // Create analysis engine from the installed PEAR package using
    // the created PEAR specifier
    XMLInputSource in;
    try {
        in = new XMLInputSource(instPear.getComponentPearDescPath());
        ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);
        tae = UIMAFramework.produceAnalysisEngine(specifier, rsrcMgr, null);
        cas = tae.newCAS();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    String[] featuresFilters = this.config.get("uima.features.filter", "").split(",");
    // the feature filters have the following form: Type:featureName
    // we group them by annotation type
    for (String ff : featuresFilters) {
        String[] fp = ff.split(":");
        if (fp.length != 2)
            continue;
        Set<Feature> features = featfilts.get(fp[0]);
        if (features == null) {
            features = new HashSet<Feature>();
            featfilts.put(fp[0], features);
        }
        Feature f = cas.getTypeSystem().getFeatureByFullName(ff);
        if (f != null)
            features.add(f);
    }
    String[] annotTypes = this.config.get("uima.annotations.filter", "").split(",");
    uimatypes = new ArrayList<Type>(annotTypes.length);
    for (String type : annotTypes) {
        Type aType = cas.getTypeSystem().getType(type);
        uimatypes.add(aType);
    }
}
From source file:com.facebook.hiveio.output.HiveOutput.java
License:Apache License
/**
 * Write records to a Hive table
 *
 * @param outputDesc description of Hive table
 * @param records the records to write
 * @throws TException
 * @throws IOException
 * @throws InterruptedException
 */
public static void writeTable(HiveOutputDescription outputDesc, Iterable<HiveWritableRecord> records)
        throws TException, IOException, InterruptedException {
    long uniqueId = System.nanoTime();
    String taskAttemptIdStr = "attempt_200707121733_" + (int) uniqueId + "_m_000005_0";
    String profile = Long.toString(uniqueId);

    HiveConf conf = new HiveConf(HiveOutput.class);
    conf.setInt("mapred.task.partition", 1);
    conf.set("mapred.task.id", taskAttemptIdStr);

    HiveApiOutputFormat.initProfile(conf, outputDesc, profile);

    HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
    outputFormat.setMyProfileId(profile);

    TaskAttemptID taskAttemptID = TaskAttemptID.forName(taskAttemptIdStr);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(new JobConf(conf), taskAttemptID);
    JobContext jobContext = new HackJobContext(new JobConf(conf), taskAttemptID.getJobID());

    RecordWriterImpl recordWriter = outputFormat.getRecordWriter(taskContext);

    HiveApiOutputCommitter committer = outputFormat.getOutputCommitter(taskContext);
    committer.setupJob(jobContext);

    committer.setupTask(taskContext);
    for (HiveWritableRecord record : records) {
        recordWriter.write(NullWritable.get(), record);
    }
    recordWriter.close(taskContext);
    committer.commitTask(taskContext);

    committer.commitJob(jobContext);
}
From source file:com.zjy.mongo.mapred.output.MongoRecordWriter.java
License:Apache License
/**
 * Create a new MongoRecordWriter.
 * @param conf the job configuration
 */
public MongoRecordWriter(final JobConf conf) {
    super(Collections.<DBCollection>emptyList(),
            new TaskAttemptContextImpl(conf, TaskAttemptID.forName(conf.get("mapred.task.id"))));
    configuration = conf;
}
From source file:eu.stratosphere.hadoopcompatibility.HadoopOutputFormatWrapper.java
License:Apache License
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    this.fileOutputCommitterWrapper.setupJob(this.jobConf);
    if (Integer.toString(taskNumber + 1).length() <= 6) {
        this.jobConf.set("mapred.task.id",
                "attempt__0000_r_"
                        + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ")
                                .replace(" ", "0")
                        + Integer.toString(taskNumber + 1) + "_0");
        // compatible for hadoop 2.2.0, the temporary output directory is different from hadoop 1.2.1
        this.jobConf.set("mapreduce.task.output.dir",
                this.fileOutputCommitterWrapper
                        .getTempTaskOutputPath(this.jobConf,
                                TaskAttemptID.forName(this.jobConf.get("mapred.task.id")))
                        .toString());
    } else {
        throw new IOException("task id too large");
    }
    this.recordWriter = this.hadoopOutputFormat.getRecordWriter(null, this.jobConf,
            Integer.toString(taskNumber + 1), new DummyHadoopProgressable());
}
From source file:eu.stratosphere.hadoopcompatibility.HadoopOutputFormatWrapper.java
License:Apache License
/**
 * commit the task by moving the output file out from the temporary directory.
 * @throws IOException
 */
@Override
public void close() throws IOException {
    this.recordWriter.close(new DummyHadoopReporter());
    if (this.fileOutputCommitterWrapper.needsTaskCommit(this.jobConf,
            TaskAttemptID.forName(this.jobConf.get("mapred.task.id")))) {
        this.fileOutputCommitterWrapper.commitTask(this.jobConf,
                TaskAttemptID.forName(this.jobConf.get("mapred.task.id")));
    }
    // TODO: commit job when all the tasks are finished
}
From source file:eu.stratosphere.hadoopcompatibility.mapred.HadoopOutputFormat.java
License:Apache License
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    if (Integer.toString(taskNumber + 1).length() > 6) {
        throw new IOException("Task id too large.");
    }

    TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
            + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
            + Integer.toString(taskNumber + 1) + "_0");

    try {
        this.context = HadoopUtils.instantiateTaskAttemptContext(this.jobConf, taskAttemptID);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    this.jobConf.set("mapred.task.id", taskAttemptID.toString());
    // for hadoop 2.2
    this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());

    this.fileOutputCommitter = new FileOutputCommitter();

    try {
        this.jobContext = HadoopUtils.instantiateJobContext(this.jobConf, new JobID());
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    this.fileOutputCommitter.setupJob(jobContext);

    this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
            Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
}
From source file:eu.stratosphere.hadoopcompatibility.mapred.record.HadoopRecordOutputFormat.java
License:Apache License
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    this.fileOutputCommitterWrapper.setupJob(this.jobConf);
    if (Integer.toString(taskNumber + 1).length() <= 6) {
        this.jobConf.set("mapred.task.id",
                "attempt__0000_r_"
                        + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ")
                                .replace(" ", "0")
                        + Integer.toString(taskNumber + 1) + "_0");
        // compatible for hadoop 2.2.0, the temporary output directory is different from hadoop 1.2.1
        this.jobConf.set("mapreduce.task.output.dir",
                this.fileOutputCommitterWrapper
                        .getTempTaskOutputPath(this.jobConf,
                                TaskAttemptID.forName(this.jobConf.get("mapred.task.id")))
                        .toString());
    } else {
        throw new IOException("task id too large");
    }
    this.recordWriter = this.hadoopOutputFormat.getRecordWriter(null, this.jobConf,
            Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
}
From source file:eu.stratosphere.hadoopcompatibility.mapred.record.HadoopRecordOutputFormat.java
License:Apache License
/**
 * commit the task by moving the output file out from the temporary directory.
 * @throws IOException
 */
@Override
public void close() throws IOException {
    this.recordWriter.close(new HadoopDummyReporter());
    if (this.fileOutputCommitterWrapper.needsTaskCommit(this.jobConf,
            TaskAttemptID.forName(this.jobConf.get("mapred.task.id")))) {
        this.fileOutputCommitterWrapper.commitTask(this.jobConf,
                TaskAttemptID.forName(this.jobConf.get("mapred.task.id")));
    }
    // TODO: commit job when all the tasks are finished
}
From source file:it.crs4.pydoop.pipes.Application.java
License:Apache License
/**
 * Start the child process to handle the task for us.
 * @param conf the task's configuration
 * @param recordReader the fake record reader to update progress with
 * @param output the collector to send output to
 * @param reporter the reporter for the task
 * @param outputKeyClass the class of the output keys
 * @param outputValueClass the class of the output values
 * @throws IOException
 * @throws InterruptedException
 */
Application(JobConf conf, RecordReader<FloatWritable, NullWritable> recordReader,
        OutputCollector<K2, V2> output, Reporter reporter, Class<? extends K2> outputKeyClass,
        Class<? extends V2> outputValueClass) throws IOException, InterruptedException {
    serverSocket = new ServerSocket(0);
    Map<String, String> env = new HashMap<String, String>();
    // add TMPDIR environment variable with the value of java.io.tmpdir
    env.put("TMPDIR", System.getProperty("java.io.tmpdir"));
    env.put(Submitter.PORT, Integer.toString(serverSocket.getLocalPort()));

    TaskAttemptID taskid = TaskAttemptID.forName(conf.get(MRJobConfig.TASK_ATTEMPT_ID));

    // get the task's working directory
    String workDir = LocalJobRunner.getLocalTaskDir(conf.getUser(), taskid.getJobID().toString(),
            taskid.getTaskID().toString(), false);

    // Add token to the environment if security is enabled
    Token<JobTokenIdentifier> jobToken = TokenCache.getJobToken(conf.getCredentials());
    // This password is used as shared secret key between this application and
    // child pipes process
    byte[] password = jobToken.getPassword();
    String localPasswordFile = new File(workDir, "jobTokenPassword").getAbsolutePath();
    writePasswordToLocalFile(localPasswordFile, password, conf);
    env.put("hadoop.pipes.shared.secret.location", localPasswordFile);

    List<String> cmd = new ArrayList<String>();
    String interpretor = conf.get(Submitter.INTERPRETOR);
    if (interpretor != null) {
        cmd.add(interpretor);
    }
    String executable = DistributedCache.getLocalCacheFiles(conf)[0].toString();
    if (!(new File(executable).canExecute())) {
        // LinuxTaskController sets +x permissions on all distcache files already.
        // In case of DefaultTaskController, set permissions here.
        FileUtil.chmod(executable, "u+x");
    }
    cmd.add(executable);

    // wrap the command in a stdout/stderr capture
    // we are starting map/reduce task of the pipes job. this is not a cleanup
    // attempt.
    File stdout = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDOUT);
    File stderr = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDERR);
    long logLength = TaskLog.getTaskLogLength(conf);
    cmd = TaskLog.captureOutAndError(null, cmd, stdout, stderr, logLength, false);

    process = runClient(cmd, env);
    clientSocket = serverSocket.accept();

    String challenge = getSecurityChallenge();
    String digestToSend = createDigest(password, challenge);
    String digestExpected = createDigest(password, digestToSend);

    handler = new OutputHandler<K2, V2>(output, reporter, recordReader, digestExpected);
    K2 outputKey = (K2) ReflectionUtils.newInstance(outputKeyClass, conf);
    V2 outputValue = (V2) ReflectionUtils.newInstance(outputValueClass, conf);
    downlink = new BinaryProtocol<K1, V1, K2, V2>(clientSocket, handler, outputKey, outputValue, conf);

    downlink.authenticate(digestToSend, challenge);
    waitForAuthentication();
    LOG.debug("Authentication succeeded");
    downlink.start();
    downlink.setJobConf(conf);
}
From source file:ml.shifu.guagua.mapreduce.GuaguaMapper.java
License:Apache License
/**
 * In our cluster with hadoop-0.20.2-cdh3u4a, a runtime exception is thrown to Child but the mapper status
 * doesn't change to failed. We fail this task explicitly to make sure our fail-over can make the job successful.
 */
private void failTask(Throwable t, Configuration conf) {
    LOG.error("failtask: Killing task: {} ", conf.get(GuaguaMapReduceConstants.MAPRED_TASK_ID));
    try {
        org.apache.hadoop.mapred.JobClient jobClient = new org.apache.hadoop.mapred.JobClient(
                (org.apache.hadoop.mapred.JobConf) conf);
        JobID jobId = JobID.forName(conf.get(GuaguaMapReduceConstants.MAPRED_JOB_ID));
        RunningJob job = jobClient.getJob(jobId);
        job.killTask(TaskAttemptID.forName(conf.get(GuaguaMapReduceConstants.MAPRED_TASK_ID)), true);
    } catch (IOException ioe) {
        throw new GuaguaRuntimeException(ioe);
    }
}