Example usage for org.apache.hadoop.mapreduce TaskInputOutputContext getConfiguration

Introduction

On this page you can find example usage of org.apache.hadoop.mapreduce TaskInputOutputContext getConfiguration.

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
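
As a minimal sketch of the typical pattern (not taken from the source files below; the ConfigAwareMapper class and the example.field.separator key are hypothetical), note that a Mapper's Context is a TaskInputOutputContext, so getConfiguration() is commonly called in setup() to read job-level settings:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class ConfigAwareMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    // Hypothetical configuration key, used only for this sketch.
    private static final String SEPARATOR_KEY = "example.field.separator";

    private String separator;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Mapper.Context extends TaskInputOutputContext, so this is the
        // getConfiguration() call documented above.
        Configuration conf = context.getConfiguration();
        separator = conf.get(SEPARATOR_KEY, "\t");
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        for (String field : value.toString().split(separator)) {
            context.write(new Text(field), new LongWritable(1L));
        }
    }
}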

Usage

From source file:com.moz.fiji.mapreduce.impl.HFileWriterContext.java

License:Apache License

/**
 * Constructs a new context that can write cells to an HFile that can be loaded into an HBase
 * table.
 *
 * @param hadoopContext is the Hadoop {@link TaskInputOutputContext} that will be used to perform
 *     the writes.
 * @throws IOException on I/O error.
 */
public HFileWriterContext(TaskInputOutputContext<?, ?, ?, ?> hadoopContext) throws IOException {
    super(hadoopContext);
    final Configuration conf = new Configuration(hadoopContext.getConfiguration());
    final FijiURI outputURI = FijiURI.newBuilder(conf.get(FijiConfKeys.FIJI_OUTPUT_TABLE_URI)).build();
    mFiji = Fiji.Factory.open(outputURI, conf);
    mTable = mFiji.openTable(outputURI.getTable());
    mReader = mTable.openTableReader();
    mColumnNameTranslator = HBaseColumnNameTranslator.from(mTable.getLayout());
    mEntityIdFactory = EntityIdFactory.getFactory(mTable.getLayout());
}

From source file:com.moz.fiji.mapreduce.impl.InternalFijiContext.java

License:Apache License

/**
 * Constructs a new implementation of {@link FijiContext}.
 *
 * @param context is the Hadoop {@link TaskInputOutputContext} that will back the new
 *    {@link FijiContext}
 * @throws IOException on I/O error.
 */
protected InternalFijiContext(TaskInputOutputContext context) throws IOException {
    mHadoopContext = context;
    mKeyValueStoreFactory = KeyValueStoreReaderFactory.create(context.getConfiguration());
}

From source file:com.moz.fiji.mapreduce.util.SerializeLoggerAspect.java

License:Apache License

/**
 * Logic to serialize collected profiling content to a file on HDFS. The files are stored
 * in the current working directory for this context, in a folder specified by STATS_DIR. The
 * per-task file is named after the task attempt ID.
 * We obtain the profiling stats collected by the LogTimerAspect in FijiSchema. The format of the
 * file is as follows: Job Name, Job ID, Task Attempt, Function Signature,
 * Aggregate Time (nanoseconds), Number of Invocations, Time per call (nanoseconds)'\n'
 *
 * @param context The {@link TaskInputOutputContext} for this job.
 * @throws IOException If the writes to HDFS fail.
 */
private void serializeToFile(TaskInputOutputContext context) throws IOException {
    Path parentPath = new Path(context.getWorkingDirectory(), STATS_DIR);
    FileSystem fs = parentPath.getFileSystem(context.getConfiguration());
    fs.mkdirs(parentPath);
    Path path = new Path(parentPath, context.getTaskAttemptID().toString());
    OutputStreamWriter out = new OutputStreamWriter(fs.create(path, true), "UTF-8");
    try {
        out.write("Job Name, Job ID, Task Attempt, Function Signature, Aggregate Time (nanoseconds), "
                + "Number of Invocations, Time per call (nanoseconds)\n");

        ConcurrentHashMap<String, LoggingInfo> signatureTimeMap = mLogTimerAspect.getSignatureTimeMap();
        for (Map.Entry<String, LoggingInfo> entrySet : signatureTimeMap.entrySet()) {
            writeProfileInformation(out, context, entrySet.getKey(), entrySet.getValue());
        }

        signatureTimeMap = mMRLogTimerAspect.getSignatureTimeMap();
        for (Map.Entry<String, LoggingInfo> entrySet : signatureTimeMap.entrySet()) {
            writeProfileInformation(out, context, entrySet.getKey(), entrySet.getValue());
        }
    } finally {
        out.close();
    }
}

From source file:datafu.hourglass.mapreduce.CollapsingMapper.java

License:Apache License

@Override
public void setContext(TaskInputOutputContext<Object, Object, Object, Object> context) {
    super.setContext(context);

    if (_mapper instanceof Configurable) {
        ((Configurable) _mapper).setConf(context.getConfiguration());
    }
}

From source file:de.l3s.concatgz.io.ImmediateOutput.java

License:Open Source License

public ImmediateOutput(TaskInputOutputContext context, boolean flushOnWrite) throws IOException {
    this.context = context;
    this.flushOnWrite = flushOnWrite;
    Configuration conf = context.getConfiguration();
    this.dir = getPath(conf);
    this.fs = FileSystem.newInstance(conf);
    this.bufferSize = conf.getInt("io.file.buffer.size", 4096);
    this.replication = getReplication(conf);

    String idPrefix = getIdPrefix(conf);
    file = "" + context.getTaskAttemptID().getTaskID().getId();
    while (file.length() < 5)
        file = "0" + file;
    if (idPrefix.length() > 0)
        file = idPrefix + "-" + file;
    file = "-" + file;
}

From source file:io.apigee.lembos.node.types.TaskInputOutputContextWrap.java

License:Apache License

/**
 * Creates an instance of {@link TaskInputOutputContextWrap}, registers it in the JavaScript {@link Scriptable}
 * scope and sets up the Java<->JavaScript bridge for the {@link TaskInputOutputContext}.
 *
 * @param scope the JavaScript scope to associate the TaskInputOutputContextWrap with
 * @param runtime the Node.js runtime
 * @param context the Hadoop context being wrapped
 *
 * @return the created context wrapper
 */
public static TaskInputOutputContextWrap getInstance(final Scriptable scope, final NodeRuntime runtime,
        final TaskInputOutputContext context) {
    final Scriptable parent = scope.getParentScope() == null ? scope : scope.getParentScope();
    Context ctx = Context.getCurrentContext();

    if (ctx == null) {
        ctx = Context.enter();
    }

    try {
        if (!ScriptableObject.hasProperty(parent, CLASS_NAME)) {
            try {
                ScriptableObject.defineClass(parent, TaskInputOutputContextWrap.class);
            } catch (IllegalAccessException | InstantiationException | InvocationTargetException e) {
                // This should never happen at runtime but we have to throw something
                throw new RuntimeException(e);
            }
        }

        final TaskInputOutputContextWrap contextWrapper = (TaskInputOutputContextWrap) ctx.newObject(scope,
                CLASS_NAME);

        contextWrapper.jsConf = ConfigurationWrap.getInstance(runtime, context.getConfiguration());
        contextWrapper.context = context;
        contextWrapper.runtime = runtime;
        contextWrapper.scope = scope;

        return contextWrapper;
    } finally {
        Context.exit();
    }
}

From source file:it.crs4.pydoop.mapreduce.pipes.Application.java

License:Apache License

/**
 * Start the child process to handle the task for us.
 * @throws IOException
 * @throws InterruptedException
 */
Application(TaskInputOutputContext<K1, V1, K2, V2> context, DummyRecordReader input)
        throws IOException, InterruptedException {

    Configuration conf = context.getConfiguration();
    serverSocket = new ServerSocket(0);
    Map<String, String> env = new HashMap<String, String>();
    // add TMPDIR environment variable with the value of java.io.tmpdir
    env.put("TMPDIR", System.getProperty("java.io.tmpdir"));
    env.put(Submitter.PORT, Integer.toString(serverSocket.getLocalPort()));

    //Add token to the environment if security is enabled
    Token<JobTokenIdentifier> jobToken = TokenCache.getJobToken(context.getCredentials());
    // This password is used as shared secret key between this application and
    // child pipes process
    byte[] password = jobToken.getPassword();
    String localPasswordFile = new File(".") + Path.SEPARATOR + "jobTokenPassword";
    writePasswordToLocalFile(localPasswordFile, password, conf);
    // FIXME why is this not Submitter.SECRET_LOCATION ?
    env.put("hadoop.pipes.shared.secret.location", localPasswordFile);

    List<String> cmd = new ArrayList<String>();
    String interpretor = conf.get(Submitter.INTERPRETOR);
    if (interpretor != null) {
        cmd.add(interpretor);
    }
    String executable = context.getLocalCacheFiles()[0].toString();
    if (!(new File(executable).canExecute())) {
        // LinuxTaskController sets +x permissions on all distcache files already.
        // In case of DefaultTaskController, set permissions here.
        FileUtil.chmod(executable, "u+x");
    }
    cmd.add(executable);
    // wrap the command in a stdout/stderr capture
    // We are starting a map/reduce task of the pipes job; this is not a cleanup
    // attempt.
    TaskAttemptID taskid = context.getTaskAttemptID();

    File stdout = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDOUT);
    File stderr = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDERR);
    long logLength = TaskLog.getTaskLogLength(conf);
    cmd = TaskLog.captureOutAndError(null, cmd, stdout, stderr, logLength, false);
    process = runClient(cmd, env);
    clientSocket = serverSocket.accept();

    String challenge = getSecurityChallenge();
    String digestToSend = createDigest(password, challenge);
    String digestExpected = createDigest(password, digestToSend);

    handler = new OutputHandler<K2, V2>(context, input, digestExpected);
    K2 outputKey = (K2) ReflectionUtils.newInstance(context.getOutputKeyClass(), conf);
    V2 outputValue = (V2) ReflectionUtils.newInstance(context.getOutputValueClass(), conf);
    downlink = new BinaryProtocol<K1, V1, K2, V2>(clientSocket, handler, outputKey, outputValue, conf);

    downlink.authenticate(digestToSend, challenge);
    waitForAuthentication();
    LOG.debug("Authentication succeeded");
    downlink.start();
    downlink.setJobConf(conf);
}

From source file:org.apache.crunch.impl.mr.run.CrunchTaskContext.java

License:Apache License

public CrunchTaskContext(TaskInputOutputContext<Object, Object, Object, Object> taskContext,
        NodeContext nodeContext) {
    this.taskContext = taskContext;
    this.nodeContext = nodeContext;
    Configuration conf = taskContext.getConfiguration();
    Path path = new Path(new Path(conf.get(PlanningParameters.CRUNCH_WORKING_DIRECTORY)),
            nodeContext.toString());
    try {
        this.nodes = (List<RTNode>) DistCache.read(conf, path);
    } catch (IOException e) {
        throw new CrunchRuntimeException("Could not read runtime node information", e);
    }
}

From source file:org.apache.crunch.io.CrunchOutputs.java

License:Apache License

private static Map<String, OutputConfig> getNamedOutputs(TaskInputOutputContext<?, ?, ?, ?> context) {
    Map<String, OutputConfig> out = Maps.newHashMap();
    Configuration conf = context.getConfiguration();
    for (String input : Splitter.on(RECORD_SEP).split(conf.get(CRUNCH_OUTPUTS))) {
        List<String> fields = Lists.newArrayList(SPLITTER.split(input));
        String name = fields.get(0);
        FormatBundle<OutputFormat> bundle = FormatBundle.fromSerialized(fields.get(1), OutputFormat.class);
        try {
            Class<?> keyClass = Class.forName(fields.get(2));
            Class<?> valueClass = Class.forName(fields.get(3));
            out.put(name, new OutputConfig(bundle, keyClass, valueClass));
        } catch (ClassNotFoundException e) {
            throw new CrunchRuntimeException(e);
        }
    }
    return out;
}

From source file:org.apache.crunch.io.hbase.HBaseData.java

License:Apache License

@Override
public Iterable<Pair<ImmutableBytesWritable, Result>> read(TaskInputOutputContext<?, ?, ?, ?> ctxt)
        throws IOException {
    Configuration hconf = HBaseConfiguration.create(ctxt.getConfiguration());
    HTable htable = new HTable(hconf, table);
    Scan scan = HBaseSourceTarget.convertStringToScan(scanAsString);
    return new HTableIterable(htable, scan);
}