Example usage for org.apache.hadoop.mapred JobConf setClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.JobConf.setClass, drawn from open source projects.

Prototype

public void setClass(String name, Class<?> theClass, Class<?> xface) 

Document

Sets the value of the name property to the name of theClass, which must implement the given interface xface; a RuntimeException is thrown if theClass does not implement xface.
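
A minimal sketch of the typical setClass/getClass round trip, for orientation before the project examples below. The property name "example.output.format.class" and the classes used here are chosen purely for illustration and are not taken from any of the quoted projects.

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class SetClassSketch {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // Register TextOutputFormat under a custom property; the third argument constrains
        // the stored class to implementations of OutputFormat.
        conf.setClass("example.output.format.class", TextOutputFormat.class, OutputFormat.class);

        // Read the class back; the interface argument enforces the same constraint on retrieval.
        Class<? extends OutputFormat> outputFormatClass =
                conf.getClass("example.output.format.class", null, OutputFormat.class);
        System.out.println("Configured output format: " + outputFormatClass);
    }
}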

Usage

From source file:org.apache.avro.mapred.AvroMultipleOutputs.java

License:Apache License

/**
 * Adds a named output for the job.
 * <p/>
 *
 * @param conf              job conf to add the named output
 * @param namedOutput       named output name, it has to be a word, letters
 *                          and numbers only, cannot be the word 'part' as
 *                          that is reserved for the
 *                          default output.
 * @param multi             indicates if the named output is multi
 * @param outputFormatClass OutputFormat class.
 * @param schema            Schema to used for this namedOutput
 */
private static void addNamedOutput(JobConf conf, String namedOutput, boolean multi,
        Class<? extends OutputFormat> outputFormatClass, Schema schema) {
    checkNamedOutputName(namedOutput);
    checkNamedOutput(conf, namedOutput, true);
    boolean isMapOnly = conf.getNumReduceTasks() == 0;
    if (schema != null)
        conf.set(MO_PREFIX + namedOutput + ".schema", schema.toString());
    conf.set(NAMED_OUTPUTS, conf.get(NAMED_OUTPUTS, "") + " " + namedOutput);
    conf.setClass(MO_PREFIX + namedOutput + FORMAT, outputFormatClass, OutputFormat.class);
    conf.setBoolean(MO_PREFIX + namedOutput + MULTI, multi);
}
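
As a usage note on the example above, the class stored with setClass would presumably be read back with the matching getClass call elsewhere in AvroMultipleOutputs. The companion getter below is a hypothetical sketch, assuming the same MO_PREFIX and FORMAT constants used in addNamedOutput(); it is not quoted from the Avro source.

// Hypothetical companion getter (same MO_PREFIX and FORMAT constants assumed as above).
private static Class<? extends OutputFormat> getNamedOutputFormatClass(JobConf conf, String namedOutput) {
    // Returns null if the named output was never registered via addNamedOutput().
    return conf.getClass(MO_PREFIX + namedOutput + FORMAT, null, OutputFormat.class);
}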

From source file:org.apache.mahout.avro.text.mapred.AvroDocumentProcessor.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf();
    if (args.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        return 0;
    }

    conf.setStrings("io.serializations",
            new String[] { WritableSerialization.class.getName(), AvroSpecificSerialization.class.getName(),
                    AvroReflectSerialization.class.getName(), AvroGenericSerialization.class.getName() });

    AvroComparator.setSchema(AvroDocument._SCHEMA); //TODO: must be done in mapper, reducer configure method.

    conf.setClass("mapred.output.key.comparator.class", AvroComparator.class, RawComparator.class);

    conf.setJarByClass(AvroDocumentProcessor.class);
    conf.setMapperClass(ProcessorMapper.class);
    conf.setReducerClass(IdentityReducer.class);
    conf.setOutputKeyClass(AvroDocument.class);
    conf.setOutputValueClass(NullWritable.class);

    conf.setInputFormat(AvroInputFormat.class);
    conf.setOutputFormat(AvroOutputFormat.class);

    AvroInputFormat.setAvroInputClass(conf, AvroDocument.class);
    AvroOutputFormat.setAvroOutputClass(conf, AvroDocument.class);

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);

    FileSystem fs = FileSystem.get(conf);
    fs.delete(output, true);

    FileInputFormat.addInputPath(conf, input);
    FileOutputFormat.setOutputPath(conf, output);

    RunningJob job = JobClient.runJob(conf);
    job.waitForCompletion();

    return job.isComplete() ? 0 : 1;
}
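
A brief note on the comparator line above: on the old mapred API, JobConf.setOutputKeyComparatorClass writes the same "mapred.output.key.comparator.class" property with the same RawComparator constraint, so the following should be an equivalent, more direct form (stated as an assumption about the Hadoop version in use):

conf.setOutputKeyComparatorClass(AvroComparator.class);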

From source file:org.apache.pig.backend.hadoop.executionengine.tez.TezDagBuilder.java

License:Apache License

private Vertex newVertex(TezOperator tezOp, boolean isMap)
        throws IOException, ClassNotFoundException, InterruptedException {
    ProcessorDescriptor procDesc = ProcessorDescriptor.create(tezOp.getProcessorName());

    // Pass physical plans to vertex as user payload.
    JobConf payloadConf = new JobConf(ConfigurationUtil.toConfiguration(pc.getProperties(), false));

    // We do this so that dag.getCredentials(), job.getCredentials(),
    // job.getConfiguration().getCredentials() all reference the same Credentials object
    // Unfortunately there is no setCredentials() on Job
    payloadConf.setCredentials(dag.getCredentials());
    // We won't actually use this job, but we need it to talk with the Load Store funcs
    @SuppressWarnings("deprecation")
    Job job = new Job(payloadConf);
    payloadConf = (JobConf) job.getConfiguration();

    if (tezOp.sampleOperator != null) {
        payloadConf.set(PigProcessor.SAMPLE_VERTEX, tezOp.sampleOperator.getOperatorKey().toString());
    }

    if (tezOp.sortOperator != null) {
        payloadConf.set(PigProcessor.SORT_VERTEX, tezOp.sortOperator.getOperatorKey().toString());
    }

    String tmp;
    long maxCombinedSplitSize = 0;
    if (!tezOp.combineSmallSplits()
            || pc.getProperties().getProperty(PigConfiguration.PIG_SPLIT_COMBINATION, "true").equals("false"))
        payloadConf.setBoolean(PigConfiguration.PIG_NO_SPLIT_COMBINATION, true);
    else if ((tmp = pc.getProperties().getProperty(PigConfiguration.PIG_MAX_COMBINED_SPLIT_SIZE,
            null)) != null) {
        try {
            maxCombinedSplitSize = Long.parseLong(tmp);
        } catch (NumberFormatException e) {
            log.warn(
                    "Invalid numeric format for pig.maxCombinedSplitSize; use the default maximum combined split size");
        }
    }
    if (maxCombinedSplitSize > 0)
        payloadConf.setLong("pig.maxCombinedSplitSize", maxCombinedSplitSize);

    payloadConf.set("pig.inputs", ObjectSerializer.serialize(tezOp.getLoaderInfo().getInp()));
    payloadConf.set("pig.inpSignatures",
            ObjectSerializer.serialize(tezOp.getLoaderInfo().getInpSignatureLists()));
    payloadConf.set("pig.inpLimits", ObjectSerializer.serialize(tezOp.getLoaderInfo().getInpLimits()));
    // Process stores
    LinkedList<POStore> stores = processStores(tezOp, payloadConf, job);

    payloadConf.set("pig.pigContext", ObjectSerializer.serialize(pc));
    payloadConf.set("udf.import.list", ObjectSerializer.serialize(PigContext.getPackageImportList()));
    payloadConf.set("exectype", "TEZ");
    payloadConf.setBoolean(MRConfiguration.MAPPER_NEW_API, true);
    payloadConf.setClass(MRConfiguration.INPUTFORMAT_CLASS, PigInputFormat.class, InputFormat.class);

    // Set parent plan for all operators in the Tez plan.
    new PhyPlanSetter(tezOp.plan).visit();

    // Set the endOfAllInput flag on the physical plan if certain operators that
    // use this property (such as STREAM) are present in the plan.
    EndOfAllInputSetter.EndOfAllInputChecker checker = new EndOfAllInputSetter.EndOfAllInputChecker(tezOp.plan);
    checker.visit();
    if (checker.isEndOfAllInputPresent()) {
        payloadConf.set(JobControlCompiler.END_OF_INP_IN_MAP, "true");
    }

    // Configure the classes for incoming shuffles to this TezOp
    // TODO: Refactor out resetting input keys, PIG-3957
    List<PhysicalOperator> roots = tezOp.plan.getRoots();
    if (roots.size() == 1 && roots.get(0) instanceof POPackage) {
        POPackage pack = (POPackage) roots.get(0);

        List<PhysicalOperator> succsList = tezOp.plan.getSuccessors(pack);
        if (succsList != null) {
            succsList = new ArrayList<PhysicalOperator>(succsList);
        }
        byte keyType = pack.getPkgr().getKeyType();
        tezOp.plan.remove(pack);
        payloadConf.set("pig.reduce.package", ObjectSerializer.serialize(pack));
        setIntermediateOutputKeyValue(keyType, payloadConf, tezOp);
        POShuffleTezLoad newPack;
        newPack = new POShuffleTezLoad(pack);
        if (tezOp.isSkewedJoin()) {
            newPack.setSkewedJoins(true);
        }
        tezOp.plan.add(newPack);

        // Set input keys for POShuffleTezLoad. This is used to identify
        // the inputs that are attached to the POShuffleTezLoad in the
        // backend.
        Map<Integer, String> localRearrangeMap = new TreeMap<Integer, String>();
        for (TezOperator pred : mPlan.getPredecessors(tezOp)) {
            if (tezOp.sampleOperator != null && tezOp.sampleOperator == pred) {
                // skip sample vertex input
            } else {
                String inputKey = pred.getOperatorKey().toString();
                if (pred.isVertexGroup()) {
                    pred = mPlan.getOperator(pred.getVertexGroupMembers().get(0));
                }
                LinkedList<POLocalRearrangeTez> lrs = PlanHelper.getPhysicalOperators(pred.plan,
                        POLocalRearrangeTez.class);
                for (POLocalRearrangeTez lr : lrs) {
                    if (lr.isConnectedToPackage()
                            && lr.getOutputKey().equals(tezOp.getOperatorKey().toString())) {
                        localRearrangeMap.put((int) lr.getIndex(), inputKey);
                    }
                }
            }
        }
        for (Map.Entry<Integer, String> entry : localRearrangeMap.entrySet()) {
            newPack.addInputKey(entry.getValue());
        }

        if (succsList != null) {
            for (PhysicalOperator succs : succsList) {
                tezOp.plan.connect(newPack, succs);
            }
        }

        setIntermediateOutputKeyValue(pack.getPkgr().getKeyType(), payloadConf, tezOp);
    } else if (roots.size() == 1 && roots.get(0) instanceof POIdentityInOutTez) {
        POIdentityInOutTez identityInOut = (POIdentityInOutTez) roots.get(0);
        // TODO Need to fix multiple input key mapping
        TezOperator identityInOutPred = null;
        for (TezOperator pred : mPlan.getPredecessors(tezOp)) {
            if (!pred.isSampleAggregation()) {
                identityInOutPred = pred;
                break;
            }
        }
        identityInOut.setInputKey(identityInOutPred.getOperatorKey().toString());
    } else if (roots.size() == 1 && roots.get(0) instanceof POValueInputTez) {
        POValueInputTez valueInput = (POValueInputTez) roots.get(0);

        LinkedList<String> scalarInputs = new LinkedList<String>();
        for (POUserFunc userFunc : PlanHelper.getPhysicalOperators(tezOp.plan, POUserFunc.class)) {
            if (userFunc.getFunc() instanceof ReadScalarsTez) {
                scalarInputs.add(((ReadScalarsTez) userFunc.getFunc()).getTezInputs()[0]);
            }
        }
        // Make sure we don't find the scalar
        for (TezOperator pred : mPlan.getPredecessors(tezOp)) {
            if (!scalarInputs.contains(pred.getOperatorKey().toString())) {
                valueInput.setInputKey(pred.getOperatorKey().toString());
                break;
            }
        }
    }
    JobControlCompiler.setOutputFormat(job);

    // set parent plan in all operators. currently the parent plan is really
    // used only when POStream, POSplit are present in the plan
    new PhyPlanSetter(tezOp.plan).visit();

    // Serialize the execution plan
    payloadConf.set(PigProcessor.PLAN, ObjectSerializer.serialize(tezOp.plan));

    UDFContext.getUDFContext().serialize(payloadConf);

    MRToTezHelper.processMRSettings(payloadConf, globalConf);

    if (!pc.inIllustrator) {
        for (POStore store : stores) {
            // unset inputs for POStore, otherwise, map/reduce plan will be unnecessarily deserialized
            store.setInputs(null);
            store.setParentPlan(null);
        }
        // We put them in the reduce because PigOutputCommitter checks the
        // ID of the task to see if it's a map, and if not, calls the reduce
        // committers.
        payloadConf.set(JobControlCompiler.PIG_MAP_STORES,
                ObjectSerializer.serialize(new ArrayList<POStore>()));
        payloadConf.set(JobControlCompiler.PIG_REDUCE_STORES, ObjectSerializer.serialize(stores));
    }

    if (tezOp.isNeedEstimateParallelism()) {
        payloadConf.setBoolean(PigProcessor.ESTIMATE_PARALLELISM, true);
        log.info("Estimate quantile for sample aggregation vertex " + tezOp.getOperatorKey().toString());
    }

    // Take our assembled configuration and create a vertex
    UserPayload userPayload = TezUtils.createUserPayloadFromConf(payloadConf);
    procDesc.setUserPayload(userPayload);

    Vertex vertex = Vertex.create(tezOp.getOperatorKey().toString(), procDesc, tezOp.getVertexParallelism(),
            isMap ? MRHelpers.getResourceForMRMapper(globalConf)
                    : MRHelpers.getResourceForMRReducer(globalConf));

    Map<String, String> taskEnv = new HashMap<String, String>();
    MRHelpers.updateEnvBasedOnMRTaskEnv(globalConf, taskEnv, isMap);
    vertex.setTaskEnvironment(taskEnv);

    // All these classes are @InterfaceAudience.Private in Hadoop. Switch to Tez methods in TEZ-1012
    // set the timestamps, public/private visibility of the archives and files
    ClientDistributedCacheManager.determineTimestampsAndCacheVisibilities(globalConf);
    // get DelegationToken for each cached file
    ClientDistributedCacheManager.getDelegationTokens(globalConf, job.getCredentials());
    MRApps.setupDistributedCache(globalConf, localResources);
    vertex.addTaskLocalFiles(localResources);

    vertex.setTaskLaunchCmdOpts(isMap ? MRHelpers.getJavaOptsForMRMapper(globalConf)
            : MRHelpers.getJavaOptsForMRReducer(globalConf));

    log.info("For vertex - " + tezOp.getOperatorKey().toString() + ": parallelism="
            + tezOp.getVertexParallelism() + ", memory=" + vertex.getTaskResource().getMemory() + ", java opts="
            + vertex.getTaskLaunchCmdOpts());

    // Right now there can only be one of each of these. Will need to be
    // more generic when there can be more.
    for (POLoad ld : tezOp.getLoaderInfo().getLoads()) {

        // TODO: These should get the globalConf, or a merged version that
        // keeps settings like pig.maxCombinedSplitSize
        vertex.setLocationHint(
                VertexLocationHint.create(tezOp.getLoaderInfo().getInputSplitInfo().getTaskLocationHints()));
        vertex.addDataSource(ld.getOperatorKey().toString(), DataSourceDescriptor.create(
                InputDescriptor.create(MRInput.class.getName())
                        .setUserPayload(UserPayload.create(MRRuntimeProtos.MRInputUserPayloadProto.newBuilder()
                                .setConfigurationBytes(TezUtils.createByteStringFromConf(payloadConf))
                                .setSplits(tezOp.getLoaderInfo().getInputSplitInfo().getSplitsProto()).build()
                                .toByteString().asReadOnlyByteBuffer())),
                InputInitializerDescriptor.create(MRInputSplitDistributor.class.getName()),
                dag.getCredentials()));
    }

    for (POStore store : stores) {

        ArrayList<POStore> emptyList = new ArrayList<POStore>();
        ArrayList<POStore> singleStore = new ArrayList<POStore>();
        singleStore.add(store);

        Configuration outputPayLoad = new Configuration(payloadConf);
        outputPayLoad.set(JobControlCompiler.PIG_MAP_STORES, ObjectSerializer.serialize(emptyList));
        outputPayLoad.set(JobControlCompiler.PIG_REDUCE_STORES, ObjectSerializer.serialize(singleStore));

        OutputDescriptor storeOutDescriptor = OutputDescriptor.create(MROutput.class.getName())
                .setUserPayload(TezUtils.createUserPayloadFromConf(outputPayLoad));
        if (tezOp.getVertexGroupStores() != null) {
            OperatorKey vertexGroupKey = tezOp.getVertexGroupStores().get(store.getOperatorKey());
            if (vertexGroupKey != null) {
                getPlan().getOperator(vertexGroupKey).getVertexGroupInfo()
                        .setStoreOutputDescriptor(storeOutDescriptor);
                continue;
            }
        }
        vertex.addDataSink(store.getOperatorKey().toString(), new DataSinkDescriptor(storeOutDescriptor,
                OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), dag.getCredentials()));
    }

    // LoadFunc and StoreFunc add delegation tokens to Job Credentials in
    // setLocation and setStoreLocation respectively. For eg: HBaseStorage
    // InputFormat add delegation token in getSplits and OutputFormat in
    // checkOutputSpecs. For eg: FileInputFormat and FileOutputFormat
    if (stores.size() > 0) {
        new PigOutputFormat().checkOutputSpecs(job);
    }

    // Set the right VertexManagerPlugin
    if (tezOp.getEstimatedParallelism() != -1) {
        if (tezOp.isGlobalSort() || tezOp.isSkewedJoin()) {
            // Set VertexManagerPlugin to PartitionerDefinedVertexManager, which is able
            // to decrease/increase parallelism of sorting vertex dynamically
            // based on the numQuantiles calculated by sample aggregation vertex
            vertex.setVertexManagerPlugin(
                    VertexManagerPluginDescriptor.create(PartitionerDefinedVertexManager.class.getName()));
            log.info("Set VertexManagerPlugin to PartitionerDefinedParallelismVertexManager for vertex "
                    + tezOp.getOperatorKey().toString());
        } else {
            boolean containScatterGather = false;
            boolean containCustomPartitioner = false;
            for (TezEdgeDescriptor edge : tezOp.inEdges.values()) {
                if (edge.dataMovementType == DataMovementType.SCATTER_GATHER) {
                    containScatterGather = true;
                }
                if (edge.partitionerClass != null) {
                    containCustomPartitioner = true;
                }
            }
            if (containScatterGather && !containCustomPartitioner) {
                // Use auto-parallelism feature of ShuffleVertexManager to dynamically
                // reduce the parallelism of the vertex
                VertexManagerPluginDescriptor vmPluginDescriptor = VertexManagerPluginDescriptor
                        .create(ShuffleVertexManager.class.getName());
                Configuration vmPluginConf = ConfigurationUtil.toConfiguration(pc.getProperties(), false);
                vmPluginConf.setBoolean(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL,
                        true);
                if (vmPluginConf.getLong(InputSizeReducerEstimator.BYTES_PER_REDUCER_PARAM,
                        InputSizeReducerEstimator.DEFAULT_BYTES_PER_REDUCER) != InputSizeReducerEstimator.DEFAULT_BYTES_PER_REDUCER) {
                    vmPluginConf.setLong(
                            ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_DESIRED_TASK_INPUT_SIZE,
                            vmPluginConf.getLong(InputSizeReducerEstimator.BYTES_PER_REDUCER_PARAM,
                                    InputSizeReducerEstimator.DEFAULT_BYTES_PER_REDUCER));
                }
                vmPluginDescriptor.setUserPayload(TezUtils.createUserPayloadFromConf(vmPluginConf));
                vertex.setVertexManagerPlugin(vmPluginDescriptor);
                log.info("Set auto parallelism for vertex " + tezOp.getOperatorKey().toString());
            }
        }
    }

    // Reset udfcontext jobconf. It is not supposed to be set in the front end
    UDFContext.getUDFContext().addJobConf(null);
    return vertex;
}

From source file:org.apache.sqoop.mapreduce.mainframe.TestMainframeDatasetFTPRecordReader.java

License:Apache License

@Before
public void setUp() throws IOException {
    mockFTPClient = mock(FTPClient.class);
    MainframeFTPClientUtils.setMockFTPClient(mockFTPClient);
    try {
        when(mockFTPClient.login("user", "pssword")).thenReturn(true);
        when(mockFTPClient.logout()).thenReturn(true);
        when(mockFTPClient.isConnected()).thenReturn(true);
        when(mockFTPClient.completePendingCommand()).thenReturn(true);
        when(mockFTPClient.changeWorkingDirectory(anyString())).thenReturn(true);
        when(mockFTPClient.getReplyCode()).thenReturn(200);
        when(mockFTPClient.noop()).thenReturn(200);
        when(mockFTPClient.setFileType(anyInt())).thenReturn(true);

        FTPFile ftpFile1 = new FTPFile();
        ftpFile1.setType(FTPFile.FILE_TYPE);
        ftpFile1.setName("test1");
        FTPFile ftpFile2 = new FTPFile();
        ftpFile2.setType(FTPFile.FILE_TYPE);
        ftpFile2.setName("test2");
        FTPFile[] ftpFiles = { ftpFile1, ftpFile2 };
        when(mockFTPClient.listFiles()).thenReturn(ftpFiles);

        when(mockFTPClient.retrieveFileStream("test1"))
                .thenReturn(new ByteArrayInputStream("123\n456\n".getBytes()));
        when(mockFTPClient.retrieveFileStream("test2"))
                .thenReturn(new ByteArrayInputStream("789\n".getBytes()));
        when(mockFTPClient.retrieveFileStream("NotComplete"))
                .thenReturn(new ByteArrayInputStream("NotComplete\n".getBytes()));
    } catch (IOException e) {
        fail("No IOException should be thrown!");
    }

    JobConf conf = new JobConf();
    conf.set(DBConfiguration.URL_PROPERTY, "localhost:" + "11111");
    conf.set(DBConfiguration.USERNAME_PROPERTY, "user");
    conf.set(DBConfiguration.PASSWORD_PROPERTY, "pssword");
    // set the password in the secure credentials object
    Text PASSWORD_SECRET_KEY = new Text(DBConfiguration.PASSWORD_PROPERTY);
    conf.getCredentials().addSecretKey(PASSWORD_SECRET_KEY, "pssword".getBytes());
    conf.setClass(DBConfiguration.INPUT_CLASS_PROPERTY, DummySqoopRecord.class, DBWritable.class);

    Job job = new Job(conf);
    mfDIS = new MainframeDatasetInputSplit();
    mfDIS.addDataset("test1");
    mfDIS.addDataset("test2");
    context = mock(TaskAttemptContext.class);
    when(context.getConfiguration()).thenReturn(job.getConfiguration());
    mfDFTPRR = new MainframeDatasetFTPRecordReader();
}

From source file:org.apache.tez.mapreduce.hadoop.TestMRInputHelpers.java

License:Apache License

private DataSourceDescriptor generateDataSourceDescriptorMapReduce(Path inputSplitsDir) throws Exception {
    JobConf jobConf = new JobConf(dfsCluster.getFileSystem().getConf());
    jobConf.setUseNewMapper(true);
    jobConf.setClass(org.apache.hadoop.mapreduce.MRJobConfig.INPUT_FORMAT_CLASS_ATTR, TextInputFormat.class,
            InputFormat.class);
    jobConf.set(TextInputFormat.INPUT_DIR, testFilePath.toString());

    return MRInputHelpers.configureMRInputWithLegacySplitGeneration(jobConf, inputSplitsDir, true);
}

From source file:org.apache.tez.mapreduce.processor.map.TestMapProcessor.java

License:Apache License

public void setUpJobConf(JobConf job) {
    job.set(TezRuntimeFrameworkConfigs.LOCAL_DIRS, workDir.toString());
    job.set(MRConfig.LOCAL_DIR, workDir.toString());
    job.setClass(Constants.TEZ_RUNTIME_TASK_OUTPUT_MANAGER, TezTaskOutputFiles.class, TezTaskOutput.class);
    job.set(TezRuntimeConfiguration.TEZ_RUNTIME_PARTITIONER_CLASS, MRPartitioner.class.getName());
    job.setNumReduceTasks(1);
}

From source file:org.apache.tez.mapreduce.task.MRRuntimeTask.java

License:Apache License

private static void configureMRTask(JobConf job, MRTask task) throws IOException, InterruptedException {

    Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    job.setCredentials(credentials);
    // TODO Can this be avoided all together. Have the MRTezOutputCommitter use
    // the Tez parameter.
    // TODO This could be fetched from the env if YARN is setting it for all
    // Containers.
    // Set it in conf so that it can be used by the OutputCommitter.
    job.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, job.getInt(TezJobConfig.APPLICATION_ATTEMPT_ID, -1));

    job.setClass(MRConfig.TASK_LOCAL_OUTPUT_CLASS, YarnOutputFiles.class, MapOutputFile.class); // MR

    Token<JobTokenIdentifier> jobToken = TokenCache.getJobToken(credentials);
    if (jobToken != null) {
        // Will MR ever run without a job token?
        SecretKey sk = JobTokenSecretManager.createSecretKey(jobToken.getPassword());
        task.setJobTokenSecret(sk);
    } else {
        LOG.warn("No job token set");
    }

    job.set(MRJobConfig.JOB_LOCAL_DIR, job.get(TezJobConfig.JOB_LOCAL_DIR));
    job.set(MRConfig.LOCAL_DIR, job.get(TezJobConfig.LOCAL_DIRS));
    if (job.get(TezJobConfig.DAG_CREDENTIALS_BINARY) != null) {
        job.set(MRJobConfig.MAPREDUCE_JOB_CREDENTIALS_BINARY, job.get(TezJobConfig.DAG_CREDENTIALS_BINARY));
    }

    // setup the child's attempt directories
    // Do the task-type specific localization
    task.localizeConfiguration(job);

    // Set up the DistributedCache related configs
    setupDistributedCacheConfig(job);

    task.setConf(job);
}

From source file:org.commoncrawl.hadoop.io.ARCInputFormat.java

License:Open Source License

/**
 * Sets the {@link ARCSource} implementation to use.
 *
 * @param job
 *          the job to set the {@link ARCSource} for
 * @param arcSource
 *          the {@link Class} of the implementation to use
 * 
 * @see #P_ARC_SOURCE
 */
public static void setARCSourceClass(JobConf job, Class arcSource) {
    job.setClass(P_ARC_SOURCE, arcSource, ARCSource.class);
}
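
On the read side, a consumer of this setting would presumably recover the registered class with the matching getClass call and instantiate it reflectively. A minimal sketch, assuming P_ARC_SOURCE is the same constant used above; this is not quoted from the commoncrawl source:

// Sketch: recover and instantiate whatever ARCSource implementation was registered.
Class<? extends ARCSource> arcSourceClass = job.getClass(P_ARC_SOURCE, null, ARCSource.class);
ARCSource arcSource = org.apache.hadoop.util.ReflectionUtils.newInstance(arcSourceClass, job);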

From source file:org.elasticsearch.hadoop.integration.mr.AbstractMROldApiSaveTest.java

License:Apache License

@Test
public void testBasicMultiSave() throws Exception {
    JobConf conf = createJobConf();
    conf.set(ConfigurationOptions.ES_RESOURCE, "oldapi/multi-save");

    MultiOutputFormat.addOutputFormat(conf, EsOutputFormat.class);
    MultiOutputFormat.addOutputFormat(conf, PrintStreamOutputFormat.class);
    //MultiOutputFormat.addOutputFormat(conf, TextOutputFormat.class);

    PrintStreamOutputFormat.stream(conf, Stream.OUT);
    //conf.set("mapred.output.dir", "foo/bar");
    //FileOutputFormat.setOutputPath(conf, new Path("foo/bar"));

    conf.setClass("mapred.output.format.class", MultiOutputFormat.class, OutputFormat.class);
    runJob(conf);
}
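
Here again, the string-keyed setClass call should be equivalent to the old API convenience method that writes the same "mapred.output.format.class" property (stated as an assumption about the Hadoop version in use):

conf.setOutputFormat(MultiOutputFormat.class);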

From source file:org.sf.xrime.algorithms.statistics.VertexEdgeCounter.java

License:Apache License

@Override
public void execute() throws ProcessorExecutionException {
    try {
        JobConf conf = new JobConf(context, AverageVertexDegree.class);
        conf.setJobName("AverageDegree");

        // the key is a pseudo one ("Average_Degree")
        conf.setOutputKeyClass(Text.class);
        // the values are degrees (ints)
        conf.setOutputValueClass(LongWritable.class);
        conf.setMapperClass(MapClass.class);
        conf.setCombinerClass(ReduceClass.class);
        // No combiner is permitted.
        conf.setReducerClass(ReduceClass.class);
        // The format of input data is generated with WritableSerialization.
        conf.setInputFormat(SequenceFileInputFormat.class);
        conf.setOutputFormat(SequenceFileOutputFormat.class);
        FileInputFormat.setInputPaths(conf, getSource().getPath());
        FileOutputFormat.setOutputPath(conf, getDestination().getPath());
        conf.setNumMapTasks(getMapperNum());
        // Only one reducer is permitted, or the return value will be wrong.
        conf.setNumReduceTasks(1);

        // Set the possible CounterFilter class
        if (counterFilterClass != null) {
            conf.setClass(counterFilterKey, counterFilterClass, CounterFilter.class);
        }

        this.runningJob = JobClient.runJob(conf);

        FileSystem fs = getDestination().getPath().getFileSystem(conf);
        Path dataPath = new Path(getDestination().getPath().toString() + "/part-00000");
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, dataPath, conf);

        Writable key = ReflectionUtils.newInstance(reader.getKeyClass().asSubclass(WritableComparable.class),
                conf);
        Writable value = ReflectionUtils.newInstance(reader.getValueClass().asSubclass(Writable.class), conf);

        while (reader.next(key, value)) {
            addCounter(((Text) key).toString(), ((LongWritable) value).get());
        }

        reader.close();
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    } catch (IllegalAccessException e) {
        throw new ProcessorExecutionException(e);
    }
}