List of usage examples for org.apache.hadoop.mapred JobConf setClass
public void setClass(String name, Class<?> theClass, Class<?> xface)
Sets the value of the name property to the name of theClass, which must implement the given interface xface.
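A minimal sketch of the round trip, using a hypothetical property name (my.processor.class) and a hypothetical Processor interface: setClass verifies that theClass implements xface, stores its fully qualified name under the property, and getClass loads it back with the same check.

import org.apache.hadoop.mapred.JobConf;

public class SetClassSketch {
    /** Hypothetical plugin interface, for illustration only. */
    public interface Processor {}

    public static class UpperCaseProcessor implements Processor {}

    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // Stores the fully qualified class name under "my.processor.class";
        // throws a RuntimeException if the class does not implement Processor.
        conf.setClass("my.processor.class", UpperCaseProcessor.class, Processor.class);

        // Loads the class back, again checked against the Processor interface.
        Class<? extends Processor> cls = conf.getClass("my.processor.class", null, Processor.class);
        System.out.println(cls.getName());
    }
}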
From source file: org.apache.avro.mapred.AvroMultipleOutputs.java
License: Apache License

/**
 * Adds a named output for the job.
 *
 * @param conf              job conf to add the named output
 * @param namedOutput       named output name; it has to be a word, letters
 *                          and numbers only, and cannot be the word 'part',
 *                          as that is reserved for the default output
 * @param multi             indicates if the named output is multi
 * @param outputFormatClass OutputFormat class
 * @param schema            Schema to be used for this namedOutput
 */
private static void addNamedOutput(JobConf conf, String namedOutput, boolean multi,
        Class<? extends OutputFormat> outputFormatClass, Schema schema) {
    checkNamedOutputName(namedOutput);
    checkNamedOutput(conf, namedOutput, true);
    boolean isMapOnly = conf.getNumReduceTasks() == 0;
    if (schema != null) {
        conf.set(MO_PREFIX + namedOutput + ".schema", schema.toString());
    }
    conf.set(NAMED_OUTPUTS, conf.get(NAMED_OUTPUTS, "") + " " + namedOutput);
    conf.setClass(MO_PREFIX + namedOutput + FORMAT, outputFormatClass, OutputFormat.class);
    conf.setBoolean(MO_PREFIX + namedOutput + MULTI, multi);
}
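The class registered above is read back with the matching getClass call. A sketch of the lookup side, assuming the same MO_PREFIX and FORMAT constants (the real accessor in AvroMultipleOutputs may be named differently):

// Hedged sketch: recover the OutputFormat registered for a named output.
private static Class<? extends OutputFormat> getNamedOutputFormatClass(JobConf conf, String namedOutput) {
    return conf.getClass(MO_PREFIX + namedOutput + FORMAT, null, OutputFormat.class);
}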
From source file: org.apache.mahout.avro.text.mapred.AvroDocumentProcessor.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf();
    if (args.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        return 0;
    }
    conf.setStrings("io.serializations",
            new String[] { WritableSerialization.class.getName(),
                    AvroSpecificSerialization.class.getName(),
                    AvroReflectSerialization.class.getName(),
                    AvroGenericSerialization.class.getName() });

    AvroComparator.setSchema(AvroDocument._SCHEMA); // TODO: must be done in mapper, reducer configure method.
    conf.setClass("mapred.output.key.comparator.class", AvroComparator.class, RawComparator.class);

    conf.setJarByClass(AvroDocumentProcessor.class);
    conf.setMapperClass(ProcessorMapper.class);
    conf.setReducerClass(IdentityReducer.class);
    conf.setOutputKeyClass(AvroDocument.class);
    conf.setOutputValueClass(NullWritable.class);

    conf.setInputFormat(AvroInputFormat.class);
    conf.setOutputFormat(AvroOutputFormat.class);

    AvroInputFormat.setAvroInputClass(conf, AvroDocument.class);
    AvroOutputFormat.setAvroOutputClass(conf, AvroDocument.class);

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);

    FileSystem fs = FileSystem.get(conf);
    fs.delete(output, true);

    FileInputFormat.addInputPath(conf, input);
    FileOutputFormat.setOutputPath(conf, output);

    RunningJob job = JobClient.runJob(conf);
    job.waitForCompletion();
    return job.isComplete() ? 0 : 1;
}
From source file: org.apache.pig.backend.hadoop.executionengine.tez.TezDagBuilder.java
License: Apache License

private Vertex newVertex(TezOperator tezOp, boolean isMap)
        throws IOException, ClassNotFoundException, InterruptedException {
    ProcessorDescriptor procDesc = ProcessorDescriptor.create(tezOp.getProcessorName());

    // Pass physical plans to vertex as user payload.
    JobConf payloadConf = new JobConf(ConfigurationUtil.toConfiguration(pc.getProperties(), false));

    // We do this so that dag.getCredentials(), job.getCredentials(),
    // job.getConfiguration().getCredentials() all reference the same Credentials object
    // Unfortunately there is no setCredentials() on Job
    payloadConf.setCredentials(dag.getCredentials());
    // We won't actually use this job, but we need it to talk with the Load Store funcs
    @SuppressWarnings("deprecation")
    Job job = new Job(payloadConf);
    payloadConf = (JobConf) job.getConfiguration();

    if (tezOp.sampleOperator != null) {
        payloadConf.set(PigProcessor.SAMPLE_VERTEX, tezOp.sampleOperator.getOperatorKey().toString());
    }

    if (tezOp.sortOperator != null) {
        payloadConf.set(PigProcessor.SORT_VERTEX, tezOp.sortOperator.getOperatorKey().toString());
    }

    String tmp;
    long maxCombinedSplitSize = 0;
    if (!tezOp.combineSmallSplits()
            || pc.getProperties().getProperty(PigConfiguration.PIG_SPLIT_COMBINATION, "true").equals("false")) {
        payloadConf.setBoolean(PigConfiguration.PIG_NO_SPLIT_COMBINATION, true);
    } else if ((tmp = pc.getProperties().getProperty(PigConfiguration.PIG_MAX_COMBINED_SPLIT_SIZE, null)) != null) {
        try {
            maxCombinedSplitSize = Long.parseLong(tmp);
        } catch (NumberFormatException e) {
            log.warn("Invalid numeric format for pig.maxCombinedSplitSize; use the default maximum combined split size");
        }
    }
    if (maxCombinedSplitSize > 0) {
        payloadConf.setLong("pig.maxCombinedSplitSize", maxCombinedSplitSize);
    }

    payloadConf.set("pig.inputs", ObjectSerializer.serialize(tezOp.getLoaderInfo().getInp()));
    payloadConf.set("pig.inpSignatures", ObjectSerializer.serialize(tezOp.getLoaderInfo().getInpSignatureLists()));
    payloadConf.set("pig.inpLimits", ObjectSerializer.serialize(tezOp.getLoaderInfo().getInpLimits()));

    // Process stores
    LinkedList<POStore> stores = processStores(tezOp, payloadConf, job);

    payloadConf.set("pig.pigContext", ObjectSerializer.serialize(pc));
    payloadConf.set("udf.import.list", ObjectSerializer.serialize(PigContext.getPackageImportList()));
    payloadConf.set("exectype", "TEZ");
    payloadConf.setBoolean(MRConfiguration.MAPPER_NEW_API, true);
    payloadConf.setClass(MRConfiguration.INPUTFORMAT_CLASS, PigInputFormat.class, InputFormat.class);

    // Set parent plan for all operators in the Tez plan.
    new PhyPlanSetter(tezOp.plan).visit();

    // Set the endOfAllInput flag on the physical plan if certain operators that
    // use this property (such as STREAM) are present in the plan.
    EndOfAllInputSetter.EndOfAllInputChecker checker = new EndOfAllInputSetter.EndOfAllInputChecker(tezOp.plan);
    checker.visit();
    if (checker.isEndOfAllInputPresent()) {
        payloadConf.set(JobControlCompiler.END_OF_INP_IN_MAP, "true");
    }

    // Configure the classes for incoming shuffles to this TezOp
    // TODO: Refactor out resetting input keys, PIG-3957
    List<PhysicalOperator> roots = tezOp.plan.getRoots();
    if (roots.size() == 1 && roots.get(0) instanceof POPackage) {
        POPackage pack = (POPackage) roots.get(0);

        List<PhysicalOperator> succsList = tezOp.plan.getSuccessors(pack);
        if (succsList != null) {
            succsList = new ArrayList<PhysicalOperator>(succsList);
        }
        byte keyType = pack.getPkgr().getKeyType();
        tezOp.plan.remove(pack);
        payloadConf.set("pig.reduce.package", ObjectSerializer.serialize(pack));
        setIntermediateOutputKeyValue(keyType, payloadConf, tezOp);
        POShuffleTezLoad newPack;
        newPack = new POShuffleTezLoad(pack);
        if (tezOp.isSkewedJoin()) {
            newPack.setSkewedJoins(true);
        }
        tezOp.plan.add(newPack);

        // Set input keys for POShuffleTezLoad. This is used to identify
        // the inputs that are attached to the POShuffleTezLoad in the
        // backend.
        Map<Integer, String> localRearrangeMap = new TreeMap<Integer, String>();
        for (TezOperator pred : mPlan.getPredecessors(tezOp)) {
            if (tezOp.sampleOperator != null && tezOp.sampleOperator == pred) {
                // skip sample vertex input
            } else {
                String inputKey = pred.getOperatorKey().toString();
                if (pred.isVertexGroup()) {
                    pred = mPlan.getOperator(pred.getVertexGroupMembers().get(0));
                }
                LinkedList<POLocalRearrangeTez> lrs =
                        PlanHelper.getPhysicalOperators(pred.plan, POLocalRearrangeTez.class);
                for (POLocalRearrangeTez lr : lrs) {
                    if (lr.isConnectedToPackage()
                            && lr.getOutputKey().equals(tezOp.getOperatorKey().toString())) {
                        localRearrangeMap.put((int) lr.getIndex(), inputKey);
                    }
                }
            }
        }
        for (Map.Entry<Integer, String> entry : localRearrangeMap.entrySet()) {
            newPack.addInputKey(entry.getValue());
        }

        if (succsList != null) {
            for (PhysicalOperator succs : succsList) {
                tezOp.plan.connect(newPack, succs);
            }
        }

        setIntermediateOutputKeyValue(pack.getPkgr().getKeyType(), payloadConf, tezOp);
    } else if (roots.size() == 1 && roots.get(0) instanceof POIdentityInOutTez) {
        POIdentityInOutTez identityInOut = (POIdentityInOutTez) roots.get(0);
        // TODO Need to fix multiple input key mapping
        TezOperator identityInOutPred = null;
        for (TezOperator pred : mPlan.getPredecessors(tezOp)) {
            if (!pred.isSampleAggregation()) {
                identityInOutPred = pred;
                break;
            }
        }
        identityInOut.setInputKey(identityInOutPred.getOperatorKey().toString());
    } else if (roots.size() == 1 && roots.get(0) instanceof POValueInputTez) {
        POValueInputTez valueInput = (POValueInputTez) roots.get(0);

        LinkedList<String> scalarInputs = new LinkedList<String>();
        for (POUserFunc userFunc : PlanHelper.getPhysicalOperators(tezOp.plan, POUserFunc.class)) {
            if (userFunc.getFunc() instanceof ReadScalarsTez) {
                scalarInputs.add(((ReadScalarsTez) userFunc.getFunc()).getTezInputs()[0]);
            }
        }
        // Make sure we don't find the scalar
        for (TezOperator pred : mPlan.getPredecessors(tezOp)) {
            if (!scalarInputs.contains(pred.getOperatorKey().toString())) {
                valueInput.setInputKey(pred.getOperatorKey().toString());
                break;
            }
        }
    }
    JobControlCompiler.setOutputFormat(job);

    // set parent plan in all operators. currently the parent plan is really
    // used only when POStream, POSplit are present in the plan
    new PhyPlanSetter(tezOp.plan).visit();

    // Serialize the execution plan
    payloadConf.set(PigProcessor.PLAN, ObjectSerializer.serialize(tezOp.plan));

    UDFContext.getUDFContext().serialize(payloadConf);

    MRToTezHelper.processMRSettings(payloadConf, globalConf);

    if (!pc.inIllustrator) {
        for (POStore store : stores) {
            // unset inputs for POStore, otherwise, map/reduce plan will be unnecessarily deserialized
            store.setInputs(null);
            store.setParentPlan(null);
        }
        // We put them in the reduce because PigOutputCommitter checks the
        // ID of the task to see if it's a map, and if not, calls the reduce
        // committers.
        payloadConf.set(JobControlCompiler.PIG_MAP_STORES, ObjectSerializer.serialize(new ArrayList<POStore>()));
        payloadConf.set(JobControlCompiler.PIG_REDUCE_STORES, ObjectSerializer.serialize(stores));
    }

    if (tezOp.isNeedEstimateParallelism()) {
        payloadConf.setBoolean(PigProcessor.ESTIMATE_PARALLELISM, true);
        log.info("Estimate quantile for sample aggregation vertex " + tezOp.getOperatorKey().toString());
    }

    // Take our assembled configuration and create a vertex
    UserPayload userPayload = TezUtils.createUserPayloadFromConf(payloadConf);
    procDesc.setUserPayload(userPayload);

    Vertex vertex = Vertex.create(tezOp.getOperatorKey().toString(), procDesc, tezOp.getVertexParallelism(),
            isMap ? MRHelpers.getResourceForMRMapper(globalConf) : MRHelpers.getResourceForMRReducer(globalConf));

    Map<String, String> taskEnv = new HashMap<String, String>();
    MRHelpers.updateEnvBasedOnMRTaskEnv(globalConf, taskEnv, isMap);
    vertex.setTaskEnvironment(taskEnv);

    // All these classes are @InterfaceAudience.Private in Hadoop. Switch to Tez methods in TEZ-1012
    // set the timestamps, public/private visibility of the archives and files
    ClientDistributedCacheManager.determineTimestampsAndCacheVisibilities(globalConf);
    // get DelegationToken for each cached file
    ClientDistributedCacheManager.getDelegationTokens(globalConf, job.getCredentials());
    MRApps.setupDistributedCache(globalConf, localResources);
    vertex.addTaskLocalFiles(localResources);

    vertex.setTaskLaunchCmdOpts(isMap ? MRHelpers.getJavaOptsForMRMapper(globalConf)
            : MRHelpers.getJavaOptsForMRReducer(globalConf));

    log.info("For vertex - " + tezOp.getOperatorKey().toString() + ": parallelism="
            + tezOp.getVertexParallelism() + ", memory=" + vertex.getTaskResource().getMemory()
            + ", java opts=" + vertex.getTaskLaunchCmdOpts());

    // Right now there can only be one of each of these. Will need to be
    // more generic when there can be more.
    for (POLoad ld : tezOp.getLoaderInfo().getLoads()) {
        // TODO: These should get the globalConf, or a merged version that
        // keeps settings like pig.maxCombinedSplitSize
        vertex.setLocationHint(
                VertexLocationHint.create(tezOp.getLoaderInfo().getInputSplitInfo().getTaskLocationHints()));
        vertex.addDataSource(ld.getOperatorKey().toString(),
                DataSourceDescriptor.create(
                        InputDescriptor.create(MRInput.class.getName())
                                .setUserPayload(UserPayload.create(MRRuntimeProtos.MRInputUserPayloadProto.newBuilder()
                                        .setConfigurationBytes(TezUtils.createByteStringFromConf(payloadConf))
                                        .setSplits(tezOp.getLoaderInfo().getInputSplitInfo().getSplitsProto())
                                        .build().toByteString().asReadOnlyByteBuffer())),
                        InputInitializerDescriptor.create(MRInputSplitDistributor.class.getName()),
                        dag.getCredentials()));
    }

    for (POStore store : stores) {
        ArrayList<POStore> emptyList = new ArrayList<POStore>();
        ArrayList<POStore> singleStore = new ArrayList<POStore>();
        singleStore.add(store);

        Configuration outputPayLoad = new Configuration(payloadConf);
        outputPayLoad.set(JobControlCompiler.PIG_MAP_STORES, ObjectSerializer.serialize(emptyList));
        outputPayLoad.set(JobControlCompiler.PIG_REDUCE_STORES, ObjectSerializer.serialize(singleStore));

        OutputDescriptor storeOutDescriptor = OutputDescriptor.create(MROutput.class.getName())
                .setUserPayload(TezUtils.createUserPayloadFromConf(outputPayLoad));
        if (tezOp.getVertexGroupStores() != null) {
            OperatorKey vertexGroupKey = tezOp.getVertexGroupStores().get(store.getOperatorKey());
            if (vertexGroupKey != null) {
                getPlan().getOperator(vertexGroupKey).getVertexGroupInfo()
                        .setStoreOutputDescriptor(storeOutDescriptor);
                continue;
            }
        }
        vertex.addDataSink(store.getOperatorKey().toString(),
                new DataSinkDescriptor(storeOutDescriptor,
                        OutputCommitterDescriptor.create(MROutputCommitter.class.getName()),
                        dag.getCredentials()));
    }

    // LoadFunc and StoreFunc add delegation tokens to Job Credentials in
    // setLocation and setStoreLocation respectively. For eg: HBaseStorage
    // InputFormat add delegation token in getSplits and OutputFormat in
    // checkOutputSpecs. For eg: FileInputFormat and FileOutputFormat
    if (stores.size() > 0) {
        new PigOutputFormat().checkOutputSpecs(job);
    }

    // Set the right VertexManagerPlugin
    if (tezOp.getEstimatedParallelism() != -1) {
        if (tezOp.isGlobalSort() || tezOp.isSkewedJoin()) {
            // Set VertexManagerPlugin to PartitionerDefinedVertexManager, which is able
            // to decrease/increase parallelism of sorting vertex dynamically
            // based on the numQuantiles calculated by sample aggregation vertex
            vertex.setVertexManagerPlugin(
                    VertexManagerPluginDescriptor.create(PartitionerDefinedVertexManager.class.getName()));
            log.info("Set VertexManagerPlugin to PartitionerDefinedParallelismVertexManager for vertex "
                    + tezOp.getOperatorKey().toString());
        } else {
            boolean containScatterGather = false;
            boolean containCustomPartitioner = false;
            for (TezEdgeDescriptor edge : tezOp.inEdges.values()) {
                if (edge.dataMovementType == DataMovementType.SCATTER_GATHER) {
                    containScatterGather = true;
                }
                if (edge.partitionerClass != null) {
                    containCustomPartitioner = true;
                }
            }
            if (containScatterGather && !containCustomPartitioner) {
                // Use auto-parallelism feature of ShuffleVertexManager to dynamically
                // reduce the parallelism of the vertex
                VertexManagerPluginDescriptor vmPluginDescriptor = VertexManagerPluginDescriptor
                        .create(ShuffleVertexManager.class.getName());
                Configuration vmPluginConf = ConfigurationUtil.toConfiguration(pc.getProperties(), false);
                vmPluginConf.setBoolean(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL, true);
                if (vmPluginConf.getLong(InputSizeReducerEstimator.BYTES_PER_REDUCER_PARAM,
                        InputSizeReducerEstimator.DEFAULT_BYTES_PER_REDUCER)
                        != InputSizeReducerEstimator.DEFAULT_BYTES_PER_REDUCER) {
                    vmPluginConf.setLong(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_DESIRED_TASK_INPUT_SIZE,
                            vmPluginConf.getLong(InputSizeReducerEstimator.BYTES_PER_REDUCER_PARAM,
                                    InputSizeReducerEstimator.DEFAULT_BYTES_PER_REDUCER));
                }
                vmPluginDescriptor.setUserPayload(TezUtils.createUserPayloadFromConf(vmPluginConf));
                vertex.setVertexManagerPlugin(vmPluginDescriptor);
                log.info("Set auto parallelism for vertex " + tezOp.getOperatorKey().toString());
            }
        }
    }

    // Reset udfcontext jobconf. It is not supposed to be set in the front end
    UDFContext.getUDFContext().addJobConf(null);
    return vertex;
}
From source file: org.apache.sqoop.mapreduce.mainframe.TestMainframeDatasetFTPRecordReader.java
License: Apache License

@Before
public void setUp() throws IOException {
    mockFTPClient = mock(FTPClient.class);
    MainframeFTPClientUtils.setMockFTPClient(mockFTPClient);
    try {
        when(mockFTPClient.login("user", "pssword")).thenReturn(true);
        when(mockFTPClient.logout()).thenReturn(true);
        when(mockFTPClient.isConnected()).thenReturn(true);
        when(mockFTPClient.completePendingCommand()).thenReturn(true);
        when(mockFTPClient.changeWorkingDirectory(anyString())).thenReturn(true);
        when(mockFTPClient.getReplyCode()).thenReturn(200);
        when(mockFTPClient.noop()).thenReturn(200);
        when(mockFTPClient.setFileType(anyInt())).thenReturn(true);

        FTPFile ftpFile1 = new FTPFile();
        ftpFile1.setType(FTPFile.FILE_TYPE);
        ftpFile1.setName("test1");
        FTPFile ftpFile2 = new FTPFile();
        ftpFile2.setType(FTPFile.FILE_TYPE);
        ftpFile2.setName("test2");
        FTPFile[] ftpFiles = { ftpFile1, ftpFile2 };
        when(mockFTPClient.listFiles()).thenReturn(ftpFiles);

        when(mockFTPClient.retrieveFileStream("test1"))
                .thenReturn(new ByteArrayInputStream("123\n456\n".getBytes()));
        when(mockFTPClient.retrieveFileStream("test2"))
                .thenReturn(new ByteArrayInputStream("789\n".getBytes()));
        when(mockFTPClient.retrieveFileStream("NotComplete"))
                .thenReturn(new ByteArrayInputStream("NotComplete\n".getBytes()));
    } catch (IOException e) {
        fail("No IOException should be thrown!");
    }

    JobConf conf = new JobConf();
    conf.set(DBConfiguration.URL_PROPERTY, "localhost:" + "11111");
    conf.set(DBConfiguration.USERNAME_PROPERTY, "user");
    conf.set(DBConfiguration.PASSWORD_PROPERTY, "pssword");
    // set the password in the secure credentials object
    Text PASSWORD_SECRET_KEY = new Text(DBConfiguration.PASSWORD_PROPERTY);
    conf.getCredentials().addSecretKey(PASSWORD_SECRET_KEY, "pssword".getBytes());
    conf.setClass(DBConfiguration.INPUT_CLASS_PROPERTY, DummySqoopRecord.class, DBWritable.class);

    Job job = new Job(conf);
    mfDIS = new MainframeDatasetInputSplit();
    mfDIS.addDataset("test1");
    mfDIS.addDataset("test2");
    context = mock(TaskAttemptContext.class);
    when(context.getConfiguration()).thenReturn(job.getConfiguration());
    mfDFTPRR = new MainframeDatasetFTPRecordReader();
}
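The record class registered under DBConfiguration.INPUT_CLASS_PROPERTY is what the record reader instantiates for each row; an illustrative sketch of that lookup side (not the exact Sqoop code, and the null default is an assumption):

// Hedged sketch: load the configured record class and instantiate it via ReflectionUtils.
Class<? extends DBWritable> recordClass =
        conf.getClass(DBConfiguration.INPUT_CLASS_PROPERTY, null, DBWritable.class);
DBWritable record = ReflectionUtils.newInstance(recordClass, conf);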
From source file: org.apache.tez.mapreduce.hadoop.TestMRInputHelpers.java
License: Apache License

private DataSourceDescriptor generateDataSourceDescriptorMapReduce(Path inputSplitsDir) throws Exception {
    JobConf jobConf = new JobConf(dfsCluster.getFileSystem().getConf());
    jobConf.setUseNewMapper(true);
    jobConf.setClass(org.apache.hadoop.mapreduce.MRJobConfig.INPUT_FORMAT_CLASS_ATTR,
            TextInputFormat.class, InputFormat.class);
    jobConf.set(TextInputFormat.INPUT_DIR, testFilePath.toString());
    return MRInputHelpers.configureMRInputWithLegacySplitGeneration(jobConf, inputSplitsDir, true);
}
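The direct setClass call above is what the new-API convenience setter performs under the hood; a hedged equivalent using the mapreduce Job wrapper (assuming Hadoop 2's Job.getInstance is available):

// Equivalent: Job.setInputFormatClass stores the same
// mapreduce.job.inputformat.class property via setClass.
org.apache.hadoop.mapreduce.Job mrJob = org.apache.hadoop.mapreduce.Job.getInstance(jobConf);
mrJob.setInputFormatClass(TextInputFormat.class);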
From source file: org.apache.tez.mapreduce.processor.map.TestMapProcessor.java
License: Apache License

public void setUpJobConf(JobConf job) {
    job.set(TezRuntimeFrameworkConfigs.LOCAL_DIRS, workDir.toString());
    job.set(MRConfig.LOCAL_DIR, workDir.toString());
    job.setClass(Constants.TEZ_RUNTIME_TASK_OUTPUT_MANAGER, TezTaskOutputFiles.class, TezTaskOutput.class);
    job.set(TezRuntimeConfiguration.TEZ_RUNTIME_PARTITIONER_CLASS, MRPartitioner.class.getName());
    job.setNumReduceTasks(1);
}
From source file: org.apache.tez.mapreduce.task.MRRuntimeTask.java
License: Apache License

private static void configureMRTask(JobConf job, MRTask task) throws IOException, InterruptedException {
    Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
    job.setCredentials(credentials);

    // TODO Can this be avoided all together. Have the MRTezOutputCommitter use
    // the Tez parameter.
    // TODO This could be fetched from the env if YARN is setting it for all
    // Containers.
    // Set it in conf, so as to be able to be used by the OutputCommitter.
    job.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, job.getInt(TezJobConfig.APPLICATION_ATTEMPT_ID, -1));

    job.setClass(MRConfig.TASK_LOCAL_OUTPUT_CLASS, YarnOutputFiles.class, MapOutputFile.class);

    // MR
    Token<JobTokenIdentifier> jobToken = TokenCache.getJobToken(credentials);
    if (jobToken != null) {
        // Will MR ever run without a job token.
        SecretKey sk = JobTokenSecretManager.createSecretKey(jobToken.getPassword());
        task.setJobTokenSecret(sk);
    } else {
        LOG.warn("No job token set");
    }

    job.set(MRJobConfig.JOB_LOCAL_DIR, job.get(TezJobConfig.JOB_LOCAL_DIR));
    job.set(MRConfig.LOCAL_DIR, job.get(TezJobConfig.LOCAL_DIRS));
    if (job.get(TezJobConfig.DAG_CREDENTIALS_BINARY) != null) {
        job.set(MRJobConfig.MAPREDUCE_JOB_CREDENTIALS_BINARY, job.get(TezJobConfig.DAG_CREDENTIALS_BINARY));
    }

    // setup the child's attempt directories
    // Do the task-type specific localization
    task.localizeConfiguration(job);

    // Set up the DistributedCache related configs
    setupDistributedCacheConfig(job);

    task.setConf(job);
}
From source file: org.commoncrawl.hadoop.io.ARCInputFormat.java
License: Open Source License

/**
 * Sets the {@link ARCSource} implementation to use.
 *
 * @param job       the job to set the {@link ARCSource} for
 * @param arcSource the {@link Class} of the implementation to use
 *
 * @see #P_ARC_SOURCE
 */
public static void setARCSourceClass(JobConf job, Class arcSource) {
    job.setClass(P_ARC_SOURCE, arcSource, ARCSource.class);
}
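A usage sketch for the setter above; MyArcSource is a hypothetical ARCSource implementation:

// Hypothetical caller: register the ARC source and use ARCInputFormat for input.
JobConf job = new JobConf();
job.setInputFormat(ARCInputFormat.class);
ARCInputFormat.setARCSourceClass(job, MyArcSource.class); // MyArcSource implements ARCSource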
From source file: org.elasticsearch.hadoop.integration.mr.AbstractMROldApiSaveTest.java
License: Apache License

@Test
public void testBasicMultiSave() throws Exception {
    JobConf conf = createJobConf();
    conf.set(ConfigurationOptions.ES_RESOURCE, "oldapi/multi-save");

    MultiOutputFormat.addOutputFormat(conf, EsOutputFormat.class);
    MultiOutputFormat.addOutputFormat(conf, PrintStreamOutputFormat.class);
    //MultiOutputFormat.addOutputFormat(conf, TextOutputFormat.class);

    PrintStreamOutputFormat.stream(conf, Stream.OUT);
    //conf.set("mapred.output.dir", "foo/bar");
    //FileOutputFormat.setOutputPath(conf, new Path("foo/bar"));

    conf.setClass("mapred.output.format.class", MultiOutputFormat.class, OutputFormat.class);
    runJob(conf);
}
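Because JobConf.setOutputFormat is implemented as setClass("mapred.output.format.class", theClass, OutputFormat.class), the manual property name above can also be written with the typed setter:

// Equivalent to the explicit setClass call above.
conf.setOutputFormat(MultiOutputFormat.class);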
From source file: org.sf.xrime.algorithms.statistics.VertexEdgeCounter.java
License: Apache License

@Override
public void execute() throws ProcessorExecutionException {
    try {
        JobConf conf = new JobConf(context, AverageVertexDegree.class);
        conf.setJobName("AverageDegree");

        // the keys are a pseudo one ("Average_Degree")
        conf.setOutputKeyClass(Text.class);
        // the values are degrees (ints)
        conf.setOutputValueClass(LongWritable.class);

        conf.setMapperClass(MapClass.class);
        conf.setCombinerClass(ReduceClass.class); // No combiner is permitted.
        conf.setReducerClass(ReduceClass.class);

        // The format of input data is generated with WritableSerialization.
        conf.setInputFormat(SequenceFileInputFormat.class);
        conf.setOutputFormat(SequenceFileOutputFormat.class);
        FileInputFormat.setInputPaths(conf, getSource().getPath());
        FileOutputFormat.setOutputPath(conf, getDestination().getPath());
        conf.setNumMapTasks(getMapperNum());
        // Only one reducer is permitted, or the return value will be wrong.
        conf.setNumReduceTasks(1);

        // Set the possible CounterFilter class
        if (counterFilterClass != null) {
            conf.setClass(counterFilterKey, counterFilterClass, CounterFilter.class);
        }

        this.runningJob = JobClient.runJob(conf);

        FileSystem fs = getDestination().getPath().getFileSystem(conf);
        Path dataPath = new Path(getDestination().getPath().toString() + "/part-00000");
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, dataPath, conf);
        Writable key = ReflectionUtils.newInstance(
                reader.getKeyClass().asSubclass(WritableComparable.class), conf);
        Writable value = ReflectionUtils.newInstance(
                reader.getValueClass().asSubclass(Writable.class), conf);
        while (reader.next(key, value)) {
            addCounter(((Text) key).toString(), ((LongWritable) value).get());
        }
        reader.close();
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    } catch (IllegalAccessException e) {
        throw new ProcessorExecutionException(e);
    }
}
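On the task side, the registered filter would be recovered with the matching getClass call; an illustrative sketch under the same counterFilterKey, not taken from the xrime source:

// Hedged sketch: recover the registered CounterFilter, if one was set.
Class<? extends CounterFilter> filterClass = conf.getClass(counterFilterKey, null, CounterFilter.class);
if (filterClass != null) {
    CounterFilter filter = ReflectionUtils.newInstance(filterClass, conf);
}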