List of usage examples for org.apache.hadoop.mapred JobConf set
public void set(String name, String value)
Sets the value of the name property.
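Before the project examples, here is a minimal self-contained sketch of the call itself (not drawn from any project below; the property name and values are made up for illustration): set stores a string value under a property name, and get reads it back.

import org.apache.hadoop.mapred.JobConf;

public class JobConfSetExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // Store a string value under a property name of our choosing.
        conf.set("example.property.name", "example-value");
        // Read the value back; an overload of get() accepts a default value.
        System.out.println(conf.get("example.property.name"));        // prints "example-value"
        System.out.println(conf.get("missing.property", "fallback")); // prints "fallback"
    }
}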
From source file:hudson.plugins.hadoop.JobTrackerStartTask.java
License:Open Source License
public Void call() throws Exception {
    // Configuration conf = new Configuration();
    JobConf jc = new JobConf();
    jc.set("fs.default.name", hdfsUrl);
    jc.set("mapred.job.tracker", jobTrackerAddress);
    jc.set("mapred.job.tracker.http.address", "0.0.0.0:" + HTTP_PORT);
    jc.set("mapred.local.dir", new File(hudsonRoot, "hadoop/mapred").getPath());
    tracker = JobTracker.startTracker(jc);
    new Thread(this).start();
    return null;
}
From source file:hudson.plugins.hadoop.TaskTrackerStartTask.java
License:Open Source License
public Void call() throws IOException {
    System.out.println("Starting data node");
    JobConf conf = new JobConf();
    conf.set("fs.default.name", hdfsUrl);
    conf.set("mapred.job.tracker", jobTrackerAddress);
    conf.set("mapred.task.tracker.http.address", "0.0.0.0:0");
    conf.set("mapred.task.tracker.report.address", "0.0.0.0:0");
    conf.set("mapred.local.dir", new File(new File(rootPath), "hadoop/task-tracker").getAbsolutePath());
    conf.set("slave.host.name", slaveHostName);
    new Thread(new TaskTracker(conf)).start();
    return null;
}
From source file:hydrograph.engine.cascading.scheme.avro.CustomAvroScheme.java
License:Apache License
/**
 * sinkConfInit is called by cascading to set up the sinks. This happens on
 * the client side before the job is distributed. There is a check for the
 * presence of a schema and an exception is thrown if none has been
 * provided. After the schema check the conf object is given the options
 * that Avro needs.
 *
 * @param flowProcess
 *            The cascading FlowProcess object. Should be passed in by
 *            cascading automatically.
 * @param tap
 *            The cascading Tap object. Should be passed in by cascading
 *            automatically.
 * @param conf
 *            The Hadoop JobConf object. This is passed in by cascading
 *            automatically.
 * @throws RuntimeException
 *             If no schema is present this halts the entire process.
 */
@Override
public void sinkConfInit(FlowProcess<? extends JobConf> flowProcess,
        Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) {
    if (schema == null) {
        throw new RuntimeException("Must provide sink schema");
    }
    // Set the output schema and output format class
    conf.set(AvroJob.OUTPUT_SCHEMA, schema.toString());
    conf.setOutputFormat(AvroOutputFormat.class);
    // add AvroSerialization to io.serializations
    addAvroSerializations(conf);
}
From source file:hydrograph.engine.cascading.scheme.avro.CustomAvroScheme.java
License:Apache License
/**
 * sourceConfInit is called by cascading to set up the sources. This happens
 * on the client side before the job is distributed. There is a check for
 * the presence of a schema and if none has been provided the data is peeked
 * at to get a schema. After the schema check the conf object is given the
 * options that Avro needs.
 *
 * @param flowProcess
 *            The cascading FlowProcess object. Should be passed in by
 *            cascading automatically.
 * @param tap
 *            The cascading Tap object. Should be passed in by cascading
 *            automatically.
 * @param conf
 *            The Hadoop JobConf object. This is passed in by cascading
 *            automatically.
 * @throws RuntimeException
 *             If no schema is present this halts the entire process.
 */
@Override
public void sourceConfInit(FlowProcess<? extends JobConf> flowProcess,
        Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) {
    retrieveSourceFields(flowProcess, tap);
    // Set the input schema and input class
    conf.set(AvroJob.INPUT_SCHEMA, schema.toString());
    conf.setInputFormat(AvroInputFormat.class);
    // add AvroSerialization to io.serializations
    addAvroSerializations(conf);
}
From source file:hydrograph.engine.cascading.scheme.hive.parquet.HiveParquetScheme.java
License:Apache License
@Override
public void sourceConfInit(FlowProcess<? extends JobConf> fp, Tap<JobConf, RecordReader, OutputCollector> tap,
        JobConf jobConf) {
    jobConf.setInputFormat(DeprecatedParquetInputFormat.class);
    jobConf.set("hive.parquet.timestamp.skip.conversion", "false");
    ParquetInputFormat.setReadSupportClass(jobConf, DataWritableReadSupport.class);
}
From source file:hydrograph.engine.cascading.scheme.hive.parquet.HiveParquetScheme.java
License:Apache License
@Override
public void sinkConfInit(FlowProcess<? extends JobConf> fp, Tap<JobConf, RecordReader, OutputCollector> tap,
        JobConf jobConf) {
    jobConf.setOutputFormat(DeprecatedParquetOutputFormat.class);
    jobConf.set(DataWritableWriteSupport.PARQUET_HIVE_SCHEMA,
            HiveParquetSchemeHelper.getParquetSchemeMessage(hiveTableDescriptor));
    ParquetOutputFormat.setWriteSupportClass(jobConf, DataWritableWriteSupport.class);
}
From source file:hydrograph.engine.cascading.scheme.parquet.ParquetTupleScheme.java
License:Apache License
@Override
public void sinkConfInit(FlowProcess<? extends JobConf> fp, Tap<JobConf, RecordReader, OutputCollector> tap,
        JobConf jobConf) {
    jobConf.setOutputFormat(DeprecatedParquetOutputFormat.class);
    jobConf.set(ParquetTupleWriter.PARQUET_CASCADING_SCHEMA, parquetSchema);
    ParquetOutputFormat.setWriteSupportClass(jobConf, ParquetTupleWriter.class);
}
From source file:hydrograph.engine.hadoop.utils.HadoopConfigProvider.java
License:Apache License
private static void copyProperties(JobConf jobConf, Map<Object, Object> properties) {
    if (properties instanceof Properties) {
        Properties props = (Properties) properties;
        Set<String> keys = props.stringPropertyNames();
        for (String key : keys)
            jobConf.set(key, props.getProperty(key));
    } else {
        for (Map.Entry<Object, Object> entry : properties.entrySet()) {
            if (entry.getValue() != null)
                jobConf.set(entry.getKey().toString(), entry.getValue().toString());
        }
    }
}
From source file:IndexService.IndexMergeMR.java
License:Open Source License
public static RunningJob run(String inputfiles, String outputdir, Configuration conf) {
    if (inputfiles == null || outputdir == null)
        return null;

    JobConf job = new JobConf(conf);
    job.setJobName("MergeIndexMR");
    job.setJarByClass(IndexMergeMR.class);
    job.setNumReduceTasks(1);
    FileSystem fs = null;
    try {
        fs = FileSystem.get(job);
        fs.delete(new Path(outputdir), true);

        // Collect the data files referenced by every input index file.
        String[] ifs = inputfiles.split(",");
        TreeSet<String> files = new TreeSet<String>();
        for (int i = 0; i < ifs.length; i++) {
            IFormatDataFile ifdf = new IFormatDataFile(job);
            ifdf.open(ifs[i]);
            Collection<String> strs = ifdf.fileInfo().head().getUdi().infos().values();
            for (String str : strs) {
                files.add(str);
            }
            ifdf.close();
        }
        StringBuffer sb = new StringBuffer();
        for (String str : files) {
            sb.append(str + ",");
        }
        job.set(ConstVar.HD_index_filemap, sb.substring(0, sb.length() - 1));

        // Record the field layout and data file type of the index records in the job configuration.
        IFormatDataFile ifdf = new IFormatDataFile(job);
        ifdf.open(ifs[0]);
        HashMap<Integer, IRecord.IFType> map = ifdf.fileInfo().head().fieldMap().fieldtypes();
        ArrayList<String> fieldStrings = new ArrayList<String>();
        for (int i = 0; i < map.size(); i++) {
            IRecord.IFType type = map.get(i);
            fieldStrings.add(type.type() + ConstVar.RecordSplit + type.idx());
        }
        job.setStrings(ConstVar.HD_fieldMap, fieldStrings.toArray(new String[fieldStrings.size()]));
        job.set("datafiletype", ifdf.fileInfo().head().getUdi().infos().get(123456));
        ifdf.close();
    } catch (Exception e2) {
        e2.printStackTrace();
    }

    // Standard MapReduce wiring: a single reducer merges all index entries.
    FileInputFormat.setInputPaths(job, inputfiles);
    FileOutputFormat.setOutputPath(job, new Path(outputdir));

    job.setOutputKeyClass(IndexKey.class);
    job.setOutputValueClass(IndexValue.class);

    job.setPartitionerClass(IndexMergePartitioner.class);

    job.setMapperClass(MergeIndexMap.class);
    job.setCombinerClass(MergeIndexReduce.class);
    job.setReducerClass(MergeIndexReduce.class);

    job.setInputFormat(IndexMergeIFormatInputFormat.class);
    job.setOutputFormat(IndexMergeIFormatOutputFormat.class);

    try {
        JobClient jc = new JobClient(job);
        return jc.submitJob(job);
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
}
From source file:IndexService.IndexMR.java
License:Open Source License
public static RunningJob run(Configuration conf2, String inputfiles, boolean column, String ids,
        String outputdir) {
    if (inputfiles == null || outputdir == null)
        return null;

    JobConf conf = new JobConf(conf2);
    conf.setJobName("IndexMR:\t" + ids);
    conf.setJarByClass(IndexMR.class);
    FileSystem fs = null;
    try {
        fs = FileSystem.get(conf);
        fs.delete(new Path(outputdir), true);
    } catch (IOException e3) {
        e3.printStackTrace();
    }

    // Record which column ids to index and whether the input is column- or format-stored.
    conf.set("index.ids", ids);
    if (column) {
        conf.set("datafiletype", "column");
    } else {
        conf.set("datafiletype", "format");
    }

    String[] ifs = inputfiles.split(",");
    long wholerecnum = 0;

    // Build the field map: one entry per indexed column plus two trailing Short/Int fields.
    String[] idxs = ids.split(",");
    String[] fieldStrings = new String[idxs.length + 2];

    if (!column) {
        IFormatDataFile ifdf;
        try {
            ifdf = new IFormatDataFile(conf);
            ifdf.open(ifs[0]);
            for (int i = 0; i < idxs.length; i++) {
                int id = Integer.parseInt(idxs[i]);
                byte type = ifdf.fileInfo().head().fieldMap().fieldtypes().get(id).type();
                fieldStrings[i] = type + ConstVar.RecordSplit + i;
            }
            ifdf.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    } else {
        try {
            IColumnDataFile icdf = new IColumnDataFile(conf);
            icdf.open(ifs[0]);
            for (int i = 0; i < idxs.length; i++) {
                int id = Integer.parseInt(idxs[i]);
                byte type = icdf.fieldtypes().get(id).type();
                fieldStrings[i] = type + ConstVar.RecordSplit + i;
            }
            icdf.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    fieldStrings[fieldStrings.length - 2] = ConstVar.FieldType_Short + ConstVar.RecordSplit
            + (fieldStrings.length - 2);
    fieldStrings[fieldStrings.length - 1] = ConstVar.FieldType_Int + ConstVar.RecordSplit
            + (fieldStrings.length - 1);

    conf.setStrings(ConstVar.HD_fieldMap, fieldStrings);

    // Record the input file map and count the total number of records across all inputs.
    if (!column) {
        conf.set(ConstVar.HD_index_filemap, inputfiles);
        for (String file : ifs) {
            IFormatDataFile fff;
            try {
                fff = new IFormatDataFile(conf);
                fff.open(file);
                wholerecnum += fff.segIndex().recnum();
                fff.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    } else {
        HashSet<String> files = new HashSet<String>();
        for (String file : ifs) {
            files.add(file);
        }
        StringBuffer sb = new StringBuffer();
        for (String str : files) {
            sb.append(str).append(",");
        }
        conf.set(ConstVar.HD_index_filemap, sb.substring(0, sb.length() - 1));

        for (String file : files) {
            Path parent = new Path(file).getParent();
            try {
                FileStatus[] fss = fs.listStatus(parent);
                String openfile = "";
                for (FileStatus status : fss) {
                    if (status.getPath().toString().contains(file)) {
                        openfile = status.getPath().toString();
                        break;
                    }
                }
                IFormatDataFile fff = new IFormatDataFile(conf);
                fff.open(openfile);
                wholerecnum += fff.segIndex().recnum();
                fff.close();
            } catch (IOException e) {
                e.printStackTrace();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    // Size the reduce phase: roughly one reducer per 100,000,000 records.
    conf.setNumReduceTasks((int) ((wholerecnum - 1) / (100000000) + 1));

    FileInputFormat.setInputPaths(conf, inputfiles);
    Path outputPath = new Path(outputdir);
    FileOutputFormat.setOutputPath(conf, outputPath);

    conf.setOutputKeyClass(IndexKey.class);
    conf.setOutputValueClass(IndexValue.class);

    conf.setPartitionerClass(IndexPartitioner.class);

    conf.setMapperClass(IndexMap.class);
    conf.setCombinerClass(IndexReduce.class);
    conf.setReducerClass(IndexReduce.class);

    if (column) {
        conf.setInputFormat(IColumnInputFormat.class);
    } else {
        conf.setInputFormat(IFormatInputFormat.class);
    }
    conf.setOutputFormat(IndexIFormatOutputFormat.class);

    try {
        JobClient jc = new JobClient(conf);
        return jc.submitJob(conf);
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
}