List of usage examples for org.apache.hadoop.mapred JobConf set
public void set(String name, String value)
Sets the value of the name property.
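Before the project examples, here is a minimal self-contained sketch of the call itself (not drawn from any project below; the property name and values are made up for illustration): set stores a string value under a property name, and get reads it back.

import org.apache.hadoop.mapred.JobConf;

public class JobConfSetExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // Store a string value under a property name of our choosing.
        conf.set("example.property.name", "example-value");
        // Read the value back; an overload of get() accepts a default value.
        System.out.println(conf.get("example.property.name"));        // prints "example-value"
        System.out.println(conf.get("missing.property", "fallback")); // prints "fallback"
    }
}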
From source file:hudson.plugins.hadoop.JobTrackerStartTask.java
License:Open Source License
public Void call() throws Exception {
    // Configuration conf = new Configuration();
    JobConf jc = new JobConf();
    jc.set("fs.default.name", hdfsUrl);
    jc.set("mapred.job.tracker", jobTrackerAddress);
    jc.set("mapred.job.tracker.http.address", "0.0.0.0:" + HTTP_PORT);
    jc.set("mapred.local.dir", new File(hudsonRoot, "hadoop/mapred").getPath());
    tracker = JobTracker.startTracker(jc);
    new Thread(this).start();
    return null;
}
From source file:hudson.plugins.hadoop.TaskTrackerStartTask.java
License:Open Source License
public Void call() throws IOException {
    System.out.println("Starting data node");
    JobConf conf = new JobConf();
    conf.set("fs.default.name", hdfsUrl);
    conf.set("mapred.job.tracker", jobTrackerAddress);
    conf.set("mapred.task.tracker.http.address", "0.0.0.0:0");
    conf.set("mapred.task.tracker.report.address", "0.0.0.0:0");
    conf.set("mapred.local.dir", new File(new File(rootPath), "hadoop/task-tracker").getAbsolutePath());
    conf.set("slave.host.name", slaveHostName);
    new Thread(new TaskTracker(conf)).start();
    return null;
}
From source file:hydrograph.engine.cascading.scheme.avro.CustomAvroScheme.java
License:Apache License
/**
 * sinkConfInit is called by cascading to set up the sinks. This happens on
 * the client side before the job is distributed. There is a check for the
 * presence of a schema and an exception is thrown if none has been
 * provided. After the schema check the conf object is given the options
 * that Avro needs.
 *
 * @param flowProcess
 *            The cascading FlowProcess object. Should be passed in by
 *            cascading automatically.
 * @param tap
 *            The cascading Tap object. Should be passed in by cascading
 *            automatically.
 * @param conf
 *            The Hadoop JobConf object. This is passed in by cascading
 *            automatically.
 * @throws RuntimeException
 *             If no schema is present this halts the entire process.
 */
@Override
public void sinkConfInit(FlowProcess<? extends JobConf> flowProcess,
        Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) {
    if (schema == null) {
        throw new RuntimeException("Must provide sink schema");
    }
    // Set the output schema and output format class
    conf.set(AvroJob.OUTPUT_SCHEMA, schema.toString());
    conf.setOutputFormat(AvroOutputFormat.class);
    // add AvroSerialization to io.serializations
    addAvroSerializations(conf);
}
From source file:hydrograph.engine.cascading.scheme.avro.CustomAvroScheme.java
License:Apache License
/**
 * sourceConfInit is called by cascading to set up the sources. This happens
 * on the client side before the job is distributed. There is a check for
 * the presence of a schema and if none has been provided the data is peeked
 * at to get a schema. After the schema check the conf object is given the
 * options that Avro needs.
 *
 * @param flowProcess
 *            The cascading FlowProcess object. Should be passed in by
 *            cascading automatically.
 * @param tap
 *            The cascading Tap object. Should be passed in by cascading
 *            automatically.
 * @param conf
 *            The Hadoop JobConf object. This is passed in by cascading
 *            automatically.
 * @throws RuntimeException
 *             If no schema is present this halts the entire process.
 */
@Override
public void sourceConfInit(FlowProcess<? extends JobConf> flowProcess,
        Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) {
    retrieveSourceFields(flowProcess, tap);
    // Set the input schema and input class
    conf.set(AvroJob.INPUT_SCHEMA, schema.toString());
    conf.setInputFormat(AvroInputFormat.class);
    // add AvroSerialization to io.serializations
    addAvroSerializations(conf);
}
From source file:hydrograph.engine.cascading.scheme.hive.parquet.HiveParquetScheme.java
License:Apache License
@Override
public void sourceConfInit(FlowProcess<? extends JobConf> fp, Tap<JobConf, RecordReader, OutputCollector> tap,
        JobConf jobConf) {
    jobConf.setInputFormat(DeprecatedParquetInputFormat.class);
    jobConf.set("hive.parquet.timestamp.skip.conversion", "false");
    ParquetInputFormat.setReadSupportClass(jobConf, DataWritableReadSupport.class);
}
From source file:hydrograph.engine.cascading.scheme.hive.parquet.HiveParquetScheme.java
License:Apache License
@Override
public void sinkConfInit(FlowProcess<? extends JobConf> fp, Tap<JobConf, RecordReader, OutputCollector> tap,
        JobConf jobConf) {
    jobConf.setOutputFormat(DeprecatedParquetOutputFormat.class);
    jobConf.set(DataWritableWriteSupport.PARQUET_HIVE_SCHEMA,
            HiveParquetSchemeHelper.getParquetSchemeMessage(hiveTableDescriptor));
    ParquetOutputFormat.setWriteSupportClass(jobConf, DataWritableWriteSupport.class);
}
From source file:hydrograph.engine.cascading.scheme.parquet.ParquetTupleScheme.java
License:Apache License
@Override
public void sinkConfInit(FlowProcess<? extends JobConf> fp, Tap<JobConf, RecordReader, OutputCollector> tap,
        JobConf jobConf) {
    jobConf.setOutputFormat(DeprecatedParquetOutputFormat.class);
    jobConf.set(ParquetTupleWriter.PARQUET_CASCADING_SCHEMA, parquetSchema);
    ParquetOutputFormat.setWriteSupportClass(jobConf, ParquetTupleWriter.class);
}
From source file:hydrograph.engine.hadoop.utils.HadoopConfigProvider.java
License:Apache License
private static void copyProperties(JobConf jobConf, Map<Object, Object> properties) {
    if (properties instanceof Properties) {
        Properties props = (Properties) properties;
        Set<String> keys = props.stringPropertyNames();
        for (String key : keys)
            jobConf.set(key, props.getProperty(key));
    } else {
        for (Map.Entry<Object, Object> entry : properties.entrySet()) {
            if (entry.getValue() != null)
                jobConf.set(entry.getKey().toString(), entry.getValue().toString());
        }
    }
}
From source file:IndexService.IndexMergeMR.java
License:Open Source License
public static RunningJob run(String inputfiles, String outputdir, Configuration conf) {
    if (inputfiles == null || outputdir == null)
        return null;

    JobConf job = new JobConf(conf);
    job.setJobName("MergeIndexMR");
    job.setJarByClass(IndexMergeMR.class);
    job.setNumReduceTasks(1);
    FileSystem fs = null;
    try {
        fs = FileSystem.get(job);
        fs.delete(new Path(outputdir), true);

        // Collect the data files referenced by every input index file.
        String[] ifs = inputfiles.split(",");
        TreeSet<String> files = new TreeSet<String>();
        for (int i = 0; i < ifs.length; i++) {
            IFormatDataFile ifdf = new IFormatDataFile(job);
            ifdf.open(ifs[i]);
            Collection<String> strs = ifdf.fileInfo().head().getUdi().infos().values();
            for (String str : strs) {
                files.add(str);
            }
            ifdf.close();
        }
        StringBuffer sb = new StringBuffer();
        for (String str : files) {
            sb.append(str + ",");
        }
        job.set(ConstVar.HD_index_filemap, sb.substring(0, sb.length() - 1));

        // Record the field layout and data file type of the index records in the job configuration.
        IFormatDataFile ifdf = new IFormatDataFile(job);
        ifdf.open(ifs[0]);
        HashMap<Integer, IRecord.IFType> map = ifdf.fileInfo().head().fieldMap().fieldtypes();
        ArrayList<String> fieldStrings = new ArrayList<String>();
        for (int i = 0; i < map.size(); i++) {
            IRecord.IFType type = map.get(i);
            fieldStrings.add(type.type() + ConstVar.RecordSplit + type.idx());
        }
        job.setStrings(ConstVar.HD_fieldMap, fieldStrings.toArray(new String[fieldStrings.size()]));
        job.set("datafiletype", ifdf.fileInfo().head().getUdi().infos().get(123456));
        ifdf.close();
    } catch (Exception e2) {
        e2.printStackTrace();
    }

    // Standard MapReduce wiring: a single reducer merges all index entries.
    FileInputFormat.setInputPaths(job, inputfiles);
    FileOutputFormat.setOutputPath(job, new Path(outputdir));

    job.setOutputKeyClass(IndexKey.class);
    job.setOutputValueClass(IndexValue.class);

    job.setPartitionerClass(IndexMergePartitioner.class);

    job.setMapperClass(MergeIndexMap.class);
    job.setCombinerClass(MergeIndexReduce.class);
    job.setReducerClass(MergeIndexReduce.class);

    job.setInputFormat(IndexMergeIFormatInputFormat.class);
    job.setOutputFormat(IndexMergeIFormatOutputFormat.class);

    try {
        JobClient jc = new JobClient(job);
        return jc.submitJob(job);
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
}
From source file:IndexService.IndexMR.java
License:Open Source License
public static RunningJob run(Configuration conf2, String inputfiles, boolean column, String ids,
        String outputdir) {
    if (inputfiles == null || outputdir == null)
        return null;

    JobConf conf = new JobConf(conf2);
    conf.setJobName("IndexMR:\t" + ids);
    conf.setJarByClass(IndexMR.class);
    FileSystem fs = null;
    try {
        fs = FileSystem.get(conf);
        fs.delete(new Path(outputdir), true);
    } catch (IOException e3) {
        e3.printStackTrace();
    }

    // Record which column ids to index and whether the input is column- or format-stored.
    conf.set("index.ids", ids);
    if (column) {
        conf.set("datafiletype", "column");
    } else {
        conf.set("datafiletype", "format");
    }

    String[] ifs = inputfiles.split(",");
    long wholerecnum = 0;

    // Build the field map: one entry per indexed column plus two trailing Short/Int fields.
    String[] idxs = ids.split(",");
    String[] fieldStrings = new String[idxs.length + 2];

    if (!column) {
        IFormatDataFile ifdf;
        try {
            ifdf = new IFormatDataFile(conf);
            ifdf.open(ifs[0]);
            for (int i = 0; i < idxs.length; i++) {
                int id = Integer.parseInt(idxs[i]);
                byte type = ifdf.fileInfo().head().fieldMap().fieldtypes().get(id).type();
                fieldStrings[i] = type + ConstVar.RecordSplit + i;
            }
            ifdf.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    } else {
        try {
            IColumnDataFile icdf = new IColumnDataFile(conf);
            icdf.open(ifs[0]);
            for (int i = 0; i < idxs.length; i++) {
                int id = Integer.parseInt(idxs[i]);
                byte type = icdf.fieldtypes().get(id).type();
                fieldStrings[i] = type + ConstVar.RecordSplit + i;
            }
            icdf.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    fieldStrings[fieldStrings.length - 2] = ConstVar.FieldType_Short + ConstVar.RecordSplit
            + (fieldStrings.length - 2);
    fieldStrings[fieldStrings.length - 1] = ConstVar.FieldType_Int + ConstVar.RecordSplit
            + (fieldStrings.length - 1);

    conf.setStrings(ConstVar.HD_fieldMap, fieldStrings);

    // Record the input file map and count the total number of records across all inputs.
    if (!column) {
        conf.set(ConstVar.HD_index_filemap, inputfiles);
        for (String file : ifs) {
            IFormatDataFile fff;
            try {
                fff = new IFormatDataFile(conf);
                fff.open(file);
                wholerecnum += fff.segIndex().recnum();
                fff.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    } else {
        HashSet<String> files = new HashSet<String>();
        for (String file : ifs) {
            files.add(file);
        }
        StringBuffer sb = new StringBuffer();
        for (String str : files) {
            sb.append(str).append(",");
        }
        conf.set(ConstVar.HD_index_filemap, sb.substring(0, sb.length() - 1));

        for (String file : files) {
            Path parent = new Path(file).getParent();
            try {
                FileStatus[] fss = fs.listStatus(parent);
                String openfile = "";
                for (FileStatus status : fss) {
                    if (status.getPath().toString().contains(file)) {
                        openfile = status.getPath().toString();
                        break;
                    }
                }
                IFormatDataFile fff = new IFormatDataFile(conf);
                fff.open(openfile);
                wholerecnum += fff.segIndex().recnum();
                fff.close();
            } catch (IOException e) {
                e.printStackTrace();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    // Size the reduce phase: roughly one reducer per 100,000,000 records.
    conf.setNumReduceTasks((int) ((wholerecnum - 1) / (100000000) + 1));

    FileInputFormat.setInputPaths(conf, inputfiles);
    Path outputPath = new Path(outputdir);
    FileOutputFormat.setOutputPath(conf, outputPath);

    conf.setOutputKeyClass(IndexKey.class);
    conf.setOutputValueClass(IndexValue.class);

    conf.setPartitionerClass(IndexPartitioner.class);

    conf.setMapperClass(IndexMap.class);
    conf.setCombinerClass(IndexReduce.class);
    conf.setReducerClass(IndexReduce.class);

    if (column) {
        conf.setInputFormat(IColumnInputFormat.class);
    } else {
        conf.setInputFormat(IFormatInputFormat.class);
    }
    conf.setOutputFormat(IndexIFormatOutputFormat.class);

    try {
        JobClient jc = new JobClient(conf);
        return jc.submitJob(conf);
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
}