Example usage for org.apache.hadoop.mapred JobConf get

Introduction

In this page you can find the example usage for org.apache.hadoop.mapred JobConf get.

Prototype

public String get(String name)

Source Link

Document

Get the value of the name property, null if no such property exists.

Usage

From source file:DataJoinMapperBase.java

License:Apache License

public void configure(JobConf job) {
    super.configure(job);
    this.job = job;
    this.inputFile = job.get("map.input.file");
    this.inputTag = generateInputTag(this.inputFile);
}

From source file:StreamWikiDumpInputFormat.java

License:Apache License

public RecordReader<Text, Text> getRecordReader(final InputSplit genericSplit, JobConf job, Reporter reporter)
        throws IOException {
    // handling non-standard record reader (likely StreamXmlRecordReader)
    FileSplit split = (FileSplit) genericSplit;
    LOG.info("getRecordReader start.....split=" + split);
    reporter.setStatus(split.toString());

    // Open the file and seek to the start of the split
    FileSystem fs = split.getPath().getFileSystem(job);
    String patt = job.get(KEY_EXCLUDE_PAGE_PATTERN);
    boolean prev = job.getBoolean(KEY_PREVIOUS_REVISION, true);
    return new MyRecordReader(split, reporter, job, fs,
            patt != null && !"".equals(patt) ? Pattern.compile(patt) : null, prev);
}

From source file:Text2FormatStorageMR.java

License:Open Source License

@SuppressWarnings({ "unchecked", "deprecation" })
public static void showSplits(JobConf conf) throws IOException {
    FormatStorageInputFormat inputFormat = new FormatStorageInputFormat();
    InputSplit[] splits = inputFormat.getSplits(conf, 1);
    int size = splits.length;
    System.out.println("getSplits return size:" + size);
    for (int i = 0; i < size; i++) {
        FormatStorageSplit split = (FormatStorageSplit) splits[i];
        System.out.printf("split:" + i + "offset:" + split.getStart() + "len:" + split.getLength() + "path:"
                + conf.get(ConstVar.InputPath) + "beginLine:" + split.getBeginLine() + "endLine:"
                + split.getEndLine() + "\n");
    }/*from  w  w w . j av a  2  s  . c o m*/
}

From source file:TestFormatStorageInputFormat.java

License:Open Source License

public static void main(String[] argv) throws IOException, SerDeException {
    try {//from www. j  av  a2 s  .  c om
        if (argv.length != 2) {
            System.out.println("TestFormatStorageInputFormat <input> <output>");
            System.exit(-1);
        }

        JobConf conf = new JobConf(TestFormatStorageInputFormat.class);

        conf.setJobName("TestFormatStorageInputFormat");

        conf.setNumMapTasks(1);
        conf.setNumReduceTasks(1);

        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Unit.Record.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(FormatStorageOutputFormat.class);
        conf.set("mapred.output.compress", "flase");

        conf.set("mapred.input.dir", argv[0]);

        Head head = new Head();
        initHead(head);

        head.toJobConf(conf);

        FormatStorageSerDe serDe = initSerDe(conf);
        StandardStructObjectInspector oi = (StandardStructObjectInspector) serDe.getObjectInspector();
        List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();

        FileInputFormat.setInputPaths(conf, argv[0]);
        Path outputPath = new Path(argv[1]);
        FileOutputFormat.setOutputPath(conf, outputPath);

        InputFormat inputFormat = new FormatStorageInputFormat();
        InputSplit[] inputSplits = inputFormat.getSplits(conf, 1);
        if (inputSplits.length == 0) {
            System.out.println("inputSplits is empty");
            return;
        } else {
            System.out.println("get Splits:" + inputSplits.length);
        }

        int size = inputSplits.length;
        System.out.println("getSplits return size:" + size);
        for (int i = 0; i < size; i++) {
            FormatStorageSplit split = (FormatStorageSplit) inputSplits[i];
            System.out.printf("split:" + i + "offset:" + split.getStart() + "len:" + split.getLength() + "path:"
                    + conf.get(ConstVar.InputPath) + "beginLine:" + split.getBeginLine() + "endLine:"
                    + split.getEndLine() + "\n");
        }

        {
            int totalDelay = 0;
            RecordReader<WritableComparable, Writable> currRecReader = null;
            for (int i = 0; i < inputSplits.length; i++) {
                currRecReader = inputFormat.getRecordReader(inputSplits[i], conf, Reporter.NULL);

                WritableComparable key;
                Writable value;

                key = currRecReader.createKey();
                value = currRecReader.createValue();

                long begin = System.currentTimeMillis();
                int count = 0;
                while (currRecReader.next(key, value)) {
                    Record record = (Record) value;

                    Object row = serDe.deserialize(record);
                    count++;
                }
                long end = System.currentTimeMillis();

                long delay = (end - begin) / 1000;
                totalDelay += delay;
                System.out.println(count + " record read over, delay " + delay + " s");
            }

            System.out.println("total delay:" + totalDelay);
        }
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println("get exception:" + e.getMessage());
    }
}

From source file:Text2ColumntStorageMR.java

License:Open Source License

@SuppressWarnings({ "unchecked", "deprecation" })
public static void showSplits(JobConf conf) throws IOException {
    ColumnStorageInputFormat inputFormat = new ColumnStorageInputFormat();
    InputSplit[] splits = inputFormat.getSplits(conf, 1);
    int size = splits.length;
    System.out.println("getSplits return size:" + size);
    for (int i = 0; i < size; i++) {
        ColumnStorageSplit split = (ColumnStorageSplit) splits[i];
        System.out.printf("split:" + i + "offset:" + split.getStart() + "len:" + split.getLength() + "path:"
                + conf.get(ConstVar.InputPath) + "beginLine:" + split.getBeginLine() + "endLine:"
                + split.getEndLine() + "\n");
    }//  ww  w.j a  va2 s. c o  m
}

From source file:azkaban.jobtype.javautils.AbstractHadoopJob.java

License:Apache License

@SuppressWarnings("rawtypes")
public JobConf createJobConf(Class<? extends Mapper> mapperClass, Class<? extends Reducer> reducerClass)
        throws IOException, URISyntaxException {
    JobConf conf = new JobConf();
    // set custom class loader with custom find resource strategy.

    conf.setJobName(getJobName());//from   ww w  .j a va 2  s .c  o  m
    conf.setMapperClass(mapperClass);
    if (reducerClass != null) {
        conf.setReducerClass(reducerClass);
    }

    if (props.getBoolean("is.local", false)) {
        conf.set("mapred.job.tracker", "local");
        conf.set("fs.default.name", "file:///");
        conf.set("mapred.local.dir", "/tmp/map-red");

        logger.info("Running locally, no hadoop jar set.");
    } else {
        HadoopUtils.setClassLoaderAndJar(conf, getClass());
        logger.info("Setting hadoop jar file for class:" + getClass() + "  to " + conf.getJar());
        logger.info("*************************************************************************");
        logger.info(
                "          Running on Real Hadoop Cluster(" + conf.get("mapred.job.tracker") + ")           ");
        logger.info("*************************************************************************");
    }

    // set JVM options if present
    if (props.containsKey("mapred.child.java.opts")) {
        conf.set("mapred.child.java.opts", props.getString("mapred.child.java.opts"));
        logger.info("mapred.child.java.opts set to " + props.getString("mapred.child.java.opts"));
    }

    // set input and output paths if they are present
    if (props.containsKey("input.paths")) {
        List<String> inputPaths = props.getStringList("input.paths");
        if (inputPaths.size() == 0)
            throw new IllegalArgumentException("Must specify at least one value for property 'input.paths'");
        for (String path : inputPaths) {
            HadoopUtils.addAllSubPaths(conf, new Path(path));
        }
    }

    if (props.containsKey("output.path")) {
        String location = props.get("output.path");
        FileOutputFormat.setOutputPath(conf, new Path(location));

        // For testing purpose only remove output file if exists
        if (props.getBoolean("force.output.overwrite", false)) {
            FileSystem fs = FileOutputFormat.getOutputPath(conf).getFileSystem(conf);
            fs.delete(FileOutputFormat.getOutputPath(conf), true);
        }
    }

    // Adds External jars to hadoop classpath
    String externalJarList = props.getString("hadoop.external.jarFiles", null);
    if (externalJarList != null) {
        FileSystem fs = FileSystem.get(conf);
        String[] jarFiles = externalJarList.split(",");
        for (String jarFile : jarFiles) {
            logger.info("Adding extenral jar File:" + jarFile);
            DistributedCache.addFileToClassPath(new Path(jarFile), conf, fs);
        }
    }

    // Adds distributed cache files
    String cacheFileList = props.getString("hadoop.cache.files", null);
    if (cacheFileList != null) {
        String[] cacheFiles = cacheFileList.split(",");
        for (String cacheFile : cacheFiles) {
            logger.info("Adding Distributed Cache File:" + cacheFile);
            DistributedCache.addCacheFile(new URI(cacheFile), conf);
        }
    }

    // Adds distributed cache files
    String archiveFileList = props.getString("hadoop.cache.archives", null);
    if (archiveFileList != null) {
        String[] archiveFiles = archiveFileList.split(",");
        for (String archiveFile : archiveFiles) {
            logger.info("Adding Distributed Cache Archive File:" + archiveFile);
            DistributedCache.addCacheArchive(new URI(archiveFile), conf);
        }
    }

    String hadoopCacheJarDir = props.getString("hdfs.default.classpath.dir", null);
    if (hadoopCacheJarDir != null) {
        FileSystem fs = FileSystem.get(conf);
        if (fs != null) {
            FileStatus[] status = fs.listStatus(new Path(hadoopCacheJarDir));

            if (status != null) {
                for (int i = 0; i < status.length; ++i) {
                    if (!status[i].isDir()) {
                        Path path = new Path(hadoopCacheJarDir, status[i].getPath().getName());
                        logger.info("Adding Jar to Distributed Cache Archive File:" + path);

                        DistributedCache.addFileToClassPath(path, conf, fs);
                    }
                }
            } else {
                logger.info("hdfs.default.classpath.dir " + hadoopCacheJarDir + " is empty.");
            }
        } else {
            logger.info("hdfs.default.classpath.dir " + hadoopCacheJarDir + " filesystem doesn't exist");
        }
    }

    for (String key : getProps().getKeySet()) {
        String lowerCase = key.toLowerCase();
        if (lowerCase.startsWith(HADOOP_PREFIX)) {
            String newKey = key.substring(HADOOP_PREFIX.length());
            conf.set(newKey, getProps().get(key));
        }
    }

    HadoopUtils.setPropsInJob(conf, getProps());

    // put in tokens
    if (System.getenv(HADOOP_TOKEN_FILE_LOCATION) != null) {
        conf.set(MAPREDUCE_JOB_CREDENTIALS_BINARY, System.getenv(HADOOP_TOKEN_FILE_LOCATION));
    }

    return conf;
}

From source file:babel.prep.extract.PageExtMapper.java

License:Apache License

/**
 * Recovers the nutch segment processed by this mapper from job config.
 *///w  w  w .  j  a  va 2s  .co m
public void configure(JobConf job) throws IllegalArgumentException {
    // Recover the segment processed by this mapper
    String fileDir = job.get("map.input.file");
    String segmentsDir = job.get(NutchPageExtractor.JOB_PROP_SEGMENTS_DIR);
    int idx;

    if (fileDir == null || segmentsDir == null || ((idx = fileDir.indexOf(segmentsDir)) < 0)) {
        throw new IllegalArgumentException("Could not recover mapper's segment.");
    } else {
        String segment = fileDir.substring(idx + segmentsDir.length());
        idx = segment.startsWith(File.separator) ? 1 : 0;
        m_segmentId = segment.substring(idx, segment.indexOf(File.separator, idx));
    }
}

From source file:babel.prep.langid.LangIdMapper.java

License:Apache License

/**
 * Sets up the language detector./*from   w w w  .ja va  2 s. c  o m*/
 */
public void configure(JobConf job) {
    String referrer = job.get(LangIdentifier.JOB_PROP_JOB_REFERRER);

    // m_detector = new NutchLangDetector(job); -OR-
    m_detector = new GoogleLangDetector(referrer);
}

From source file:babel.prep.langidtime.LangAndTimeMapper.java

License:Apache License

/**
 * Sets up the language detector.//from ww w. j a va2s.  c o m
 */
public void configure(JobConf job) {
    String referrer = job.get(LangAndTimeExtractor.JOB_PROP_JOB_REFERRER);

    m_extractor = new URLAndContentsLangTimeExtractor(referrer);
}

From source file:boa.datagen.SeqSort.java

License:Apache License

/**
 * The main driver for sort program./*from  w  w w.  j ava 2  s  .  c o  m*/
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there is communication problems with the 
 *                     job tracker.
 */
@Override
public int run(String[] args) throws Exception {
    System.out.println(inPath);

    JobConf jobConf = new JobConf(getConf(), SeqSort.class);
    jobConf.setJobName("sorter");

    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(IdentityReducer.class);

    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = jobConf.get("test.sort.reduces_per_host");
    if (sort_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    }

    // Set user-supplied (possibly default) job configs
    jobConf.setNumReduceTasks(num_reduces);

    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(BytesWritable.class);

    SequenceFileOutputFormat.setCompressOutput(jobConf, true);
    SequenceFileOutputFormat.setOutputCompressorClass(jobConf, SnappyCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(jobConf, CompressionType.BLOCK);

    // Make sure there are exactly 2 parameters left.
    FileInputFormat.setInputPaths(jobConf, inPath);
    FileOutputFormat.setOutputPath(jobConf, new Path(outPath));

    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from "
            + FileInputFormat.getInputPaths(jobConf)[0] + " into " + FileOutputFormat.getOutputPath(jobConf)
            + " with " + num_reduces + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    jobResult = JobClient.runJob(jobConf);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return 0;
}