Example usage for org.apache.hadoop.mapred JobConf get

List of usage examples for org.apache.hadoop.mapred JobConf get

Introduction

On this page you can find example usage for org.apache.hadoop.mapred JobConf get.

Prototype

public String get(String name) 

Document

Get the value of the name property, or null if no such property exists.
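
As a quick illustration, here is a minimal, self-contained sketch of how the single-argument get is typically used; the class name JobConfGetExample and the property names example.input.path and example.missing.property are hypothetical and used only for illustration. It shows that get returns the stored value for a set property and null for a missing one.

import org.apache.hadoop.mapred.JobConf;

public class JobConfGetExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // "example.input.path" is a hypothetical property, set here only for illustration
        conf.set("example.input.path", "/tmp/input");

        // get(String) returns the configured value when the property exists
        String input = conf.get("example.input.path");
        System.out.println("example.input.path = " + input);

        // get(String) returns null when the property was never set,
        // so callers typically null-check or use get(name, defaultValue)
        String missing = conf.get("example.missing.property");
        if (missing == null) {
            System.err.println("example.missing.property is not set");
        }
    }
}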

Usage

From source file:edu.uci.ics.fuzzyjoin.hadoop.datagen.MapNewRecord.java

License:Apache License

@Override
public void configure(JobConf job) {
    //
    // create RecordGenerator
    //
    int offset = job.getInt(FuzzyJoinDriver.DATA_CRTCOPY_PROPERTY, -1);
    if (offset == -1) {
        System.err.println("ERROR: fuzzyjoin.data.crtcopy not set.");
        System.exit(-1);
    }
    recordGenerator = new RecordGenerator(offset);
    int noRecords = job.getInt(FuzzyJoinDriver.DATA_NORECORDS_PROPERTY, -1);
    if (noRecords == -1) {
        System.err.println("ERROR: fuzzyjoin.data.norecords not set.");
        System.exit(-1);
    }
    offsetRID = offset * noRecords;
    int dictionaryFactor = job.getInt(FuzzyJoinDriver.DATA_DICTIONARY_FACTOR_PROPERTY, 1);
    //
    // set RecordGenerator
    //
    Path tokenRankFile;
    try {
        Path[] cache = DistributedCache.getLocalCacheFiles(job);
        if (cache == null) {
            tokenRankFile = new Path(job.get(FuzzyJoinConfig.DATA_TOKENS_PROPERTY));
        } else {
            tokenRankFile = cache[0];
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    TokenLoad tokenLoad = new TokenLoad(tokenRankFile.toString(), recordGenerator);
    tokenLoad.loadTokenRank(dictionaryFactor);
    //
    // set Tokenizer
    //
    tokenizer = TokenizerFactory.getTokenizer(
            job.get(FuzzyJoinConfig.TOKENIZER_PROPERTY, FuzzyJoinConfig.TOKENIZER_VALUE),
            FuzzyJoinConfig.WORD_SEPARATOR_REGEX, FuzzyJoinConfig.TOKEN_SEPARATOR);
    //
    // set dataColumn
    //
    dataColumns = FuzzyJoinUtil
            .getDataColumns(job.get(FuzzyJoinConfig.RECORD_DATA_PROPERTY, FuzzyJoinConfig.RECORD_DATA_VALUE));
    // Arrays.sort(dataColumns);
}

From source file:edu.uci.ics.fuzzyjoin.hadoop.recordpairs.MapBroadcastJoin.java

License:Apache License

@Override
public void configure(JobConf job) {
    LOG.info("Configure START");
    //
    // read join index
    //
    Path[] cache = null;
    try {
        cache = DistributedCache.getLocalCacheFiles(job);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    if (cache == null) {
        addJoinIndex(new Path(job.get(FuzzyJoinDriver.DATA_JOININDEX_PROPERTY)));
    } else {
        for (Path path : cache) {
            addJoinIndex(path);
        }
    }
    //
    // get suffix for second relation
    //
    suffixSecond = job.get(FuzzyJoinDriver.DATA_SUFFIX_INPUT_PROPERTY, "")
            .split(FuzzyJoinDriver.SEPSARATOR_REGEX)[1];
    LOG.info("Configure END");
}

From source file:edu.uci.ics.fuzzyjoin.hadoop.recordpairs.MapBroadcastSelfJoin.java

License:Apache License

@Override
public void configure(JobConf job) {
    LOG.info("Configure START");
    //
    // read join index
    //
    Path[] cache = null;
    try {
        cache = DistributedCache.getLocalCacheFiles(job);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    if (cache == null) {
        addJoinIndex(new Path(job.get(FuzzyJoinDriver.DATA_JOININDEX_PROPERTY)));
    } else {
        for (Path path : cache) {
            addJoinIndex(path);
        }
    }
    LOG.info("Configure END");
}

From source file:edu.uci.ics.hyracks.dataflow.hadoop.AbstractHadoopOperatorDescriptor.java

License:Apache License

public void populateCache(JobConf jobConf) {
    try {
        String cache = jobConf.get(MAPRED_CACHE_FILES);
        System.out.println("cache:" + cache);
        if (cache == null) {
            return;
        }
        String localCache = jobConf.get(MAPRED_CACHE_LOCALFILES);
        System.out.println("localCache:" + localCache);
        if (localCache != null) {
            return;
        }
        localCache = "";
        StringTokenizer cacheTokenizer = new StringTokenizer(cache, ",");
        while (cacheTokenizer.hasMoreTokens()) {
            if (!"".equals(localCache)) {
                localCache += ",";
            }
            try {
                localCache += DCacheClient.get().get(cacheTokenizer.nextToken());
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
        jobConf.set(MAPRED_CACHE_LOCALFILES, localCache);
        System.out.println("localCache:" + localCache);
    } catch (Exception e) {
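        // failures while populating the cache are swallowed here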

    }
}

From source file:edu.ucsb.cs.hybrid.mappers.PSS3_SingleS_Mapper.java

License:Apache License

@Override
public void configure(JobConf job) {
    super.configure(job);
    PostingDocWeight[] b = new PostingDocWeight[1];
    for (i = 0; i < r; i++)
        Spointers.addCol(b);
    ArrayList<PostingDocWeight> a = new ArrayList<PostingDocWeight>();
    for (i = 0; i < r; i++)
        Bpointers.addRow(a);
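    // RANGE and R are required job properties; get(...) returns null (and parseInt fails) if they are missing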
    this.range = Integer.parseInt(job.get("RANGE"));
    this.r = Integer.parseInt(job.get("R"));
    Bdone = new BitSet(blockSize);
    blockCurrent = new IndexTermWeight[blockSize];
    for (i = 0; i < blockSize; i++)
        blockCurrent[i] = new IndexTermWeight();
    BdocumentLen = new int[blockSize];
}

From source file:edu.ucsb.cs.hybrid.mappers.SingleS_Runner.java

License:Apache License

public static Reader getReader(JobConf conf) throws IOException {
    boolean oneMap = conf.getBoolean(Config.SINGLE_MAP_PROPERTY, Config.SINGLE_MAP_VALUE);
    boolean splittable = conf.getBoolean(Config.SPLITABLE_PROPERTY, Config.SPLITABLE_VALUE);

    if (!oneMap || splittable)
        return new Reader(conf, new Path(conf.get("map.input.file")),
                conf.getInt(Config.COMP_BLOCK_PROPERTY, Config.COMP_BLOCK_VALUE));
    else
        return new OneMapReader(conf, new Path(conf.get("map.input.file")),
                conf.getInt(Config.COMP_BLOCK_PROPERTY, Config.COMP_BLOCK_VALUE));
}

From source file:edu.ucsb.cs.preprocessing.sequence.SeqMapper.java

License:Apache License

@Override
public void configure(JobConf job) {
    Path idPath = new Path(HashPagesDriver.IDS_FILE1);
    readIdMappings(job, idPath);
    openIdsMapping(HashPagesDriver.IDS_FILE2, job.get("mapred.task.partition"));
}

From source file:edu.umd.cloud9.collection.trec.BuildTrecForwardIndex.java

License:Apache License

/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        printUsage();
        return -1;
    }

    JobConf conf = new JobConf(getConf(), BuildTrecForwardIndex.class);
    FileSystem fs = FileSystem.get(getConf());

    String collectionPath = args[0];
    String outputPath = args[1];
    String indexFile = args[2];
    String mappingFile = args[3];

    LOG.info("Tool name: " + BuildTrecForwardIndex.class.getCanonicalName());
    LOG.info(" - collection path: " + collectionPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - index file: " + indexFile);
    LOG.info(" - mapping file: " + mappingFile);

    conf.setJobName(BuildTrecForwardIndex.class.getSimpleName());

    conf.set("mapred.child.java.opts", "-Xmx1024m");
    conf.setNumReduceTasks(1);

    if (conf.get("mapred.job.tracker").equals("local")) {
        conf.set(DOCNO_MAPPING_FILE_PROPERTY, mappingFile);
    } else {
        DistributedCache.addCacheFile(new URI(mappingFile), conf);
    }

    FileInputFormat.setInputPaths(conf, new Path(collectionPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    FileOutputFormat.setCompressOutput(conf, false);

    conf.setInputFormat(TrecDocumentInputFormat.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(MyMapper.class);
    conf.setReducerClass(IdentityReducer.class);

    // delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(outputPath), true);

    RunningJob job = JobClient.runJob(conf);

    Counters counters = job.getCounters();
    int numDocs = (int) counters.findCounter(Count.DOCS).getCounter();

    String inputFile = outputPath + "/" + "part-00000";

    LOG.info("Writing " + numDocs + " doc offseta to " + indexFile);
    FSLineReader reader = new FSLineReader(inputFile, fs);

    FSDataOutputStream writer = fs.create(new Path(indexFile), true);

    writer.writeUTF(edu.umd.cloud9.collection.trec.TrecForwardIndex.class.getCanonicalName());
    writer.writeUTF(collectionPath);
    writer.writeInt(numDocs);

    int cnt = 0;
    Text line = new Text();
    while (reader.readLine(line) > 0) {
        String[] arr = line.toString().split("\\t");
        long offset = Long.parseLong(arr[1]);
        int len = Integer.parseInt(arr[2]);

        writer.writeLong(offset);
        writer.writeInt(len);

        cnt++;
        if (cnt % 100000 == 0) {
            LOG.info(cnt + " docs");
        }
    }
    reader.close();
    writer.close();
    LOG.info(cnt + " docs total. Done!");

    if (numDocs != cnt) {
        throw new RuntimeException("Unexpected number of documents in building forward index!");
    }

    return 0;
}

From source file:edu.umd.cloud9.webgraph.BuildReverseWebGraph.java

License:Apache License

public int runTool() throws Exception {
    JobConf conf = new JobConf(getConf(), BuildReverseWebGraph.class);
    FileSystem fs = FileSystem.get(conf);

    int numMappers = conf.getInt("Cloud9.Mappers", 1);
    int numReducers = conf.getInt("Cloud9.Reducers", 200);

    String inputPath = conf.get("Cloud9.InputPath");
    String outputPath = conf.get("Cloud9.OutputPath");

    conf.setJobName("ReverseWebGraph");
    conf.set("mapred.child.java.opts", "-Xmx2048m");
    conf.setInt("mapred.task.timeout", 60000000);
    conf.set("mapreduce.map.memory.mb", "2048");
    conf.set("mapreduce.map.java.opts", "-Xmx2048m");
    conf.set("mapreduce.reduce.memory.mb", "2048");
    conf.set("mapreduce.reduce.java.opts", "-Xmx2048m");
    conf.set("mapreduce.task.timeout", "60000000");

    conf.setNumMapTasks(numMappers);
    conf.setNumReduceTasks(numReducers);
    conf.setMapperClass(IdentityMapper.class);
    conf.setReducerClass(Reduce.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(ArrayListWritable.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(ArrayListWritable.class);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    SequenceFileOutputFormat.setCompressOutput(conf, true);
    SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK);

    SequenceFileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    LOG.info("BuildReverseWebGraph");
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);

    if (!fs.exists(new Path(outputPath))) {
        JobClient.runJob(conf);
    } else {
        LOG.info(outputPath + " already exists! Skipping this step...");
    }

    return 0;
}

From source file:edu.umd.cloud9.webgraph.BuildWebGraph.java

License:Apache License

public int runTool() throws Exception {
    JobConf conf = new JobConf(getConf(), BuildWebGraph.class);
    FileSystem fs = FileSystem.get(conf);

    int numMappers = conf.getInt("Cloud9.Mappers", 1);
    int numReducers = conf.getInt("Cloud9.Reducers", 200);

    String inputPath = conf.get("Cloud9.InputPath");
    String outputPath = conf.get("Cloud9.OutputPath");

    conf.setJobName("ConstructWebGraph");
    conf.set("mapred.child.java.opts", "-Xmx2048m");
    conf.setInt("mapred.task.timeout", 60000000);
    conf.set("mapreduce.map.memory.mb", "2048");
    conf.set("mapreduce.map.java.opts", "-Xmx2048m");
    conf.set("mapreduce.reduce.memory.mb", "2048");
    conf.set("mapreduce.reduce.java.opts", "-Xmx2048m");
    conf.set("mapreduce.task.timeout", "60000000");

    conf.setNumMapTasks(numMappers);
    conf.setNumReduceTasks(numReducers);

    conf.setMapperClass(Map.class);
    conf.setReducerClass(Reduce.class);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(ArrayListWritable.class);

    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(ArrayListWritable.class);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    SequenceFileOutputFormat.setCompressOutput(conf, true);
    SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK);

    SequenceFileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    LOG.info("BuildWebGraph");
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);

    if (!fs.exists(new Path(outputPath))) {
        JobClient.runJob(conf);
    } else {
        LOG.info(outputPath + " already exists! Skipping this step...");
    }

    return 0;
}