List of usage examples for org.apache.hadoop.mapred.JobConf.getFloat

public float getFloat(String name, float defaultValue)

Gets the value of the name property as a float. If the property is not set, the supplied defaultValue is returned.
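Before the project examples below, here is a minimal usage sketch. It is not taken from any of those projects: the property key "example.similarity.threshold" and the values 0.8f and 0.65f are made up for illustration. It shows that getFloat returns the default while the property is unset, and the parsed configured value once it has been set (for example via setFloat or a -D option).

import org.apache.hadoop.mapred.JobConf;

public class GetFloatSketch {
    public static void main(String[] args) {
        JobConf job = new JobConf();

        // Property not set yet: the default (0.8f) is returned.
        float threshold = job.getFloat("example.similarity.threshold", 0.8f);
        System.out.println("threshold = " + threshold);

        // After the property is set, getFloat parses and returns the configured value.
        job.setFloat("example.similarity.threshold", 0.65f);
        System.out.println("threshold = " + job.getFloat("example.similarity.threshold", 0.8f));
    }
}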
From source file:edu.uci.ics.fuzzyjoin.hadoop.ridrecordpairs.ppjoin.MapJoin.java
License:Apache License
@Override
public void configure(JobConf job) {
    //
    // set Tokenizer and SimilarityFilters
    //
    tokenizer = TokenizerFactory.getTokenizer(
            job.get(FuzzyJoinConfig.TOKENIZER_PROPERTY, FuzzyJoinConfig.TOKENIZER_VALUE),
            FuzzyJoinConfig.WORD_SEPARATOR_REGEX, FuzzyJoinConfig.TOKEN_SEPARATOR);
    String similarityName = job.get(FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_NAME_VALUE);
    float similarityThreshold = job.getFloat(FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE);
    similarityFilters = SimilarityFiltersFactory.getSimilarityFilters(similarityName, similarityThreshold);
    //
    // set TokenRank and TokenGroup
    //
    Path tokensPath;
    Path lengthstatsPath = null;
    try {
        Path[] cache = DistributedCache.getLocalCacheFiles(job);
        if (cache == null) {
            tokensPath = new Path(job.get(FuzzyJoinConfig.DATA_TOKENS_PROPERTY));
            try {
                lengthstatsPath = new Path(job.get(FuzzyJoinDriver.DATA_LENGTHSTATS_PROPERTY));
            } catch (IllegalArgumentException e) {
            }
        } else {
            tokensPath = cache[0];
            if (job.getBoolean(FuzzyJoinDriver.TOKENS_LENGTHSTATS_PROPERTY,
                    FuzzyJoinDriver.TOKENS_LENGTHSTATS_VALUE)) {
                lengthstatsPath = cache[1];
            }
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    String recordGroupClass = job.get(FuzzyJoinDriver.RIDPAIRS_GROUP_CLASS_PROPERTY,
            FuzzyJoinDriver.RIDPAIRS_GROUP_CLASS_VALUE);
    int recordGroupFactor = job.getInt(FuzzyJoinDriver.RIDPAIRS_GROUP_FACTOR_PROPERTY,
            FuzzyJoinDriver.RIDPAIRS_GROUP_FACTOR_VALUE);
    recordGroup = RecordGroupFactory.getRecordGroup(recordGroupClass,
            Math.max(1, job.getNumReduceTasks() * recordGroupFactor), similarityFilters,
            "" + lengthstatsPath);
    TokenLoad tokenLoad = new TokenLoad(tokensPath.toString(), tokenRank);
    tokenLoad.loadTokenRank();
    //
    // set dataColumn
    //
    dataColumns[0] = FuzzyJoinUtil
            .getDataColumns(job.get(FuzzyJoinConfig.RECORD_DATA_PROPERTY, FuzzyJoinConfig.RECORD_DATA_VALUE));
    dataColumns[1] = FuzzyJoinUtil
            .getDataColumns(job.get(FuzzyJoinConfig.RECORD_DATA1_PROPERTY, FuzzyJoinConfig.RECORD_DATA1_VALUE));
    //
    // get suffix for second relation
    //
    suffixSecond = job.get(FuzzyJoinDriver.DATA_SUFFIX_INPUT_PROPERTY, "")
            .split(FuzzyJoinDriver.SEPSARATOR_REGEX)[1];
}
From source file:edu.uci.ics.fuzzyjoin.hadoop.ridrecordpairs.token.ReduceVerifyListSelfJoin.java
License:Apache License
@Override
public void configure(JobConf job) {
    //
    // set SimilarityFilters
    //
    String similarityName = job.get(FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_NAME_VALUE);
    similarityThreshold = job.getFloat(FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE);
    similarityFilters = SimilarityFiltersFactory.getSimilarityFilters(similarityName, similarityThreshold);
    similarityMetric = SimilarityMetricFactory.getSimilarityMetric(similarityName);
}
From source file:edu.ucsb.cs.hybrid.HybridDriver.java
License:Apache License
public static void main(String args[]) throws ParseException, IOException {
    // job.set("mapred.job.tracker", "local");
    // job.set("fs.default.name", "file:///");
    JobConf job = new JobConf();
    job.setJarByClass(HybridDriver.class);
    new GenericOptionsParser(job, args);
    setMapperAndRunner(job);
    job.setMapOutputKeyClass(DocDocWritable.class);
    job.setMapOutputValueClass(FloatWritable.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(DocDocWritable.class);
    job.setOutputValueClass(FloatWritable.class);

    Path inputPath = new Path(INPUT_DIR);
    CustomSequenceFileInputFormat.addInputPath(job, inputPath);
    Path outputPath = new Path(OUTPUT_DIR);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);
    FileSystem.get(job).delete(outputPath, true);

    job.setBoolean("fs.hdfs.impl.disable.cache", true); // xun not sure if needed

    if (job.getBoolean(Config.SPLITABLE_PROPERTY, Config.SPLITABLE_VALUE)) {
        job.setInputFormat(CustomSequenceFileInputFormat.class);
        Long splitMB = job.getLong(Config.SPLIT_MB_PROPERTY, Config.SPLIT_MB_VALUE) * 1024 * 1024;
        job.setLong("mapred.min.split.size", splitMB);
        job.setLong("mapred.max.split.size", splitMB);
        job.setLong("dfs.block.size", splitMB);
    } else {
        // Comment the following of splitter for www experiments it assumes no splitting
        // of partitions for load balancing, should be fixed.
        Splitter.configure(job, inputPath); // remove comment unless for www
        job.setInputFormat(NonSplitableSequenceInputFormat.class); // remove comment
    }

    // SIGIR'14 two-stage balancing
    // not yet fully incorporated
    if (job.getInt(Config.LOAD_BALANCE_PROPERTY, Config.LOAD_BALANCE_VALUE) != 0) {
        TwoStageLoadbalancing.main(job.getInt(Config.LOAD_BALANCE_PROPERTY, Config.LOAD_BALANCE_VALUE),
                new Path(PartDriver.OUTPUT_DIR), job);
    }
    JobSubmitter.run(job, "SIMILARITY", job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE));
    if (job.getBoolean(Config.CONVERT_TEXT_PROPERTY, Config.CONVERT_TEXT_VALUE))
        IDMappingJob(args);
}
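Note: because this driver passes args through GenericOptionsParser, the threshold read by getFloat can also be supplied at submit time with Hadoop's generic -D option, for example: hadoop jar <jar> edu.ucsb.cs.hybrid.HybridDriver -D <threshold.property>=0.7 <other args>. Here <threshold.property> stands for whatever key Config.THRESHOLD_PROPERTY holds; its actual value is not shown in this snippet.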
From source file:edu.ucsb.cs.hybrid.HybridDriver.java
License:Apache License
public static void run(JobConf job) throws IOException {
    String ret = stars() + "\n Running job: " + job.getJobName() + "\n Input Path: {";
    Path inputs[] = FileInputFormat.getInputPaths(job);
    for (int ctr = 0; ctr < inputs.length; ctr++) {
        if (ctr > 0) {
            ret += "\n ";
        }
        ret += inputs[ctr].toString();
    }
    ret += "}\n";
    ret += " Output Path: " + FileOutputFormat.getOutputPath(job) + "\n";
    ret += " Threshold: " + job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE) + "\n";
    System.err.println(ret);

    Date startTime = new Date();
    JobClient.runJob(job);
    Date end_time = new Date();
    System.err.println("Similarity job took " + (end_time.getTime() - startTime.getTime()) / (float) 1000.0
            + " seconds.");
}
From source file:edu.ucsb.cs.hybrid.HybridDriver.java
License:Apache License
public static void IDMappingJob(String[] args) throws IOException {
    JobConf job = new JobConf();
    new GenericOptionsParser(job, args);
    job.setJarByClass(HybridDriver.class);
    job.setJobName("Converting binary similarity scores to text");
    job.setMapperClass(IDMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    Path inputPath = new Path(OUTPUT_DIR);
    job.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.setInputPaths(job, inputPath);
    Path outputPath = new Path("SimilarityScores");
    job.setOutputFormat(TextOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);
    FileSystem.get(job).delete(outputPath, true);
    HashPagesDriver.prepareDistribCache(job, HashPagesDriver.IDS_FILE2); // remove not sure

    JobSubmitter.run(job, "BINARY TO TEXT", job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE));
}
From source file:edu.ucsb.cs.hybrid.mappers.SingleS_Mapper.java
License:Apache License
@Override
public void configure(JobConf job) {
    blockSize = job.getInt(Config.COMP_BLOCK_PROPERTY, Config.COMP_BLOCK_VALUE);
    threshold = job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE);
}
From source file:edu.ucsb.cs.knn.KnnDriver.java
License:Apache License
/**
 * Submit the configured job to the Hadoop JobTracker to start the process.
 */
public static void run(JobConf job) throws IOException {
    job.setJarByClass(KnnDriver.class); // This method sets the jar

    String ret = stars() + "\nKnnDriver(" + job.getJobName() + ")\n" + " Input Path: {";
    Path inputs[] = FileInputFormat.getInputPaths(job);
    for (int ctr = 0; ctr < inputs.length; ctr++) {
        if (ctr > 0) {
            ret += "\n ";
        }
        ret += inputs[ctr].toString();
    }
    ret += "}\n";
    ret += " Output Path: " + FileOutputFormat.getOutputPath(job) + "\n" + " Map Tasks: "
            + job.getNumMapTasks() + "\n" + " Reduce Tasks: " + job.getNumReduceTasks() + "\n";
    ret += " Threshold: " + job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE) + "\n";
    System.out.println(ret);
    //
    // run job
    //
    JobClient.runJob(job);
}
From source file:edu.ucsb.cs.lsh.minhash.MinHashLshDriver.java
License:Apache License
public static void main(String args[]) throws ParseException, IOException {
    JobConf job = new JobConf();
    job.setJarByClass(MinHashLshDriver.class);
    job.setJobName(MinHashLshDriver.class.getSimpleName());
    GenericOptionsParser gop = new GenericOptionsParser(job, args);
    args = gop.getRemainingArgs();

    job.setMapperClass(LshMapper.class);
    job.setMapOutputKeyClass(IntArrayWritable.class); // signatures
    job.setMapOutputValueClass(LongWritable.class); // doc IDs
    job.setNumReduceTasks(job.getInt(NUM_REDUCERS_PROPERTY, NUM_REDUCERS_VALUE));
    job.setReducerClass(LshReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    String inputDir = args[0];
    if (inputDir == null) {
        throw new UnsupportedOperationException("ERROR: input directory not set.");
    }
    FileInputFormat.addInputPath(job, new Path(inputDir));
    Path outputPath = new Path("lsh-jaccard-buckets");
    FileOutputFormat.setOutputPath(job, outputPath);
    FileSystem.get(job).delete(outputPath, true);

    LshTable lshTable = new LshTable(job.getInt(K_PROPERTY, K_VALUE), job.getInt(L_PROPERTY, L_VALUE), 1024,
            job.getLong(NUM_FEATURES_PROPERTY, NUM_FEATURES_VALUE),
            job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE));

    writeLsh(job, outputPath.getFileSystem(job), lshTable);

    JobSubmitter.run(job, "LSH", job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE));
}
From source file:edu.ucsb.cs.lsh.projection.ProjectionsGenerator.java
License:Apache License
public static void main(JobConf job) throws IOException {
    int nBits /* D */, nFeatures /* K */, nReducers;
    job.setJobName(ProjectionsGenerator.class.getSimpleName());
    FileSystem fs = FileSystem.get(job);

    nBits = job.getInt(ProjectionLshDriver.LSH_NBITS_PROPERTY, ProjectionLshDriver.LSH_NBITS_VALUE);
    nFeatures = readCollectionFeatureCount(fs, job);
    setParameters(nBits, nFeatures);
    nReducers = job.getInt(ProjectionLshDriver.LSH_NREDUCER_PROPERTY, ProjectionLshDriver.LSH_NREDUCER_VALUE);

    Path inputPath = new Path(INPUT_DIR);
    Path outputPath = new Path(OUTPUT_DIR);
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
    if (fs.exists(inputPath))
        fs.delete(inputPath, true);

    SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, new Path(inputPath.toString() + "/file"),
            IntWritable.class, IntWritable.class);
    for (int i = 0; i < nReducers; i++)
        writer.append(new IntWritable(i), new IntWritable(i));
    writer.close();

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
    FileOutputFormat.setCompressOutput(job, false);

    job.set("mapred.child.java.opts", "-Xmx2048m");
    job.setInt("mapred.map.max.attempts", 10);
    job.setInt("mapred.reduce.max.attempts", 10);
    job.setNumMapTasks(1);
    job.setNumReduceTasks(nReducers);

    job.setMapperClass(IdentityMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setReducerClass(ProjectionReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(RandomVector.class);

    JobSubmitter.run(job, "LSH", job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE));
}
From source file:edu.ucsb.cs.partitioning.cosine.CosineAllPartitionMain.java
License:Apache License
/**
 * Job3: Core Cosine partitioning with skipping based on partition maximum
 * vectors length, size and weight.
 */
public static JobConf runCosinePartition(JobConf job, String[] args) throws IOException {
    new GenericOptionsParser(job, args);
    job.setJobName(Partitioner.class.getSimpleName() + " + " + CosineAllPartitionMain.class.getSimpleName());
    job.setJarByClass(CosineAllPartitionMain.class);

    job = setMapReduce(job, CosineAllPartMapper.class, IdentityReducer.class);
    job = setInputOutput(job, new Path(Partitioner.OUTPUT_DIR), interPath);

    JobSubmitter.run(job, "Cosine Partitioning",
            job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE));
    FileSystem.get(job).delete(new Path(Partitioner.OUTPUT_DIR), true);
    return job;
}