Example usage for org.apache.hadoop.mapred JobConf getBoolean

Introduction

This page collects example usages of the getBoolean method of org.apache.hadoop.mapred.JobConf.

Prototype

public boolean getBoolean(String name, boolean defaultValue)

Source Link

Document

Get the value of the `name` property as a boolean. If the property is not set, or its value is not a valid boolean, `defaultValue` is returned.

Usage

From source file:edu.uci.ics.fuzzyjoin.hadoop.tokens.array.ReduceAggregate.java

License:Apache License

/**
 * Reads the length-statistics switch from the job configuration.
 *
 * @param job job configuration; the switch is looked up under
 *            {@code FuzzyJoinDriver.TOKENS_LENGTHSTATS_PROPERTY} with
 *            {@code FuzzyJoinDriver.TOKENS_LENGTHSTATS_VALUE} as the default
 */
@Override
public void configure(JobConf job) {
    final String key = FuzzyJoinDriver.TOKENS_LENGTHSTATS_PROPERTY;
    final boolean fallback = FuzzyJoinDriver.TOKENS_LENGTHSTATS_VALUE;
    lengthStats = job.getBoolean(key, fallback);
}

From source file:edu.uci.ics.fuzzyjoin.hadoop.tokens.array.ReduceSelect.java

License:Apache License

/**
 * Keeps a reference to the job configuration and reads the length-statistics switch from it.
 *
 * @param job job configuration; stored in {@code conf}, and queried for
 *            {@code FuzzyJoinDriver.TOKENS_LENGTHSTATS_PROPERTY} with
 *            {@code FuzzyJoinDriver.TOKENS_LENGTHSTATS_VALUE} as the default
 */
@Override
public void configure(JobConf job) {
    conf = job;
    final String key = FuzzyJoinDriver.TOKENS_LENGTHSTATS_PROPERTY;
    final boolean fallback = FuzzyJoinDriver.TOKENS_LENGTHSTATS_VALUE;
    lengthStats = job.getBoolean(key, fallback);
}

From source file:edu.ucsb.cs.hybrid.HybridDriver.java

License:Apache License

/**
 * Builds the job configuration for the hybrid driver, prepares its input and output paths,
 * optionally runs the two-stage load balancing, and hands the job to
 * {@code JobSubmitter.run} under the name "SIMILARITY".
 *
 * @param args command-line arguments; parsed into the job configuration by
 *             {@code GenericOptionsParser} and forwarded to {@code IDMappingJob} when text
 *             conversion is enabled
 * @throws ParseException declared by the original signature
 * @throws IOException if a file-system operation fails
 */
public static void main(String args[]) throws ParseException, IOException {

    // job.set("mapred.job.tracker", "local");
    // job.set("fs.default.name", "file:///");

    JobConf job = new JobConf();
    job.setJarByClass(HybridDriver.class);
    // Constructed only for its side effect of applying the generic options to the job.
    new GenericOptionsParser(job, args);
    setMapperAndRunner(job);
    job.setMapOutputKeyClass(DocDocWritable.class);
    job.setMapOutputValueClass(FloatWritable.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(DocDocWritable.class);
    job.setOutputValueClass(FloatWritable.class);

    Path inputPath = new Path(INPUT_DIR);
    CustomSequenceFileInputFormat.addInputPath(job, inputPath);
    Path outputPath = new Path(OUTPUT_DIR);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);
    // Remove any previous output (recursively) before the job runs.
    FileSystem.get(job).delete(outputPath, true);

    job.setBoolean("fs.hdfs.impl.disable.cache", true); // original author: not sure if needed

    if (job.getBoolean(Config.SPLITABLE_PROPERTY, Config.SPLITABLE_VALUE)) {
        job.setInputFormat(CustomSequenceFileInputFormat.class);
        // Primitive long: the value is only passed on to setLong, boxing it gains nothing.
        long splitBytes = job.getLong(Config.SPLIT_MB_PROPERTY, Config.SPLIT_MB_VALUE) * 1024 * 1024;
        job.setLong("mapred.min.split.size", splitBytes);
        job.setLong("mapred.max.split.size", splitBytes);
        job.setLong("dfs.block.size", splitBytes);
    } else {
        // Original author's note: the Splitter call is to be disabled for the "www"
        // experiments, which assume partitions are not split for load balancing; marked
        // by the author as something that should be fixed.
        Splitter.configure(job, inputPath);
        job.setInputFormat(NonSplitableSequenceInputFormat.class);
    }

    // SIGIR'14 two-stage balancing; original author: not yet fully incorporated.
    // Read the step once and reuse it for both the check and the call.
    int loadBalanceStep = job.getInt(Config.LOAD_BALANCE_PROPERTY, Config.LOAD_BALANCE_VALUE);
    if (loadBalanceStep != 0) {
        TwoStageLoadbalancing.main(loadBalanceStep, new Path(PartDriver.OUTPUT_DIR), job);
    }

    JobSubmitter.run(job, "SIMILARITY", job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE));
    if (job.getBoolean(Config.CONVERT_TEXT_PROPERTY, Config.CONVERT_TEXT_VALUE)) {
        IDMappingJob(args);
    }
}

From source file:edu.ucsb.cs.hybrid.HybridDriver.java

License:Apache License

/**
 * Chooses the map runner and mapper classes for the job from its configuration and sets
 * the job name to match the choice.
 *
 * @param job job configuration, modified in place: its map runner class, mapper class and
 *            job name are set here
 * @throws RuntimeException if multi-threading is enabled while the number of splits is
 *                          not greater than one
 */
public static void setMapperAndRunner(JobConf job) {
    final int splitCount = job.getInt(Config.NUMBER_SPLITS_PROPERTY, Config.NUMBER_SPLITS_VALUE);
    final int blockChoice = job.getInt(Config.BLOCK_CHOICE_PROPERTY, Config.BLOCK_CHOICE_VALUE); // 1 or 2
    final boolean multiThreaded = job.getBoolean(Config.MULTI_THREADS_PROPERTY,
            Config.MULTI_THREADS_VALUE);
    final StringBuilder jobName = new StringBuilder("PSS");

    if (splitCount <= 1) {
        job.setMapRunnerClass(SingleS_Runner.class);
        if (multiThreaded) {
            throw new RuntimeException(
                    "ERROR: Single S with multithreads! Set hybrid.threads.property to false.");
        }
        switch (blockChoice) {
        case 1:
            job.setMapperClass(PSS_Mapper.class);
            if (job.getBoolean(Config.BAYADRO_SKIP_PROPERTY, Config.BAYADRO_SKIP_VALUE)) {
                // PSS + Bayardo (WWW'07) replaces the plain PSS mapper set just above.
                jobName.append("/Bayardo_Dynamic_filter");
                job.setMapperClass(PSS_Bayardo_Mapper.class);
            }
            break;
        case 2:
            jobName.append("2/SingleS");
            job.setMapperClass(PSS2_SingleS_Mapper.class);
            break;
        default:
            // Any other choice falls back to PSS3; the original author questioned this.
            job.setMapperClass(PSS3_SingleS_Mapper.class);
            break;
        }
    } else {
        // Original author's open question: can the number of splits be set for the runner here?
        job.setMapRunnerClass(MultipleS_Runner.class);
        if (multiThreaded) {
            // Threaded variant; the job name is left as the bare prefix in this case.
            job.setMapperClass(PSS1_Threaded_Mapper.class);
        } else if (blockChoice == 1) {
            jobName.append("1");
            job.setMapperClass(PSS1_Mapper.class);
        } else if (blockChoice == 2) {
            jobName.append("2");
            job.setMapperClass(PSS2_Mapper.class);
        }
        // Other choices set no mapper here; reserved for future implementations.
    }
    job.setJobName(jobName.toString());
}

From source file:edu.ucsb.cs.hybrid.io.Splitter.java

License:Apache License

/**
 * Prepares the job input according to the configured host-partition size "S".
 *
 * <p>The configured size is capped at the number of input vectors. Then either a single-map
 * layout is prepared, or the input is split and the resulting directory replaces
 * {@code inputPath}.
 *
 * @param job job configuration supplying {@code Config.MAP_S_PROPERTY} and
 *            {@code Config.SINGLE_MAP_PROPERTY}
 * @param inputPath input directory; in the non-single-map case it is deleted and replaced
 *                  by the directory returned from {@code splitAll}
 * @throws IOException if a file-system operation fails, including a failed rename of the
 *                     split directory onto {@code inputPath}
 */
public static void configure(JobConf job, Path inputPath) throws IOException {

    hdfs = FileSystem.get(job);
    long hostPartitionSize = job.getInt(Config.MAP_S_PROPERTY, Config.MAP_S_VALUE);
    long nVectors = Collector.countDirVectors(hdfs, inputPath, job);
    if (hostPartitionSize > nVectors) {
        // Previously an exception was thrown and caught on the spot just to perform this
        // assignment, which meant the warning text was built but never shown to anyone.
        System.err.println(
                "WARNING: Mapper's host partition \"S\" is larger than the total number of input vectors!\n"
                        + Config.MAP_S_PROPERTY + " is set to " + nVectors);
        hostPartitionSize = nVectors;
    }

    if (job.getBoolean(Config.SINGLE_MAP_PROPERTY, Config.SINGLE_MAP_VALUE)) {
        prepareOneMap(job, inputPath, hostPartitionSize);
    } else {
        Path splitsDir = splitAll(job, hostPartitionSize, inputPath);
        hdfs.delete(inputPath, true);
        // The original input is already gone at this point, so a rename that reports
        // failure must not pass silently.
        if (!hdfs.rename(splitsDir, inputPath)) {
            throw new IOException("Failed to rename " + splitsDir + " to " + inputPath);
        }
    }
}

From source file:edu.ucsb.cs.hybrid.io.TwoStageLoadbalancing.java

License:Apache License

/**
 * Loads the partition sizes, prints them, and runs the load-balancing stages before
 * writing the comparison list.
 *
 * <p>Returns early when the partition sizes file is missing or when at most one partition
 * was read. Stage 2 runs only when {@code step} is 2 or 12.
 *
 * @param step selects whether stage 2 runs (values 2 and 12 enable it)
 * @param inputDir path used to obtain the file system
 * @param job job configuration; supplies {@code Config.DEBUG_STAGES_PROPERTY} and is passed
 *            to {@code writeComparisonList}
 * @throws IOException if a file-system operation fails, including closing the reader
 */
public static void main(int step, Path inputDir, JobConf job) throws IOException {
    FileSystem hdfs = inputDir.getFileSystem(job);
    if (!hdfs.exists(Collector.partitionSizesPath)) {
        System.out.println("Partition sizes file does not exists!");
        return;
    }
    debugStages = job.getBoolean(Config.DEBUG_STAGES_PROPERTY, Config.DEBUG_STAGES_VALUE);
    // NOTE(review): the existence check above uses the full path while the reader is opened
    // with only its last component (getName()) - confirm this is intended.
    MapFile.Reader partitionSizeReader = new MapFile.Reader(hdfs, Collector.partitionSizesPath.getName(),
            new JobConf());
    try {
        Text partitionK = new Text();
        LongWritable partSizeV = new LongWritable();
        while (partitionSizeReader.next(partitionK, partSizeV)) {
            partitionsNames.add(partitionK.toString()); // original author: useless?
            partitionsSizes.put(partitionK.toString(), partSizeV.get());
        }
    } catch (Exception e) {
        // Kept best-effort as before (entries read so far are still used), but the failure
        // is now reported instead of being dropped silently.
        System.err.println("WARNING: reading partition sizes stopped early: " + e);
    } finally {
        // The reader was previously never closed.
        partitionSizeReader.close();
    }
    for (int i = 0; i < partitionsNames.size(); i++) {
        System.out.println("Partition " + partitionsNames.get(i) + " has "
                + partitionsSizes.get(partitionsNames.get(i)) + " vectors.");
    }

    if (partitionsNames.size() <= 1) {
        return;
    }
    stage0();
    printUndirectedNeighbors("Stage0");
    printPartitionsStat("Stage0");

    printCircularPartitionsWeight("\nCircular");
    calcCWStandardDeviation();

    stage1();
    printDirectedNeighbors("Stage1");
    System.out.println("Stage 1 final weights: ");
    printPartitionsWeights("Stage1");
    if ((step == 2) || (step == 12)) {
        stage2();
        printDirectedNeighbors("Stage2");
        System.out.println("Stage 2 final weights: ");
        printPartitionsWeights("Stage2");
    }
    // stage3(job, hdfs);
    writeComparisonList(job, hdfs);
    // printComparisonList(job, hdfs);// remove
}

From source file:edu.ucsb.cs.hybrid.mappers.PSS2_Mapper.java

License:Apache License

/**
 * Runs the superclass configuration, reads the {@code LOOPSLOOPB_PROPERTY} flag and then
 * allocates the current block via {@code allocateCurrentB}.
 *
 * @param job job configuration to read the flag from
 */
@Override
public void configure(JobConf job) {
    super.configure(job);
    final boolean flag = job.getBoolean(LOOPSLOOPB_PROPERTY, LOOPSLOOPB_VALUE);
    loopsloopb = flag;
    allocateCurrentB(currentB, blockSize);
}

From source file:edu.ucsb.cs.hybrid.mappers.PSS2_SingleS_Mapper.java

License:Apache License

/**
 * Runs the superclass configuration, reads the {@code LOOPSLOOPB_PROPERTY} flag, and
 * allocates {@code currentB} and {@code currentBpointers} with {@code blockSize} slots each.
 *
 * @param job job configuration to read the flag from
 */
@Override
public void configure(JobConf job) {
    super.configure(job); // original author's open question: is idComparison set here?
    final boolean flag = job.getBoolean(LOOPSLOOPB_PROPERTY, LOOPSLOOPB_VALUE);
    loopsloopb = flag;

    currentB = new IndexFeatureWeight[blockSize];
    // The counter `i` is not declared in this method, so it is deliberately kept as the
    // outer variable; it ends up equal to blockSize after the loop.
    i = 0;
    while (i < blockSize) {
        // Each slot gets its own placeholder entry.
        currentB[i] = new IndexFeatureWeight(0, Long.MAX_VALUE, 0);
        i++;
    }

    currentBpointers = new int[blockSize];
}

From source file:edu.ucsb.cs.hybrid.mappers.SingleS_Runner.java

License:Apache License

/**
 * Creates the reader for the file named by the {@code "map.input.file"} property.
 *
 * <p>A {@code OneMapReader} is returned only when the single-map option is on and the
 * splittable option is off; every other combination yields a plain {@code Reader}.
 *
 * @param conf job configuration supplying the input file, the two options and
 *             {@code Config.COMP_BLOCK_PROPERTY}
 * @return the reader matching the configured options
 * @throws IOException declared by the original signature
 */
public static Reader getReader(JobConf conf) throws IOException {
    final boolean singleMap = conf.getBoolean(Config.SINGLE_MAP_PROPERTY, Config.SINGLE_MAP_VALUE);
    final boolean splitable = conf.getBoolean(Config.SPLITABLE_PROPERTY, Config.SPLITABLE_VALUE);

    // Both reader kinds take the same arguments, so build them once.
    final Path inputFile = new Path(conf.get("map.input.file"));
    final int compBlock = conf.getInt(Config.COMP_BLOCK_PROPERTY, Config.COMP_BLOCK_VALUE);

    if (singleMap && !splitable) {
        return new OneMapReader(conf, inputFile, compBlock);
    }
    return new Reader(conf, inputFile, compBlock);
}

From source file:edu.ucsb.cs.partitioning.cosine.Organizer.java

License:Apache License

/**
 * Copies the sequence files under {@code input} into sequence files under {@code output},
 * appending consecutive inputs to the same output file while {@code isCombined} says they
 * belong together.
 *
 * <p>When {@code Config.PRINT_DISTRIBUTION_PROPERTY} is enabled, the p-norm of every copied
 * document is also written, one per line, to a local file named
 * {@code "p-norm-distribution" + output}.
 *
 * @param input directory whose entries are read; sub-directories are skipped
 * @param output output directory name, also used as the suffix of the distribution file
 * @param job job configuration supplying the print flag and {@code NormSortMain.P_NORM_PROPERTY}
 * @throws IOException if reading, writing or closing any of the files fails
 */
public static void readCombineCopy(Path input, String output, JobConf job) throws IOException {
    boolean printDist = job.getBoolean(Config.PRINT_DISTRIBUTION_PROPERTY, Config.PRINT_DISTRIBUTION_VALUE);
    BufferedWriter distout = null;
    SequenceFile.Writer out = null;
    try {
        if (printDist)
            distout = new BufferedWriter(new FileWriter("p-norm-distribution" + output));

        int pc = 0, pr = 0;
        float pChoice = job.getFloat(NormSortMain.P_NORM_PROPERTY, NormSortMain.P_NORM_VALUE);
        FileSystem hdfs = input.getFileSystem(new JobConf());
        FileStatus[] files = Partitioner.setFiles(hdfs, input);
        ArrayList<String> partitions = arrangeNames(files);

        for (int i = 0; i < partitions.size(); i++) {
            Path inputPath = new Path(input.toString() + "/" + partitions.get(i));
            if (hdfs.isDirectory(inputPath))
                continue;

            SequenceFile.Reader in = new SequenceFile.Reader(hdfs, inputPath, job);
            try {
                if (!isCombined(pr, pc, getRow(inputPath.getName()), getCol(inputPath.getName()), partitions)) {
                    if (out != null) {
                        // Clear the reference first so the outer finally never closes the
                        // same writer a second time if close() throws.
                        SequenceFile.Writer finished = out;
                        out = null;
                        finished.close();
                    }
                    pr = getRow(inputPath.getName());
                    pc = getCol(inputPath.getName());
                    out = SequenceFile.createWriter(hdfs, job, new Path(output + "/" + inputPath.getName()),
                            LongWritable.class, FeatureWeightArrayWritable.class, SequenceFile.CompressionType.NONE);
                }
                // NOTE(review): if isCombined reports true before any writer was created,
                // `out` is still null here - confirm that cannot happen for the first file.
                while (in.next(unused, document)) {
                    out.append(new LongWritable(document.id),
                            new FeatureWeightArrayWritable(document.vectorSize, document.vector));
                    if (printDist)
                        distout.write(document.getPNorm(pChoice) + " \n");
                }
            } finally {
                // Close the reader even when copying fails part-way.
                in.close();
            }
        }
    } finally {
        try {
            if (out != null)
                out.close();
        } finally {
            // The distribution writer is buffered; without this close its content was
            // never flushed to disk.
            if (distout != null)
                distout.close();
        }
    }
}