Example usage for org.apache.hadoop.mapred JobConf getInt

List of usage examples for org.apache.hadoop.mapred JobConf getInt

Introduction

On this page you can find example usages of org.apache.hadoop.mapred JobConf#getInt.

Prototype

public int getInt(String name, int defaultValue) 

Document

Get the value of the name property as an int, or defaultValue if the property is not set.
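
For quick orientation before the examples, here is a minimal sketch of the set/get round trip; the key "my.app.num.shards" is a hypothetical property name, not one used by the sources below.

JobConf conf = new JobConf();
conf.setInt("my.app.num.shards", 8); // hypothetical key, set explicitly
int shards = conf.getInt("my.app.num.shards", 4); // key present: returns 8
int missing = conf.getInt("my.app.unset.key", 4); // key absent: returns the default, 4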

Usage

From source file:edu.ucsb.cs.hybrid.io.Splitter.java

License:Apache License

public static void configure(JobConf job, Path inputPath) throws IOException {

    hdfs = FileSystem.get(job);
    long initial_S = job.getInt(Config.MAP_S_PROPERTY, Config.MAP_S_VALUE);
    long nVectors = Collector.countDirVectors(hdfs, inputPath, job);
    if (initial_S > nVectors) {
        // Clamp S: a mapper's host partition cannot exceed the total number of input vectors.
        System.err.println("WARNING: Mapper's host partition \"S\" is larger than the total number"
                + " of input vectors! " + Config.MAP_S_PROPERTY + " is set to " + nVectors);
        initial_S = nVectors;
    }

    if (job.getBoolean(Config.SINGLE_MAP_PROPERTY, Config.SINGLE_MAP_VALUE)) {
        prepareOneMap(job, inputPath, initial_S);
    } else {
        long S_size = initial_S; //get_max_S_size(nVectors, initial_S);
        Path splitsDir = splitAll(job, S_size, inputPath);
        hdfs.delete(inputPath, true);
        hdfs.rename(splitsDir, inputPath);
    }
}
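
As a usage sketch (relying only on the Config constants visible above, with hypothetical values), a driver would set these properties before invoking the splitter:

JobConf job = new JobConf();
job.setInt(Config.MAP_S_PROPERTY, 1024); // hypothetical S; Config.MAP_S_VALUE remains the fallback
job.setBoolean(Config.SINGLE_MAP_PROPERTY, false);
Splitter.configure(job, new Path("/data/vectors")); // hypothetical input path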

From source file:edu.ucsb.cs.hybrid.mappers.SingleS_Mapper.java

License:Apache License

@Override
public void configure(JobConf job) {
    blockSize = job.getInt(Config.COMP_BLOCK_PROPERTY, Config.COMP_BLOCK_VALUE);
    threshold = job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE);
}

From source file:edu.ucsb.cs.hybrid.mappers.SingleS_Runner.java

License:Apache License

public static Reader getReader(JobConf conf) throws IOException {
    boolean oneMap = conf.getBoolean(Config.SINGLE_MAP_PROPERTY, Config.SINGLE_MAP_VALUE);
    boolean splittable = conf.getBoolean(Config.SPLITABLE_PROPERTY, Config.SPLITABLE_VALUE);

    if (!oneMap || splittable)
        return new Reader(conf, new Path(conf.get("map.input.file")),
                conf.getInt(Config.COMP_BLOCK_PROPERTY, Config.COMP_BLOCK_VALUE));
    else
        return new OneMapReader(conf, new Path(conf.get("map.input.file")),
                conf.getInt(Config.COMP_BLOCK_PROPERTY, Config.COMP_BLOCK_VALUE));
}

From source file:edu.ucsb.cs.lsh.minhash.LshMapper.java

License:Apache License

@Override
public void configure(JobConf job) {
    l = job.getInt(MinHashLshDriver.L_PROPERTY, MinHashLshDriver.L_VALUE);
    try {
        Path[] localFiles = DistributedCache.getLocalCacheFiles(job);
        // The commented-out original read the LSH table from the distributed cache
        // (localFiles[0]); this version reads the fixed path "lshfile" instead.
        FileSystem fs = FileSystem.get(job);
        Reader reader = new SequenceFile.Reader(fs, new Path("lshfile"), job);
        reader.next(lsh);
        reader.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:edu.ucsb.cs.lsh.minhash.MinHashLshDriver.java

License:Apache License

public static void main(String args[]) throws ParseException, IOException {

    JobConf job = new JobConf();
    job.setJarByClass(MinHashLshDriver.class);
    job.setJobName(MinHashLshDriver.class.getSimpleName());
    GenericOptionsParser gop = new GenericOptionsParser(job, args);
    args = gop.getRemainingArgs();

    job.setMapperClass(LshMapper.class);
    job.setMapOutputKeyClass(IntArrayWritable.class); // signatures
    job.setMapOutputValueClass(LongWritable.class); // doc IDs
    job.setNumReduceTasks(job.getInt(NUM_REDUCERS_PROPERTY, NUM_REDUCERS_VALUE));
    job.setReducerClass(LshReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    if (args.length < 1) {
        throw new UnsupportedOperationException("ERROR: input directory not set.");
    }
    String inputDir = args[0];
    FileInputFormat.addInputPath(job, new Path(inputDir));
    Path outputPath = new Path("lsh-jaccard-buckets");
    FileOutputFormat.setOutputPath(job, outputPath);
    FileSystem.get(job).delete(outputPath, true);

    LshTable lshTable = new LshTable(job.getInt(K_PROPERTY, K_VALUE), job.getInt(L_PROPERTY, L_VALUE), 1024,
            job.getLong(NUM_FEATURES_PROPERTY, NUM_FEATURES_VALUE),
            job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE));

    writeLsh(job, outputPath.getFileSystem(job), lshTable);

    JobSubmitter.run(job, "LSH", job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE));
}
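
The drivers above all follow the same convention for tunables: a string key constant paired with an int default constant, so getInt calls never hard-code literals. A minimal sketch of that convention (the names below are illustrative, not the actual fields of MinHashLshDriver):

public class MyDriver {
    // Hypothetical property-name / default-value pair, mirroring the convention above.
    public static final String NUM_REDUCERS_PROPERTY = "mydriver.num.reducers";
    public static final int NUM_REDUCERS_VALUE = 8;

    static int resolveReducers(JobConf job) {
        // Returns the configured value, or 8 when the key is absent.
        return job.getInt(NUM_REDUCERS_PROPERTY, NUM_REDUCERS_VALUE);
    }
}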

From source file:edu.ucsb.cs.lsh.projection.ProjectionsGenerator.java

License:Apache License

public static void main(JobConf job) throws IOException {
    int nBits/*D*/, nFeatures/*K*/, nReducers;
    job.setJobName(ProjectionsGenerator.class.getSimpleName());
    FileSystem fs = FileSystem.get(job);

    nBits = job.getInt(ProjectionLshDriver.LSH_NBITS_PROPERTY, ProjectionLshDriver.LSH_NBITS_VALUE);
    nFeatures = readCollectionFeatureCount(fs, job);
    setParameters(nBits, nFeatures);
    nReducers = job.getInt(ProjectionLshDriver.LSH_NREDUCER_PROPERTY, ProjectionLshDriver.LSH_NREDUCER_VALUE);
    Path inputPath = new Path(INPUT_DIR);
    Path outputPath = new Path(OUTPUT_DIR);
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
    if (fs.exists(inputPath))
        fs.delete(inputPath, true);

    SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, new Path(inputPath.toString() + "/file"),
            IntWritable.class, IntWritable.class);
    for (int i = 0; i < nReducers; i++)
        writer.append(new IntWritable(i), new IntWritable(i));
    writer.close();

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
    FileOutputFormat.setCompressOutput(job, false);

    job.set("mapred.child.java.opts", "-Xmx2048m");
    job.setInt("mapred.map.max.attempts", 10);
    job.setInt("mapred.reduce.max.attempts", 10);

    job.setNumMapTasks(1);
    job.setNumReduceTasks(nReducers);

    job.setMapperClass(IdentityMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setReducerClass(ProjectionReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(RandomVector.class);

    JobSubmitter.run(job, "LSH", job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE));
}

From source file:edu.ucsb.cs.lsh.projection.ProjectionsGenerator.java

License:Apache License

public static int readCollectionFeatureCount(FileSystem hdfs, JobConf job) throws IOException {
    Path nFeaturesPath = new Path(Properties.NUM_FEATURES_FILE);
    if (hdfs.exists(nFeaturesPath)) {
        // Open the file through the HDFS FileSystem handle (a plain FileInputStream
        // would only resolve a local path), and close the reader when done.
        BufferedReader br = new BufferedReader(new InputStreamReader(hdfs.open(nFeaturesPath)));
        String line;
        if ((line = br.readLine()) != null)
            job.setInt(ProjectionLshDriver.LSH_NFEATURES_PROPERTY, Integer.parseInt(line));
        br.close();
    }
    return job.getInt(ProjectionLshDriver.LSH_NFEATURES_PROPERTY, ProjectionLshDriver.LSH_NFEATURES_VALUE);
}

From source file:edu.ucsb.cs.lsh.projection.SignaturesGenerator.java

License:Apache License

public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(SignaturesGenerator.class);
    new GenericOptionsParser(job, args);
    job.setJobName(SignaturesGenerator.class.getSimpleName());
    int nBits = job.getInt(ProjectionLshDriver.LSH_NBITS_PROPERTY, ProjectionLshDriver.LSH_NBITS_VALUE);
    setParameters();
    FileSystem fs = FileSystem.get(job);
    prepareDistributedCache(job, fs, new Path(ProjectionsGenerator.OUTPUT_DIR));
    Path outputPath = new Path(OUTPUT_DIR);
    if (fs.exists(outputPath))
        fs.delete(outputPath, true); // recursive delete; the single-argument form is deprecated

    FileInputFormat.setInputPaths(job, INPUT_DIR);
    FileOutputFormat.setOutputPath(job, outputPath);
    // FileOutputFormat.setCompressOutput(job, false);
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);

    job.set("mapred.child.java.opts", "-Xmx2048m");
    job.setInt("mapred.map.max.attempts", 10);
    job.setInt("mapred.reduce.max.attempts", 10);
    job.setInt("mapred.task.timeout", 6000000);

    job.setMapperClass(SigMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(BitSignature.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(BitSignature.class);

    JobSubmitter.run(job, "LSH", -1);
}
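
Note that getInt reads framework keys the same way as application keys; a sketch reading back one of the values set above (4 is an assumed fallback, not a value from this source):

// Reads back the value set via setInt above; the second argument is only the fallback.
int maxMapAttempts = job.getInt("mapred.map.max.attempts", 4); // yields 10 here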

From source file:edu.ucsb.cs.partitioning.cosine.CosinePartitioning.java

License:Apache License

/**
 * Sets MapReduce input configurations for the core cosine partitioning job.
 */
public static JobConf setMapReduce(JobConf job, Class<? extends Mapper> mapper, Class<? extends Reducer> reducer) {
    job.setMapperClass(mapper);
    job.setMapOutputKeyClass(IntIntWritable.class);
    job.setMapOutputValueClass(IdFeatureWeightArrayWritable.class);
    job.setNumReduceTasks(job.getInt(Config.NUM_PARTITIONS_PROPERTY, Config.NUM_PARTITIONS_VALUE));
    job.setReducerClass(reducer);
    job.setOutputKeyClass(IntIntWritable.class);
    job.setOutputValueClass(IdFeatureWeightArrayWritable.class);
    return job;
}
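
A call site would then supply the concrete classes; a sketch, where CosineMapper and CosineReducer are hypothetical stand-ins for the project's real classes:

JobConf job = new JobConf(CosinePartitioning.class);
job = setMapReduce(job, CosineMapper.class, CosineReducer.class); // hypothetical classes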

From source file:edu.ucsb.cs.partitioning.cosine.Partitioner.java

License:Apache License

/**
 * Uniformly partitions the sequence vectors into the number of partitions
 * given in the configuration file. It also writes summary information about
 * each partition (maximum p-norms, weights, or norms/weights/lengths) to a
 * file that guides the core static partitioning that follows in skipping
 * comparisons.
 *
 * @param norm_weight_all
 * @return
 */
public static JobConf main(String[] args, int norm_weight_all) throws IOException {

    JobConf job = new JobConf();
    new GenericOptionsParser(job, args);
    job.setJarByClass(Partitioner.class);
    System.out.println(
            JobSubmitter.stars() + "\n Running partitioner to prepare uniform partitions (single JVM)");

    String inputDir = SortDriver.OUTPUT_DIR, maxDir;

    if (norm_weight_all == 1)
        maxDir = "/maxpnorm";
    //      maxDir = inputDir + "/maxpnorm";
    else if (norm_weight_all == 2)
        maxDir = "/maxweight";
    //      maxDir = inputDir + "/maxweight";
    else
        maxDir = "/maxall";
    //      maxDir = inputDir + "/maxall";

    if (!(new Path(inputDir).getFileSystem(job)).exists(new Path(inputDir)))
        throw new UnsupportedOperationException("ERROR: " + inputDir + " directory does not exist.");

    job.set(MAX_DIR_PATH, maxDir);
    job.set(Config.NUM_PARTITIONS_PROPERTY, Integer.toString(produceStaticParitions(job, inputDir, OUTPUT_DIR,
            maxDir, job.getInt(Config.NUM_PARTITIONS_PROPERTY, Config.NUM_PARTITIONS_VALUE), norm_weight_all)));
    return job;
}
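
From the branches above, norm_weight_all selects which per-partition statistic is collected (1 = maximum p-norms, 2 = maximum weights, anything else = all). A hypothetical call site:

// Partition on maximum p-norms; args are the usual command-line arguments.
JobConf job = Partitioner.main(args, 1);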