List of usage examples for org.apache.hadoop.mapred JobConf getInt
public int getInt(String name, int defaultValue)
Gets the value of the name property as an int; if no such property exists, defaultValue is returned.
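Before the project-specific examples below, here is a minimal, self-contained sketch of the call itself. The property name my.job.num.reducers and its values are invented purely for illustration and are not real Hadoop configuration keys.

import org.apache.hadoop.mapred.JobConf;

public class GetIntExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // "my.job.num.reducers" is a made-up key; it is not set yet,
        // so getInt falls back to the supplied default (4).
        int before = conf.getInt("my.job.num.reducers", 4);

        // Set the property (stored internally as a String) and read it back as an int.
        conf.setInt("my.job.num.reducers", 16);
        int after = conf.getInt("my.job.num.reducers", 4);

        System.out.println(before + " " + after); // prints: 4 16
    }
}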
From source file:org.apache.flink.batch.connectors.hive.HiveTableOutputFormat.java
License:Apache License
private HivePartitionWriter writerForLocation(String location) throws IOException {
    JobConf clonedConf = new JobConf(jobConf);
    clonedConf.set(OUTDIR, location);
    OutputFormat outputFormat;
    try {
        StorageDescriptor sd = hiveTablePartition.getStorageDescriptor();
        Class outputFormatClz = Class.forName(sd.getOutputFormat(), true,
                Thread.currentThread().getContextClassLoader());
        outputFormatClz = HiveFileFormatUtils.getOutputFormatSubstitute(outputFormatClz);
        outputFormat = (OutputFormat) outputFormatClz.newInstance();
    } catch (InstantiationException | IllegalAccessException | ClassNotFoundException e) {
        throw new FlinkRuntimeException("Unable to instantiate the hadoop output format", e);
    }
    ReflectionUtils.setConf(outputFormat, clonedConf);
    OutputCommitter outputCommitter = clonedConf.getOutputCommitter();
    JobContext jobContext = new JobContextImpl(clonedConf, new JobID());
    outputCommitter.setupJob(jobContext);
    final boolean isCompressed = clonedConf.getBoolean(HiveConf.ConfVars.COMPRESSRESULT.varname, false);
    if (isCompressed) {
        String codecStr = clonedConf.get(HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC.varname);
        if (!StringUtils.isNullOrWhitespaceOnly(codecStr)) {
            try {
                Class<? extends CompressionCodec> codec = (Class<? extends CompressionCodec>) Class
                        .forName(codecStr, true, Thread.currentThread().getContextClassLoader());
                FileOutputFormat.setOutputCompressorClass(clonedConf, codec);
            } catch (ClassNotFoundException e) {
                throw new RuntimeException(e);
            }
        }
        String typeStr = clonedConf.get(HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE.varname);
        if (!StringUtils.isNullOrWhitespaceOnly(typeStr)) {
            SequenceFile.CompressionType style = SequenceFile.CompressionType.valueOf(typeStr);
            SequenceFileOutputFormat.setOutputCompressionType(clonedConf, style);
        }
    }
    String taskPartition = String.valueOf(clonedConf.getInt("mapreduce.task.partition", -1));
    Path taskPath = FileOutputFormat.getTaskOutputPath(clonedConf, taskPartition);
    FileSinkOperator.RecordWriter recordWriter;
    try {
        recordWriter = HiveFileFormatUtils.getRecordWriter(clonedConf, outputFormat, outputClass,
                isCompressed, tblProperties, taskPath, Reporter.NULL);
    } catch (HiveException e) {
        throw new IOException(e);
    }
    return new HivePartitionWriter(clonedConf, outputFormat, recordWriter, outputCommitter);
}
From source file:org.apache.hawq.pxf.plugins.json.JsonRecordReader.java
License:Apache License
/**
 * Create new multi-line json object reader.
 *
 * @param conf
 *            Hadoop context
 * @param split
 *            HDFS split to start the reading from
 * @throws IOException IOException when reading the file
 */
public JsonRecordReader(JobConf conf, FileSplit split) throws IOException {
    this.jsonMemberName = conf.get(RECORD_MEMBER_IDENTIFIER);
    this.maxObjectLength = conf.getInt(RECORD_MAX_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(conf);
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream fileIn = fs.open(split.getPath());
    if (codec != null) {
        is = codec.createInputStream(fileIn);
        start = 0;
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            fileIn.seek(start);
        }
        is = fileIn;
    }
    parser = new PartitionedJsonParser(is);
    this.pos = start;
}
From source file:org.apache.mahout.cf.taste.hadoop.similarity.item.SimilarityReducer.java
License:Apache License
@Override
public void configure(JobConf jobConf) {
    super.configure(jobConf);
    distributedItemSimilarity = ItemSimilarityJob
            .instantiateSimilarity(jobConf.get(ItemSimilarityJob.DISTRIBUTED_SIMILARITY_CLASSNAME));
    numberOfUsers = jobConf.getInt(ItemSimilarityJob.NUMBER_OF_USERS, -1);
    if (numberOfUsers <= 0) {
        throw new IllegalStateException("Number of users was not set correctly");
    }
}
From source file:org.apache.mahout.df.mapred.partial.Step1Mapper.java
License:Apache License
@Override
public void configure(JobConf job) {
    super.configure(job);
    configure(Builder.getRandomSeed(job), job.getInt("mapred.task.partition", -1),
            job.getNumMapTasks(), Builder.getNbTrees(job));
}
From source file:org.apache.mahout.df.mapred.partial.Step2Mapper.java
License:Apache License
@Override
public void configure(JobConf job) {
    // get the cached files' paths
    URI[] files;
    try {
        files = DistributedCache.getCacheFiles(job);
    } catch (IOException e) {
        throw new IllegalStateException("Exception while getting the cache files : ", e);
    }

    if ((files == null) || (files.length < 2)) {
        throw new IllegalArgumentException("missing paths from the DistributedCache");
    }

    Dataset dataset;
    try {
        Path datasetPath = new Path(files[0].getPath());
        dataset = Dataset.load(job, datasetPath);
    } catch (IOException e) {
        throw new IllegalStateException("Exception while loading the dataset : ", e);
    }

    int numMaps = job.getNumMapTasks();
    int p = job.getInt("mapred.task.partition", -1);

    // total number of trees in the forest
    int numTrees = Builder.getNbTrees(job);
    if (numTrees == -1) {
        throw new IllegalArgumentException("numTrees not found !");
    }

    int nbConcerned = nbConcerned(numMaps, numTrees, p);
    keys = new TreeID[nbConcerned];
    trees = new Node[nbConcerned];

    int numInstances;
    try {
        Path forestPath = new Path(files[1].getPath());
        FileSystem fs = forestPath.getFileSystem(job);
        numInstances = InterResults.load(fs, forestPath, numMaps, numTrees, p, keys, trees);
        log.debug("partition: {} numInstances: {}", p, numInstances);
    } catch (IOException e) {
        throw new IllegalStateException("Exception while loading the forest : ", e);
    }

    configure(p, dataset, keys, trees, numInstances);
}
From source file:org.apache.nutch.crawl.CrawlDbReducer.java
License:Apache License
public void configure(JobConf job) {
    retryMax = job.getInt("db.fetch.retry.max", 3);
    scfilters = new ScoringFilters(job);
    additionsAllowed = job.getBoolean(CrawlDb.CRAWLDB_ADDITIONS_ALLOWED, true);
    int oldMaxInterval = job.getInt("db.max.fetch.interval", 0);
    maxInterval = job.getInt("db.fetch.interval.max", 0);
    if (oldMaxInterval > 0 && maxInterval == 0)
        maxInterval = oldMaxInterval * FetchSchedule.SECONDS_PER_DAY;
    schedule = FetchScheduleFactory.getFetchSchedule(job);
    int maxLinks = job.getInt("db.update.max.inlinks", 10000);
    linked = new InlinkPriorityQueue(maxLinks);
}
From source file:org.apache.nutch.crawl.LinkDbMerger.java
License:Apache License
public void configure(JobConf job) {
    maxInlinks = job.getInt("db.max.inlinks", 10000);
}
From source file:org.apache.nutch.crawl.PartitionUrlByHost.java
License:Apache License
public void configure(JobConf job) {
    seed = job.getInt("partition.url.by.host.seed", 0);
    normalizers = new URLNormalizers(job, URLNormalizers.SCOPE_PARTITION);
}
From source file:org.apache.nutch.indexer.lucene.LuceneWriter.java
License:Apache License
public void open(JobConf job, String name) throws IOException {
    this.fs = FileSystem.get(job);
    perm = new Path(FileOutputFormat.getOutputPath(job), name);
    temp = job.getLocalPath("index/_" + Integer.toString(new Random().nextInt()));
    fs.delete(perm, true); // delete old, if any
    analyzerFactory = new AnalyzerFactory(job);
    writer = new IndexWriter(FSDirectory.open(new File(fs.startLocalOutput(perm, temp).toString())),
            new NutchDocumentAnalyzer(job), true, MaxFieldLength.UNLIMITED);
    writer.setMergeFactor(job.getInt("indexer.mergeFactor", 10));
    writer.setMaxBufferedDocs(job.getInt("indexer.minMergeDocs", 100));
    writer.setMaxMergeDocs(job.getInt("indexer.maxMergeDocs", Integer.MAX_VALUE));
    writer.setTermIndexInterval(job.getInt("indexer.termIndexInterval", 128));
    writer.setMaxFieldLength(job.getInt("indexer.max.tokens", 10000));
    writer.setInfoStream(LogUtil.getDebugStream(Indexer.LOG));
    writer.setUseCompoundFile(false);
    writer.setSimilarity(new NutchSimilarity());
    processOptions(job);
}
From source file:org.apache.nutch.indexwriter.lucene.LuceneWriter.java
License:Apache License
public void open(JobConf job, String name) throws IOException {
    this.fs = FileSystem.get(job);
    perm = new Path(FileOutputFormat.getOutputPath(job), name);
    temp = job.getLocalPath("index/_" + Integer.toString(new Random().nextInt()));
    fs.delete(perm, true); // delete old, if any
    analyzerFactory = new AnalyzerFactory(job);
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_4_10_2,
            new SmartChineseAnalyzer());
    LogByteSizeMergePolicy mergePolicy = new LogByteSizeMergePolicy();
    mergePolicy.setMergeFactor(job.getInt("indexer.mergeFactor", 10));
    mergePolicy.setMaxMergeDocs(job.getInt("indexer.maxMergeDocs", Integer.MAX_VALUE));
    indexWriterConfig.setMergePolicy(mergePolicy);
    indexWriterConfig.setUseCompoundFile(false);
    indexWriterConfig.setTermIndexInterval(job.getInt("indexer.termIndexInterval", 128));
    indexWriterConfig.setMaxBufferedDocs(job.getInt("indexer.minMergeDocs", 100));
    indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    writer = new org.apache.lucene.index.IndexWriter(
            FSDirectory.open(new File(fs.startLocalOutput(perm, temp).toString())), indexWriterConfig);
    /*
     * addFieldOptions("title", STORE.YES, INDEX.TOKENIZED, VECTOR.NO, job);
     * addFieldOptions("url", STORE.YES, INDEX.TOKENIZED, VECTOR.NO, job);
     * addFieldOptions("content", STORE.YES, INDEX.TOKENIZED, VECTOR.NO, job);
     * addFieldOptions("lang", STORE.YES, INDEX.UNTOKENIZED, VECTOR.NO, job);
     */
    processOptions(job);
}