Example usage for org.apache.hadoop.mapred JobConf getInt

Introduction

This page shows example usages of org.apache.hadoop.mapred JobConf getInt.

Prototype

public int getInt(String name, int defaultValue) 

Document

Get the value of the name property as an int. If the property is not set, the supplied defaultValue is returned.
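
For orientation before the project examples, here is a minimal, self-contained sketch of the lookup-with-default behaviour. The property name my.app.retries and its values are hypothetical and not taken from any of the sources below.

import org.apache.hadoop.mapred.JobConf;

public class JobConfGetIntExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // The property is not set, so getInt returns the supplied default (3).
        int retries = conf.getInt("my.app.retries", 3);
        System.out.println("retries = " + retries); // prints 3

        // Once the property is set, getInt parses and returns the stored value.
        conf.setInt("my.app.retries", 10);
        System.out.println(conf.getInt("my.app.retries", 3)); // prints 10
    }
}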

Usage

From source file:StorageEngineClient.CombineFileInputFormat.java

License:Open Source License

private void processsplit(JobConf job, Map.Entry<String, List<OneBlockInfo>> one,
        HashMap<OneBlockInfo, String[]> blockToNodes, long maxSize, long minSizeNode, long minSizeRack,
        List<CombineFileSplit> splits, String type) {
    ArrayList<OneBlockInfo> validBlocks = new ArrayList<OneBlockInfo>();
    ArrayList<String> nodes = new ArrayList<String>();
    long curSplitSize = 0;
    if (type.equals("node"))
        nodes.add(one.getKey());

    List<OneBlockInfo> blocks = null;
    if (!type.equals("all")) {
        blocks = one.getValue();
    } else {
        blocks = new ArrayList<OneBlockInfo>();
        blocks.addAll(blockToNodes.keySet());
    }

    OneBlockInfo[] blocksInNodeArr = blocks.toArray(new OneBlockInfo[blocks.size()]);
    if (job.getBoolean("hive.merge.inputfiles.sort", true)) {
        Arrays.sort(blocksInNodeArr, new Comparator<OneBlockInfo>() {
            @Override
            public int compare(OneBlockInfo o1, OneBlockInfo o2) {
                // Sort blocks by length in descending order; Long.compare avoids the
                // int-overflow risk of casting the long difference to an int.
                return Long.compare(o2.length, o1.length);
            }
        });
    }

    if (job.getBoolean("hive.merge.inputfiles.rerange", false)) {

        Random r = new Random(123456);
        OneBlockInfo tmp = null;
        for (int i = 0; i < blocksInNodeArr.length; i++) {
            int idx = r.nextInt(blocksInNodeArr.length);
            tmp = blocksInNodeArr[i];
            blocksInNodeArr[i] = blocksInNodeArr[idx];
            blocksInNodeArr[idx] = tmp;
        }
    }

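    // getInt: upper bound on the number of files packed into one combined split (default 1000).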
    int maxFileNumPerSplit = job.getInt("hive.merge.inputfiles.maxFileNumPerSplit", 1000);

    for (int i = 0; i < blocksInNodeArr.length; i++) {
        if (blockToNodes.containsKey(blocksInNodeArr[i])) {
            if (!type.equals("node")) {
                nodes.clear();
            }

            curSplitSize = blocksInNodeArr[i].length;
            validBlocks.clear();
            validBlocks.add(blocksInNodeArr[i]);
            blockToNodes.remove(blocksInNodeArr[i]);
            if (maxSize != 0 && curSplitSize >= maxSize) {
                addCreatedSplit(job, splits, nodes, validBlocks);
            } else {
                int filenum = 1;
                for (int j = i + 1; j < blocksInNodeArr.length; j++) {
                    if (blockToNodes.containsKey(blocksInNodeArr[j])) {
                        long size1 = blocksInNodeArr[j].length;
                        if (maxSize != 0 && curSplitSize + size1 <= maxSize) {
                            curSplitSize += size1;
                            filenum++;
                            validBlocks.add(blocksInNodeArr[j]);
                            blockToNodes.remove(blocksInNodeArr[j]);
                            if (!type.equals("node"))
                                for (int k = 0; k < blocksInNodeArr[j].hosts.length; k++) {
                                    nodes.add(blocksInNodeArr[j].hosts[k]);
                                }
                        }
                        if (filenum >= maxFileNumPerSplit) {
                            break;
                        }
                    }
                }
                if (minSizeNode != 0 && curSplitSize >= minSizeNode) {
                    addCreatedSplit(job, splits, nodes, validBlocks);
                } else {
                    for (OneBlockInfo oneblock : validBlocks) {
                        blockToNodes.put(oneblock, oneblock.hosts);
                    }
                    break;
                }
            }
        }
    }
}

From source file:StorageEngineClient.CombineFileInputFormat.java

License:Open Source License

private void processsplitForUnsplit(JobConf job, Map.Entry<String, List<OneBlockInfo>> one,
        HashMap<OneBlockInfo, String[]> blockToNodes, long maxSize, long minSizeNode, long minSizeRack,
        List<CombineFileSplit> splits, String type) {
    ArrayList<OneBlockInfo> validBlocks = new ArrayList<OneBlockInfo>();
    ArrayList<String> nodes = new ArrayList<String>();
    long curSplitSize = 0;
    if (type.equals("node"))
        nodes.add(one.getKey());

    List<OneBlockInfo> blocks = null;
    if (!type.equals("all")) {
        blocks = one.getValue();
    } else {
        blocks = new ArrayList<OneBlockInfo>();
        blocks.addAll(blockToNodes.keySet());
    }

    OneBlockInfo[] blocksInNodeArr = blocks.toArray(new OneBlockInfo[blocks.size()]);
    if (job.getBoolean("hive.merge.inputfiles.sort", true)) {
        Arrays.sort(blocksInNodeArr, new Comparator<OneBlockInfo>() {
            @Override
            public int compare(OneBlockInfo o1, OneBlockInfo o2) {
                long compareResult = o2.length - o1.length;
                int result = 0;
                if (compareResult > 0)
                    result = 1;

                if (compareResult < 0)
                    result = -1;

                return result;
            }
        });
    }

    if (job.getBoolean("hive.merge.inputfiles.rerange", false)) {
        Random r = new Random(123456);
        OneBlockInfo tmp = null;
        for (int i = 0; i < blocksInNodeArr.length; i++) {
            int idx = r.nextInt(blocksInNodeArr.length);
            tmp = blocksInNodeArr[i];
            blocksInNodeArr[i] = blocksInNodeArr[idx];
            blocksInNodeArr[idx] = tmp;
        }
    }

    int maxFileNumPerSplit = job.getInt("hive.merge.inputfiles.maxFileNumPerSplit", 1000);

    for (int i = 0; i < blocksInNodeArr.length; i++) {
        if (blockToNodes.containsKey(blocksInNodeArr[i])) {
            if (!type.equals("node")) {
                nodes.clear();
            }

            curSplitSize = blocksInNodeArr[i].length;
            validBlocks.clear();
            validBlocks.add(blocksInNodeArr[i]);
            blockToNodes.remove(blocksInNodeArr[i]);
            if (maxSize != 0 && curSplitSize >= maxSize) {
                if (!type.equals("node")) {
                    for (int k = 0; k < blocksInNodeArr[i].hosts.length; k++) {
                        nodes.add(blocksInNodeArr[i].hosts[k]);
                    }
                }
                addCreatedSplit(job, splits, nodes, validBlocks);
            } else {
                int filenum = 1;
                for (int j = i + 1; j < blocksInNodeArr.length; j++) {
                    if (blockToNodes.containsKey(blocksInNodeArr[j])) {
                        long size1 = blocksInNodeArr[j].length;
                        if (maxSize != 0 && curSplitSize < maxSize) {
                            curSplitSize += size1;
                            filenum++;
                            validBlocks.add(blocksInNodeArr[j]);
                            blockToNodes.remove(blocksInNodeArr[j]);
                        }
                        if (filenum >= maxFileNumPerSplit) {
                            break;
                        }

                        if (curSplitSize >= maxSize) {
                            break;
                        }
                    }
                }
                if (minSizeNode != 0 && curSplitSize >= minSizeNode) {
                    if (!type.equals("node")) {
                        generateNodesInfo(validBlocks, nodes);
                    }

                    addCreatedSplit(job, splits, nodes, validBlocks);
                } else {
                    for (OneBlockInfo oneblock : validBlocks) {
                        blockToNodes.put(oneblock, oneblock.hosts);
                    }
                    break;
                }
            }
        }
    }

    HashSet<OneBlockInfo> hs = new HashSet<OneBlockInfo>();
    while (blockToNodes.size() > 0) {
        validBlocks = new ArrayList<OneBlockInfo>();
        nodes = new ArrayList<String>();
        int filenum = 0;
        hs.clear();
        for (OneBlockInfo blockInfo : blockToNodes.keySet()) {
            filenum++;
            validBlocks.add(blockInfo);

            hs.add(blockInfo);
            if (filenum >= maxFileNumPerSplit) {
                break;
            }
        }
        for (OneBlockInfo blockInfo : hs) {
            blockToNodes.remove(blockInfo);
        }

        generateNodesInfo(validBlocks, nodes);

        this.addCreatedSplit(job, splits, nodes, validBlocks);
    }
}

From source file:StorageEngineClient.FormatStorageInputFormat_SplitByLineNum.java

License:Open Source License

public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {

    List<FormatStorageInputSplit_WithLineNum> splits = new ArrayList<FormatStorageInputSplit_WithLineNum>();

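    // getInt: target number of records per generated split (default 1,000,000).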
    int lenNum = job.getInt("hive.inputfiles.line_num_per_split", 1000000);
    if (lenNum < 10000) {
        LOG.info("lenNum set to " + lenNum + " is too small, so it is reset to 1000000");
        lenNum = 1000000;
    }
    FileStatus[] fss = listStatus(job);

    FileStatus[] originalFss = fss;
    List<FileStatus> fssList = new ArrayList<FileStatus>();
    for (int i = 0; i < fss.length; i++) {
        if (fss[i].getLen() > 0) {
            fssList.add(fss[i]);
        }
    }

    fss = (FileStatus[]) fssList.toArray(new FileStatus[0]);
    int listSize = fss.length;

    if (listSize == 0) {

        mapredWork mrWork = Utilities.getMapRedWork(job);
        Path inputPath = originalFss[0].getPath();
        Path inputParentPath = inputPath.getParent();
        String inputPathStr = inputPath.toUri().toString();
        String inputPathParentStr = inputParentPath.toString();

        FileSystem fs = inputPath.getFileSystem(job);
        fs.delete(inputPath, true);

        LinkedHashMap<String, partitionDesc> partDescMap = mrWork.getPathToPartitionInfo();
        partitionDesc partDesc = partDescMap.get(inputPathParentStr);

        job.setBoolean("NeedPostfix", false);
        RecordWriter recWriter = new FormatStorageHiveOutputFormat().getHiveRecordWriter(job, inputPath,
                Text.class, false, partDesc.getTableDesc().getProperties(), null);
        recWriter.close(false);
        job.setBoolean("NeedPostfix", true);

        fss = listStatus(job);
    }

    Random r = new Random(123456);
    for (int i = 0; i < fss.length; i++) {
        int x = r.nextInt(fss.length);
        FileStatus tmp = fss[i];
        fss[i] = fss[x];
        fss[x] = tmp;
    }
    int[] fslengths = new int[fss.length];
    for (int i = 0; i < fss.length; i++) {
        IFormatDataFile ifdf = new IFormatDataFile(job);
        ifdf.open(fss[i].getPath().toString());
        fslengths[i] = ifdf.recnum();
        ifdf.close();
    }

    int id = 0;
    int offset = 0;
    int currlen = 0;
    ArrayList<FileSplit> currFileSplits = new ArrayList<FormatStorageInputFormat_SplitByLineNum.FileSplit>();
    while (true) {
        int need = lenNum - currlen;
        int remain = fslengths[id] - offset;

        if (need <= remain) {
            currFileSplits.add(new FileSplit(fss[id].getPath().toString(), offset, need));
            splits.add(new FormatStorageInputSplit_WithLineNum(
                    currFileSplits.toArray(new FileSplit[currFileSplits.size()]),
                    fss[id].getPath().getFileSystem(job).getFileBlockLocations(fss[id], 0, fss[id].getLen())[0]
                            .getHosts()));
            currFileSplits.clear();

            currlen = 0;

            offset += need;
        } else {
            if (remain != 0) {
                currFileSplits.add(new FileSplit(fss[id].getPath().toString(), offset, remain));
            }
            id++;
            offset = 0;
            currlen += remain;
        }

        if (id == fss.length) {
            if (currFileSplits.size() != 0) {
                splits.add(new FormatStorageInputSplit_WithLineNum(
                        currFileSplits.toArray(new FileSplit[currFileSplits.size()]),
                        fss[id - 1].getPath().getFileSystem(job).getFileBlockLocations(fss[id - 1], 0,
                                fss[id - 1].getLen())[0].getHosts()));
            }
            break;
        }
    }

    if (splits.size() == 0) {
        ArrayList<FileSplit> emptyFileSplits = new ArrayList<FormatStorageInputFormat_SplitByLineNum.FileSplit>();
        emptyFileSplits.add(new FileSplit(fss[0].getPath().toString(), 0, 0));

        splits.add(new FormatStorageInputSplit_WithLineNum(
                emptyFileSplits.toArray(new FileSplit[emptyFileSplits.size()]),
                fss[0].getPath().getFileSystem(job).getFileBlockLocations(fss[0], 0, fss[0].getLen())[0]
                        .getHosts()));
    }

    for (int i = 0; i < splits.size(); i++) {
        LOG.info(splits.get(i).toString());
    }

    LOG.info("Total # of splits: " + splits.size());
    return splits.toArray(new FormatStorageInputSplit_WithLineNum[splits.size()]);

}

From source file:tachyon.hadoop.fs.IOMapperBase.java

License:Apache License

public void configure(JobConf conf) {
    setConf(conf);
    try {
        mFS = FileSystem.get(conf);
    } catch (Exception e) {
        throw new RuntimeException("Cannot create file system.", e);
    }
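    // getInt: I/O buffer size in bytes for the mapper (default 4096).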
    mBufferSize = conf.getInt("test.io.file.buffer.size", 4096);
    mBuffer = new byte[mBufferSize];
    try {
        mHostname = InetAddress.getLocalHost().getHostName();
    } catch (Exception e) {
        mHostname = "localhost";
    }
}

From source file:uk.bl.wa.hadoop.indexer.mdx.WARCMDXGenerator.java

License:Open Source License

/**
 * Configure the given JobConf from the command-line arguments.
 *
 * @param args
 * @throws IOException
 * @throws ParseException
 * @throws InterruptedException
 * @throws KeeperException
 */
protected void createJobConf(JobConf conf, String[] args)
        throws IOException, ParseException, KeeperException, InterruptedException {
    // Parse the command-line parameters.
    this.setup(args, conf);

    // Store application properties where the mappers/reducers can access
    // them
    Config index_conf;
    if (this.configPath != null) {
        LOG.info("Loading config from: " + configPath);
        index_conf = ConfigFactory.parseFile(new File(this.configPath));
    } else {
        LOG.info("Using default config: mdx");
        index_conf = ConfigFactory.load("mdx");
    }
    if (this.dumpConfig) {
        ConfigPrinter.print(index_conf);
        System.exit(0);
    }
    conf.set(CONFIG_PROPERTIES, index_conf.withOnlyPath("warc").root().render(ConfigRenderOptions.concise()));
    LOG.info("Loaded warc config: " + index_conf.getString("warc.title"));

    // Reducer count:
    int numReducers = 10;
    if (index_conf.hasPath(WARC_HADOOP_NUM_REDUCERS)) {
        numReducers = index_conf.getInt(WARC_HADOOP_NUM_REDUCERS);
    }
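    // getInt with a -1 default reveals whether the Hadoop configuration explicitly sets the reducer count.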
    if (conf.getInt(WARC_HADOOP_NUM_REDUCERS, -1) != -1) {
        LOG.info("Overriding num_reducers using Hadoop config.");
        numReducers = conf.getInt(WARC_HADOOP_NUM_REDUCERS, numReducers);
    }

    // Add input paths:
    LOG.info("Reading input files...");
    String line = null;
    BufferedReader br = new BufferedReader(new FileReader(this.inputPath));
    while ((line = br.readLine()) != null) {
        FileInputFormat.addInputPath(conf, new Path(line));
    }
    br.close();
    LOG.info("Read " + FileInputFormat.getInputPaths(conf).length + " input files.");

    FileOutputFormat.setOutputPath(conf, new Path(this.outputPath));

    conf.setJobName(this.inputPath + "_" + System.currentTimeMillis());
    conf.setInputFormat(ArchiveFileInputFormat.class);
    conf.setMapperClass(WARCMDXMapper.class);
    conf.setReducerClass(MDXReduplicatingReducer.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    // conf.setOutputFormat(TextOutputFormat.class);
    // SequenceFileOutputFormat.setOutputCompressionType(conf,
    // CompressionType.BLOCK);
    // OR TextOutputFormat?
    // conf.set("map.output.key.field.separator", "");
    // Compress the output from the maps, to cut down temp space
    // requirements between map and reduce.
    conf.setBoolean("mapreduce.map.output.compress", true); // Wrong syntax
    // for 0.20.x ?
    conf.set("mapred.compress.map.output", "true");
    // conf.set("mapred.map.output.compression.codec",
    // "org.apache.hadoop.io.compress.GzipCodec");
    // Ensure the JARs we provide take precedence over ones from Hadoop:
    conf.setBoolean("mapreduce.task.classpath.user.precedence", true);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setNumReduceTasks(numReducers);
}

From source file:voldemort.store.readonly.disk.HadoopStoreWriterPerBucket.java

License:Apache License

@Override
public void conf(JobConf job) {

    JobConf conf = job;
    try {

        this.cluster = new ClusterMapper().readCluster(new StringReader(conf.get("cluster.xml")));
        List<StoreDefinition> storeDefs = new StoreDefinitionsMapper()
                .readStoreList(new StringReader(conf.get("stores.xml")));
        if (storeDefs.size() != 1)
            throw new IllegalStateException("Expected to find only a single store, but found multiple!");
        this.storeDef = storeDefs.get(0);

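        // getInt falls back to -1 when num.chunks is absent; values below 1 are rejected below.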
        this.numChunks = conf.getInt("num.chunks", -1);
        if (this.numChunks < 1)
            throw new VoldemortException("num.chunks not specified in the job conf.");

        this.saveKeys = conf.getBoolean("save.keys", false);
        this.reducerPerBucket = conf.getBoolean("reducer.per.bucket", false);
        this.conf = job;
        this.outputDir = job.get("final.output.dir");
        this.taskId = job.get("mapred.task.id");
        this.checkSumType = CheckSum.fromString(job.get("checksum.type"));

        this.checkSumDigestIndex = new CheckSum[getNumChunks()];
        this.checkSumDigestValue = new CheckSum[getNumChunks()];
        this.position = new int[getNumChunks()];
        this.taskIndexFileName = new Path[getNumChunks()];
        this.taskValueFileName = new Path[getNumChunks()];
        this.indexFileStream = new DataOutputStream[getNumChunks()];
        this.valueFileStream = new DataOutputStream[getNumChunks()];

        for (int chunkId = 0; chunkId < getNumChunks(); chunkId++) {

            this.checkSumDigestIndex[chunkId] = CheckSum.getInstance(checkSumType);
            this.checkSumDigestValue[chunkId] = CheckSum.getInstance(checkSumType);
            this.position[chunkId] = 0;

            this.taskIndexFileName[chunkId] = new Path(FileOutputFormat.getOutputPath(job),
                    getStoreName() + "." + Integer.toString(chunkId) + "_" + this.taskId + ".index");
            this.taskValueFileName[chunkId] = new Path(FileOutputFormat.getOutputPath(job),
                    getStoreName() + "." + Integer.toString(chunkId) + "_" + this.taskId + ".data");

            if (this.fs == null)
                this.fs = this.taskIndexFileName[chunkId].getFileSystem(job);

            this.indexFileStream[chunkId] = fs.create(this.taskIndexFileName[chunkId]);
            fs.setPermission(this.taskIndexFileName[chunkId],
                    new FsPermission(HadoopStoreBuilder.HADOOP_FILE_PERMISSION));
            logger.info("Setting permission to 755 for " + this.taskIndexFileName[chunkId]);

            this.valueFileStream[chunkId] = fs.create(this.taskValueFileName[chunkId]);
            fs.setPermission(this.taskValueFileName[chunkId],
                    new FsPermission(HadoopStoreBuilder.HADOOP_FILE_PERMISSION));
            logger.info("Setting permission to 755 for " + this.taskValueFileName[chunkId]);

            logger.info("Opening " + this.taskIndexFileName[chunkId] + " and " + this.taskValueFileName[chunkId]
                    + " for writing.");
        }

    } catch (IOException e) {
        // throw new RuntimeException("Failed to open Input/OutputStream",
        // e);
        e.printStackTrace();
    }

}

From source file:voldemort.store.readonly.mr.AbstractStoreBuilderConfigurable.java

License:Apache License

public void configure(JobConf conf) {
    this.cluster = new ClusterMapper().readCluster(new StringReader(conf.get("cluster.xml")));
    List<StoreDefinition> storeDefs = new StoreDefinitionsMapper()
            .readStoreList(new StringReader(conf.get("stores.xml")));
    if (storeDefs.size() != 1)
        throw new IllegalStateException("Expected to find only a single store, but found multiple!");
    this.storeDef = storeDefs.get(0);

    this.numChunks = conf.getInt("num.chunks", -1);
    if (this.numChunks < 1)
        throw new VoldemortException("num.chunks not specified in the job conf.");

    this.saveKeys = conf.getBoolean("save.keys", false);
    this.reducerPerBucket = conf.getBoolean("reducer.per.bucket", false);
}

From source file:voldemort.store.readonly.mr.AvroStoreBuilderMapper.java

License:Apache License

@Override
public void configure(JobConf conf) {

    super.setConf(conf);
    // from parent code

    md5er = ByteUtils.getDigest("md5");

    this.cluster = new ClusterMapper().readCluster(new StringReader(conf.get("cluster.xml")));
    List<StoreDefinition> storeDefs = new StoreDefinitionsMapper()
            .readStoreList(new StringReader(conf.get("stores.xml")));

    if (storeDefs.size() != 1)
        throw new IllegalStateException("Expected to find only a single store, but found multiple!");
    this.storeDef = storeDefs.get(0);

    this.numChunks = conf.getInt("num.chunks", -1);
    if (this.numChunks < 1)
        throw new VoldemortException("num.chunks not specified in the job conf.");

    this.saveKeys = conf.getBoolean("save.keys", true);
    this.reducerPerBucket = conf.getBoolean("reducer.per.bucket", false);

    keySerializerDefinition = getStoreDef().getKeySerializer();
    valueSerializerDefinition = getStoreDef().getValueSerializer();

    try {
        SerializerFactory factory = new DefaultSerializerFactory();

        if (conf.get("serializer.factory") != null) {
            factory = (SerializerFactory) Class.forName(conf.get("serializer.factory")).newInstance();
        }

        keySerializer = factory.getSerializer(keySerializerDefinition);
        valueSerializer = factory.getSerializer(valueSerializerDefinition);

        keyField = conf.get("avro.key.field");

        valField = conf.get("avro.value.field");

        keySchema = conf.get("avro.key.schema");
        valSchema = conf.get("avro.val.schema");

        if (keySerializerDefinition.getName().equals("avro-generic")) {
            keySerializer = new AvroGenericSerializer(keySchema);
            valueSerializer = new AvroGenericSerializer(valSchema);
        } else {

            if (keySerializerDefinition.hasVersion()) {
                Map<Integer, String> versions = new HashMap<Integer, String>();
                for (Map.Entry<Integer, String> entry : keySerializerDefinition.getAllSchemaInfoVersions()
                        .entrySet())
                    versions.put(entry.getKey(), entry.getValue());
                keySerializer = new AvroVersionedGenericSerializer(versions);
            } else
                keySerializer = new AvroVersionedGenericSerializer(
                        keySerializerDefinition.getCurrentSchemaInfo());

            if (valueSerializerDefinition.hasVersion()) {
                Map<Integer, String> versions = new HashMap<Integer, String>();
                for (Map.Entry<Integer, String> entry : valueSerializerDefinition.getAllSchemaInfoVersions()
                        .entrySet())
                    versions.put(entry.getKey(), entry.getValue());
                valueSerializer = new AvroVersionedGenericSerializer(versions);
            } else
                valueSerializer = new AvroVersionedGenericSerializer(
                        valueSerializerDefinition.getCurrentSchemaInfo());

        }

    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    keyCompressor = new CompressionStrategyFactory().get(keySerializerDefinition.getCompression());
    valueCompressor = new CompressionStrategyFactory().get(valueSerializerDefinition.getCompression());

    routingStrategy = new ConsistentRoutingStrategy(getCluster().getNodes(),
            getStoreDef().getReplicationFactor());

    Props props = HadoopUtils.getPropsFromJob(conf);

}

From source file:voldemort.store.readonly.mr.AvroStoreBuilderPartitioner.java

License:Apache License

@Override
public void configure(JobConf conf) {
    this.cluster = new ClusterMapper().readCluster(new StringReader(conf.get("cluster.xml")));
    List<StoreDefinition> storeDefs = new StoreDefinitionsMapper()
            .readStoreList(new StringReader(conf.get("stores.xml")));
    if (storeDefs.size() != 1)
        throw new IllegalStateException("Expected to find only a single store, but found multiple!");
    this.storeDef = storeDefs.get(0);

    this.numChunks = conf.getInt("num.chunks", -1);
    if (this.numChunks < 1)
        throw new VoldemortException("num.chunks not specified in the job conf.");

    this.saveKeys = conf.getBoolean("save.keys", false);
    this.reducerPerBucket = conf.getBoolean("reducer.per.bucket", false);
}

From source file:voldemort.store.readwrite.mr.AbstractRWHadoopStoreBuilderMapper.java

License:Apache License

@Override
@SuppressWarnings("unchecked")
public void configure(JobConf conf) {
    super.configure(conf);

    md5er = ByteUtils.getDigest("md5");
    keySerializerDefinition = getStoreDef().getKeySerializer();
    valueSerializerDefinition = getStoreDef().getValueSerializer();

    try {
        SerializerFactory factory = new DefaultSerializerFactory();

        if (conf.get("serializer.factory") != null) {
            factory = (SerializerFactory) Class.forName(conf.get("serializer.factory")).newInstance();
        }

        keySerializer = (Serializer<Object>) factory.getSerializer(keySerializerDefinition);
        valueSerializer = (Serializer<Object>) factory.getSerializer(valueSerializerDefinition);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    keyCompressor = new CompressionStrategyFactory().get(keySerializerDefinition.getCompression());
    valueCompressor = new CompressionStrategyFactory().get(valueSerializerDefinition.getCompression());

    RoutingStrategyFactory factory = new RoutingStrategyFactory();
    routingStrategy = factory.updateRoutingStrategy(getStoreDef(), getCluster());

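    // getInt: vector.node.id defaults to -1 when the property is not configured.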
    vectorNodeId = conf.getInt("vector.node.id", -1);
    vectorNodeVersion = conf.getLong("vector.node.version", 1L);

    jobStartTime = conf.getLong("job.start.time.ms", -1);
    if (jobStartTime < 0) {
        throw new RuntimeException("Incorrect job start time");
    }
}