Example usage for org.apache.hadoop.mapred JobConf setBoolean

Introduction

In this page you can find the example usage for org.apache.hadoop.mapred JobConf setBoolean.

Prototype

public void setBoolean(String name, boolean value)

Source Link

Document

Set the value of the name property to a boolean.

Usage

From source file:uk.bl.wa.hadoop.mapreduce.mdx.MDXSeqMerger.java

License:Open Source License

/**
 * /*from w  w w.j  av  a  2s.  c o m*/
 * @param args
 * @return
 * @throws IOException
 * @throws ParseException
 * @throws InterruptedException
 * @throws KeeperException
 */
public void createJobConf(JobConf conf, String[] args)
        throws IOException, ParseException, KeeperException, InterruptedException {
    // Parse the command-line parameters.
    this.setup(args, conf);

    // Add input paths:
    LOG.info("Reading input files...");
    String line = null;
    BufferedReader br = new BufferedReader(new FileReader(this.inputPath));
    while ((line = br.readLine()) != null) {
        FileInputFormat.addInputPath(conf, new Path(line));
    }
    br.close();
    LOG.info("Read " + FileInputFormat.getInputPaths(conf).length + " input files.");

    FileOutputFormat.setOutputPath(conf, new Path(this.outputPath));

    conf.setJobName(this.inputPath + "_" + System.currentTimeMillis());
    // Input
    conf.setInputFormat(SequenceFileInputFormat.class);
    // M-R
    conf.setMapperClass(IdentityMapper.class);
    conf.setReducerClass(MDXReduplicatingReducer.class);
    // Map outputs
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    // Job outputs
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputCompressionType(conf, CompressionType.BLOCK);
    LOG.info("Used " + numReducers + " reducers.");
    conf.setNumReduceTasks(numReducers);

    // Compress the output from the maps, to cut down temp space
    // requirements between map and reduce.
    conf.setBoolean("mapreduce.map.output.compress", true); // Wrong syntax
                                                            // for 0.20.x ?
    conf.set("mapred.compress.map.output", "true");
    // conf.set("mapred.map.output.compression.codec",
    // "org.apache.hadoop.io.compress.GzipCodec");
    // Ensure the JARs we provide take precedence over ones from Hadoop:
    conf.setBoolean("mapreduce.task.classpath.user.precedence", true);

}

From source file:uk.bl.wa.hadoop.mapreduce.warcstats.WARCRawStatsMDXGenerator.java

License:Open Source License

/**
 * /*from w  w w .  j  a  va2  s .  c o  m*/
 * @param args
 * @return
 * @throws IOException
 * @throws ParseException
 * @throws InterruptedException
 * @throws KeeperException
 */
protected void createJobConf(JobConf conf, String[] args)
        throws IOException, ParseException, KeeperException, InterruptedException {
    // Parse the command-line parameters.
    this.setup(args, conf);

    // Add input paths:
    LOG.info("Reading input files...");
    String line = null;
    BufferedReader br = new BufferedReader(new FileReader(this.inputPath));
    while ((line = br.readLine()) != null) {
        FileInputFormat.addInputPath(conf, new Path(line));
    }
    br.close();
    LOG.info("Read " + FileInputFormat.getInputPaths(conf).length + " input files.");

    FileOutputFormat.setOutputPath(conf, new Path(this.outputPath));

    conf.setJobName(this.inputPath + "_" + System.currentTimeMillis());
    conf.setInputFormat(ArchiveFileInputFormat.class);
    conf.setMapperClass(WARCRawStatsMapper.class);
    conf.setReducerClass(MDXReduplicatingReducer.class);
    conf.setOutputFormat(TextOutputFormat.class);
    // OR TextOutputFormat?
    // conf.set("map.output.key.field.separator", "");
    // Compress the output from the maps, to cut down temp space
    // requirements between map and reduce.
    conf.setBoolean("mapreduce.map.output.compress", true); // Wrong syntax
    // for 0.20.x ?
    conf.set("mapred.compress.map.output", "true");
    // conf.set("mapred.map.output.compression.codec",
    // "org.apache.hadoop.io.compress.GzipCodec");
    // Ensure the JARs we provide take precedence over ones from Hadoop:
    conf.setBoolean("mapreduce.task.classpath.user.precedence", true);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setNumReduceTasks(numReducers);
}

From source file:voldemort.store.readonly.mr.HadoopStoreBuilder.java

License:Apache License

/**
 * Run the job/*  w ww  . j a v a 2  s  . c om*/
 */
public void build() {
    try {
        JobConf conf = new JobConf(config);
        conf.setInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE);
        conf.set("cluster.xml", new ClusterMapper().writeCluster(cluster));
        conf.set("stores.xml",
                new StoreDefinitionsMapper().writeStoreList(Collections.singletonList(storeDef)));
        conf.setBoolean("save.keys", saveKeys);
        conf.setBoolean("reducer.per.bucket", reducerPerBucket);
        if (!isAvro) {
            conf.setPartitionerClass(HadoopStoreBuilderPartitioner.class);
            conf.setMapperClass(mapperClass);
            conf.setMapOutputKeyClass(BytesWritable.class);
            conf.setMapOutputValueClass(BytesWritable.class);
            if (reducerPerBucket) {
                conf.setReducerClass(HadoopStoreBuilderReducerPerBucket.class);
            } else {
                conf.setReducerClass(HadoopStoreBuilderReducer.class);
            }
        }
        conf.setInputFormat(inputFormatClass);
        conf.setOutputFormat(SequenceFileOutputFormat.class);
        conf.setOutputKeyClass(BytesWritable.class);
        conf.setOutputValueClass(BytesWritable.class);
        conf.setJarByClass(getClass());
        conf.setReduceSpeculativeExecution(false);
        FileInputFormat.setInputPaths(conf, inputPath);
        conf.set("final.output.dir", outputDir.toString());
        conf.set("checksum.type", CheckSum.toString(checkSumType));
        FileOutputFormat.setOutputPath(conf, tempDir);

        FileSystem outputFs = outputDir.getFileSystem(conf);
        if (outputFs.exists(outputDir)) {
            throw new IOException("Final output directory already exists.");
        }

        // delete output dir if it already exists
        FileSystem tempFs = tempDir.getFileSystem(conf);
        tempFs.delete(tempDir, true);

        long size = sizeOfPath(tempFs, inputPath);
        logger.info("Data size = " + size + ", replication factor = " + storeDef.getReplicationFactor()
                + ", numNodes = " + cluster.getNumberOfNodes() + ", chunk size = " + chunkSizeBytes);

        // Derive "rough" number of chunks and reducers
        int numReducers;
        if (saveKeys) {

            if (this.numChunks == -1) {
                this.numChunks = Math.max((int) (storeDef.getReplicationFactor() * size
                        / cluster.getNumberOfPartitions() / storeDef.getReplicationFactor() / chunkSizeBytes),
                        1);
            } else {
                logger.info(
                        "Overriding chunk size byte and taking num chunks (" + this.numChunks + ") directly");
            }

            if (reducerPerBucket) {
                numReducers = cluster.getNumberOfPartitions() * storeDef.getReplicationFactor();
            } else {
                numReducers = cluster.getNumberOfPartitions() * storeDef.getReplicationFactor() * numChunks;
            }
        } else {

            if (this.numChunks == -1) {
                this.numChunks = Math.max((int) (storeDef.getReplicationFactor() * size
                        / cluster.getNumberOfPartitions() / chunkSizeBytes), 1);
            } else {
                logger.info(
                        "Overriding chunk size byte and taking num chunks (" + this.numChunks + ") directly");
            }

            if (reducerPerBucket) {
                numReducers = cluster.getNumberOfPartitions();
            } else {
                numReducers = cluster.getNumberOfPartitions() * numChunks;
            }
        }
        conf.setInt("num.chunks", numChunks);
        conf.setNumReduceTasks(numReducers);

        if (isAvro) {
            conf.setPartitionerClass(AvroStoreBuilderPartitioner.class);
            // conf.setMapperClass(mapperClass);
            conf.setMapOutputKeyClass(ByteBuffer.class);
            conf.setMapOutputValueClass(ByteBuffer.class);

            conf.setInputFormat(inputFormatClass);

            conf.setOutputFormat((Class<? extends OutputFormat>) AvroOutputFormat.class);
            conf.setOutputKeyClass(ByteBuffer.class);
            conf.setOutputValueClass(ByteBuffer.class);

            // AvroJob confs for the avro mapper
            AvroJob.setInputSchema(conf, Schema.parse(config.get("avro.rec.schema")));

            AvroJob.setOutputSchema(conf,
                    Pair.getPairSchema(Schema.create(Schema.Type.BYTES), Schema.create(Schema.Type.BYTES)));

            AvroJob.setMapperClass(conf, mapperClass);

            if (reducerPerBucket) {
                conf.setReducerClass(AvroStoreBuilderReducerPerBucket.class);
            } else {
                conf.setReducerClass(AvroStoreBuilderReducer.class);
            }

        }

        logger.info("Number of chunks: " + numChunks + ", number of reducers: " + numReducers + ", save keys: "
                + saveKeys + ", reducerPerBucket: " + reducerPerBucket);
        logger.info("Building store...");
        RunningJob job = JobClient.runJob(conf);

        // Once the job has completed log the counter
        Counters counters = job.getCounters();

        if (saveKeys) {
            if (reducerPerBucket) {
                logger.info("Number of collisions in the job - "
                        + counters.getCounter(KeyValueWriter.CollisionCounter.NUM_COLLISIONS));
                logger.info("Maximum number of collisions for one entry - "
                        + counters.getCounter(KeyValueWriter.CollisionCounter.MAX_COLLISIONS));
            } else {
                logger.info("Number of collisions in the job - "
                        + counters.getCounter(KeyValueWriter.CollisionCounter.NUM_COLLISIONS));
                logger.info("Maximum number of collisions for one entry - "
                        + counters.getCounter(KeyValueWriter.CollisionCounter.MAX_COLLISIONS));
            }
        }

        // Do a CheckSumOfCheckSum - Similar to HDFS
        CheckSum checkSumGenerator = CheckSum.getInstance(this.checkSumType);
        if (!this.checkSumType.equals(CheckSumType.NONE) && checkSumGenerator == null) {
            throw new VoldemortException("Could not generate checksum digest for type " + this.checkSumType);
        }

        // Check if all folder exists and with format file
        for (Node node : cluster.getNodes()) {

            ReadOnlyStorageMetadata metadata = new ReadOnlyStorageMetadata();

            if (saveKeys) {
                metadata.add(ReadOnlyStorageMetadata.FORMAT, ReadOnlyStorageFormat.READONLY_V2.getCode());
            } else {
                metadata.add(ReadOnlyStorageMetadata.FORMAT, ReadOnlyStorageFormat.READONLY_V1.getCode());
            }

            Path nodePath = new Path(outputDir.toString(), "node-" + node.getId());

            if (!outputFs.exists(nodePath)) {
                logger.info("No data generated for node " + node.getId() + ". Generating empty folder");
                outputFs.mkdirs(nodePath); // Create empty folder
                outputFs.setPermission(nodePath, new FsPermission(HADOOP_FILE_PERMISSION));
                logger.info("Setting permission to 755 for " + nodePath);
            }

            if (checkSumType != CheckSumType.NONE) {

                FileStatus[] storeFiles = outputFs.listStatus(nodePath, new PathFilter() {

                    public boolean accept(Path arg0) {
                        if (arg0.getName().endsWith("checksum") && !arg0.getName().startsWith(".")) {
                            return true;
                        }
                        return false;
                    }
                });

                if (storeFiles != null && storeFiles.length > 0) {
                    Arrays.sort(storeFiles, new IndexFileLastComparator());
                    FSDataInputStream input = null;

                    for (FileStatus file : storeFiles) {
                        try {
                            input = outputFs.open(file.getPath());
                            byte fileCheckSum[] = new byte[CheckSum.checkSumLength(this.checkSumType)];
                            input.read(fileCheckSum);
                            logger.debug("Checksum for file " + file.toString() + " - "
                                    + new String(Hex.encodeHex(fileCheckSum)));
                            checkSumGenerator.update(fileCheckSum);
                        } catch (Exception e) {
                            logger.error("Error while reading checksum file " + e.getMessage(), e);
                        } finally {
                            if (input != null)
                                input.close();
                        }
                        outputFs.delete(file.getPath(), false);
                    }

                    metadata.add(ReadOnlyStorageMetadata.CHECKSUM_TYPE, CheckSum.toString(checkSumType));

                    String checkSum = new String(Hex.encodeHex(checkSumGenerator.getCheckSum()));
                    logger.info("Checksum for node " + node.getId() + " - " + checkSum);

                    metadata.add(ReadOnlyStorageMetadata.CHECKSUM, checkSum);
                }
            }

            // Write metadata
            Path metadataPath = new Path(nodePath, ".metadata");
            FSDataOutputStream metadataStream = outputFs.create(metadataPath);
            outputFs.setPermission(metadataPath, new FsPermission(HADOOP_FILE_PERMISSION));
            logger.info("Setting permission to 755 for " + metadataPath);
            metadataStream.write(metadata.toJsonString().getBytes());
            metadataStream.flush();
            metadataStream.close();

        }

    } catch (Exception e) {
        logger.error("Error in Store builder", e);
        throw new VoldemortException(e);
    }

}