Example usage for org.apache.hadoop.mapred JobConf getBoolean

List of usage examples for org.apache.hadoop.mapred JobConf getBoolean

Introduction

On this page you can find example usage for org.apache.hadoop.mapred JobConf getBoolean.

Prototype

public boolean getBoolean(String name, boolean defaultValue) 

Document

Get the value of the name property as a boolean. If no such property exists, or if its value is not a valid boolean, the supplied defaultValue is returned.
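
A minimal sketch of a typical call. The property name "example.feature.enabled" and the helper method are hypothetical, used only to illustrate the default-value behaviour:

public static boolean isFeatureEnabled(JobConf conf) {
    // "example.feature.enabled" is a placeholder property name;
    // if it is unset (or not a valid boolean), getBoolean returns the default given here: false
    return conf.getBoolean("example.feature.enabled", false);
}

The matching setter is conf.setBoolean("example.feature.enabled", true), after which the call above returns true.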

Usage

From source file:it.crs4.pydoop.pipes.PipesReducer.java

License:Apache License

public void configure(JobConf job) {
    this.job = job;
    // Disable the auto-increment of the counter. For pipes, the number of processed
    // records can differ from (be equal to or less than) the number of input records.
    SkipBadRecords.setAutoIncrReducerProcCount(job, false);
    skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);
}

From source file:it.crs4.pydoop.pipes.Submitter.java

License:Apache License

/**
 * Check whether the job is using a Java RecordReader
 * @param conf the configuration to check
 * @return is it a Java RecordReader?
 */
public static boolean getIsJavaRecordReader(JobConf conf) {
    return conf.getBoolean(Submitter.IS_JAVA_RR, false);
}

From source file:it.crs4.pydoop.pipes.Submitter.java

License:Apache License

/**
 * Check whether the job is using a Java Mapper.
 * @param conf the configuration to check
 * @return is it a Java Mapper?
 */
public static boolean getIsJavaMapper(JobConf conf) {
    return conf.getBoolean(Submitter.IS_JAVA_MAP, false);
}

From source file:it.crs4.pydoop.pipes.Submitter.java

License:Apache License

/**
 * Check whether the job is using a Java Reducer.
 * @param conf the configuration to check
 * @return is it a Java Reducer?
 */
public static boolean getIsJavaReducer(JobConf conf) {
    return conf.getBoolean(Submitter.IS_JAVA_REDUCE, false);
}

From source file:it.crs4.pydoop.pipes.Submitter.java

License:Apache License

/**
 * Will the reduce use a Java RecordWriter?
 * @param conf the configuration to check
 * @return true, if the output of the job will be written by Java
 */
public static boolean getIsJavaRecordWriter(JobConf conf) {
    return conf.getBoolean(Submitter.IS_JAVA_RW, false);
}

From source file:it.crs4.pydoop.pipes.Submitter.java

License:Apache License

/**
 * Does the user want to keep the command file for debugging? If this is
 * true, pipes will write a copy of the command data to a file in the
 * task directory named "downlink.data", which may be used to run the C++
 * program under the debugger. You probably also want to set 
 * JobConf.setKeepFailedTaskFiles(true) to keep the entire directory from
 * being deleted.
 * To run using the data file, set the environment variable 
 * "mapreduce.pipes.commandfile" to point to the file.
 * @param conf the configuration to check
 * @return will the framework save the command file?
 */
public static boolean getKeepCommandFile(JobConf conf) {
    return conf.getBoolean(Submitter.PRESERVE_COMMANDFILE, false);
}
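
The javadoc above describes two settings that work together. A hedged driver-side sketch, assuming the Submitter.PRESERVE_COMMANDFILE constant is accessible from driver code (as it is in the stock Hadoop pipes Submitter):

JobConf conf = new JobConf();
// keep a copy of the command data in "downlink.data" inside the task directory
conf.setBoolean(Submitter.PRESERVE_COMMANDFILE, true);
// also keep failed task directories so the command file is not cleaned up
conf.setKeepFailedTaskFiles(true);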

From source file:ivory.preprocess.BuildIntDocVectorsForwardIndex.java

License:Apache License

public int runTool() throws Exception {
    JobConf conf = new JobConf(getConf(), BuildIntDocVectorsForwardIndex.class);
    FileSystem fs = FileSystem.get(conf);

    String indexPath = conf.get("Ivory.IndexPath");
    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);

    int mapTasks = conf.getInt("Ivory.NumMapTasks", 0);
    String collectionName = env.readCollectionName();
    boolean buildWeighted = conf.getBoolean("Ivory.BuildWeighted", false);

    sLogger.info("Tool: BuildIntDocVectorsIndex");
    sLogger.info(" - IndexPath: " + indexPath);
    sLogger.info(" - CollectionName: " + collectionName);
    sLogger.info(" - BuildWeighted: " + buildWeighted);
    sLogger.info(" - NumMapTasks: " + mapTasks);

    String intDocVectorsPath;
    String forwardIndexPath;
    if (buildWeighted) {
        intDocVectorsPath = env.getWeightedIntDocVectorsDirectory();
        forwardIndexPath = env.getWeightedIntDocVectorsForwardIndex();
    } else {
        intDocVectorsPath = env.getIntDocVectorsDirectory();
        forwardIndexPath = env.getIntDocVectorsForwardIndex();
    }

    if (!fs.exists(new Path(intDocVectorsPath))) {
        sLogger.info("Error: IntDocVectors don't exist!");
        return 0;
    }

    if (fs.exists(new Path(forwardIndexPath))) {
        sLogger.info("IntDocVectorIndex already exists: skipping!");
        return 0;
    }

    conf.setJobName("BuildIntDocVectorsForwardIndex:" + collectionName);

    Path inputPath = new Path(intDocVectorsPath);
    FileInputFormat.setInputPaths(conf, inputPath);

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(1);

    conf.set("mapred.child.java.opts", "-Xmx2048m");

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputFormat(NullOutputFormat.class);

    conf.setMapRunnerClass(MyMapRunner.class);
    conf.setReducerClass(MyReducer.class);

    JobClient.runJob(conf);

    return 0;
}

From source file:net.iponweb.hadoop.streaming.avro.AvroAsJsonOutputFormat.java

License:Apache License

@Override
public RecordWriter<Text, NullWritable> getRecordWriter(FileSystem ignore, JobConf job, String name,
        Progressable prog) throws IOException {

    Schema schema;
    Schema.Parser p = new Schema.Parser();
    String strSchema = job.get("iow.streaming.output.schema");
    if (strSchema == null) {

        String schemaFile = job.get("iow.streaming.output.schema.file", "streaming_output_schema");

        if (job.getBoolean("iow.streaming.schema.use.prefix", false)) {
            // guess schema from file name
            // format is: schema:filename
            // with special keyword default - 'default:filename'

            String str[] = name.split(":");
            if (!str[0].equals("default"))
                schemaFile = str[0];

            name = str[1];
        }

        LOG.info(this.getClass().getSimpleName() + ": Using schema from file: " + schemaFile);
        File f = new File(schemaFile);
        schema = p.parse(f);
    } else {
        LOG.info(this.getClass().getSimpleName() + ": Using schema from jobconf.");
        schema = p.parse(strSchema);
    }

    if (schema == null) {
        throw new IOException("Can't find proper output schema");
    }

    DataFileWriter<GenericRecord> writer = new DataFileWriter<GenericRecord>(
            new GenericDatumWriter<GenericRecord>());

    configureDataFileWriter(writer, job);

    Path path = FileOutputFormat.getTaskOutputPath(job, name + org.apache.avro.mapred.AvroOutputFormat.EXT);
    writer.create(schema, path.getFileSystem(job).create(path));

    return createRecordWriter(writer, schema);
}
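
For context, a hedged sketch of the driver-side settings this getRecordWriter reads. The property names are taken from the code above; the inline schema string is only a placeholder:

JobConf job = new JobConf();
// Either pass the Avro schema inline (placeholder empty record shown here) ...
job.set("iow.streaming.output.schema", "{\"type\":\"record\",\"name\":\"Rec\",\"fields\":[]}");
// ... or leave it unset, point at a schema file instead,
// and let output file names carry a "schema:filename" prefix
job.set("iow.streaming.output.schema.file", "streaming_output_schema");
job.setBoolean("iow.streaming.schema.use.prefix", true);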

From source file:net.iponweb.hadoop.streaming.io.ByKeyOutputFormat.java

License:Apache License

private void initialize(JobConf job) throws IOException {

    SupportedOutputFormats.put("text", "org.apache.hadoop.mapred.TextOutputFormat");
    SupportedOutputFormats.put("sequence", "org.apache.hadoop.mapred.SequenceFileOutputFormat");
    SupportedOutputFormats.put("avrojson", "net.iponweb.hadoop.streaming.avro.AvroAsJsonOutputFormat");
    SupportedOutputFormats.put("avrotext", "net.iponweb.hadoop.streaming.avro.AvroAsTextOutputFormat");
    SupportedOutputFormats.put("parquettext", "net.iponweb.hadoop.streaming.parquet.ParquetAsTextOutputFormat");
    SupportedOutputFormats.put("parquetjson", "net.iponweb.hadoop.streaming.parquet.ParquetAsTextOutputFormat");

    String format = job.get("iow.streaming.bykeyoutputformat", "text");
    for (String f : SupportedOutputFormats.keySet())
        if (f.equals(format)) {

            try {
                internalOutputFormat = (OutputFormat<Text, Text>) Class.forName(SupportedOutputFormats.get(f))
                        .newInstance();
            } catch (Exception e) {
                e.printStackTrace();
                throw new IOException("Can't instantiate class '" + SupportedOutputFormats.get(f) + "'");
            }
        }

    if (internalOutputFormat == null)
        throw new IOException("Unknown result type: '" + format + "'");

    assumeFileNamesSorted = job.getBoolean("iow.streaming.bykeyoutputformat.keys.sorted", false);
    String delimiter = job.get("map.output.key.field.separator", "\t");
    splitter = new KeyValueSplitter(delimiter);

    LOG.info(getClass().getSimpleName() + " initialized, output format is: " + format);
}
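
A hedged sketch of the job configuration that drives initialize; the property names, format keys, and defaults are taken from the code above:

JobConf job = new JobConf();
// pick one of the registered formats: text, sequence, avrojson, avrotext, parquettext, parquetjson
job.set("iow.streaming.bykeyoutputformat", "sequence");
// hint that keys (and therefore output file names) arrive already sorted (default false)
job.setBoolean("iow.streaming.bykeyoutputformat.keys.sorted", true);
// key/value separator used by the KeyValueSplitter (tab is the default)
job.set("map.output.key.field.separator", "\t");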

From source file:net.iponweb.hadoop.streaming.parquet.ParquetAsTextOutputFormat.java

License:Apache License

public RecordWriter<Text, Text> getRecordWriter(FileSystem fs, JobConf job, String name, Progressable progress)
        throws IOException {

    // find and load schema

    String writeSchema = job.get("iow.streaming.output.schema");
    MessageType s;

    if (writeSchema == null) {

        String schemaFile = job.get("iow.streaming.output.schema.file", "streaming_output_schema");

        if (job.getBoolean("iow.streaming.schema.use.prefix", false)) {
            // guess schema from file name
            // format is: schema:filename
            // with special keyword default - 'default:filename'

            String str[] = name.split(":");
            if (!str[0].equals("default"))
                schemaFile = str[0];

            name = str[1];
        }

        LOG.info("Using schema: " + schemaFile);
        File f = new File(schemaFile);
        try {
            BufferedReader reader = new BufferedReader(new FileReader(f));
            StringBuilder r = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null)
                r.append(line);

            writeSchema = r.toString();

        } catch (Throwable e) {
            LOG.error("Can't read schema file " + schemaFile);
            Throwables.propagateIfPossible(e, IOException.class);
            throw new RuntimeException(e);
        }
    }
    s = MessageTypeParser.parseMessageType(writeSchema);

    setWriteSupportClass(job, GroupWriteSupport.class);
    GroupWriteSupport.setSchema(s, job);

    CompressionCodecName codec = getCodec(job);
    String extension = codec.getExtension() + ".parquet";
    Path file = getDefaultWorkFile(job, name, extension);

    ParquetRecordWriter<SimpleGroup> realWriter;
    try {
        realWriter = (ParquetRecordWriter<SimpleGroup>) realOutputFormat.getRecordWriter(job, file, codec);
    } catch (InterruptedException e) {
        Thread.interrupted();
        throw new IOException(e);
    }

    return createRecordWriter(realWriter, fs, job, name, progress);
}