Example usage for org.apache.hadoop.mapred JobConf get

Introduction

This page lists usage examples for the get method of org.apache.hadoop.mapred.JobConf.
Prototype

public String get(String name) 

Document

Gets the value of the name property, or null if no such property exists.
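
For a quick illustration of the method itself, here is a minimal sketch (the property names are made up for the example); JobConf also inherits get(String, String) from Configuration when a default value is preferable to a null check:

JobConf job = new JobConf();
job.set("example.greeting", "hello");              // hypothetical property

String greeting = job.get("example.greeting");     // "hello"
String missing = job.get("no.such.property");      // null: never set

// Configuration#get(String, String) supplies a default instead of null
String withDefault = job.get("no.such.property", "fallback");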

Usage

From source file: org.apache.avro.mapred.AvroJob.java

License: Apache License

private static void configureAvroInput(JobConf job) {
    if (job.get("mapred.input.format.class") == null)
        job.setInputFormat(AvroInputFormat.class);

    if (job.getMapperClass() == IdentityMapper.class)
        job.setMapperClass(HadoopMapper.class);

    configureAvroShuffle(job);
}

From source file: org.apache.avro.mapred.AvroJob.java

License: Apache License

private static void configureAvroOutput(JobConf job) {
    if (job.get("mapred.output.format.class") == null)
        job.setOutputFormat(AvroOutputFormat.class);

    if (job.getReducerClass() == IdentityReducer.class)
        job.setReducerClass(HadoopReducer.class);

    job.setOutputKeyClass(AvroWrapper.class);
    configureAvroShuffle(job);
}
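
Both helpers above are private; a driver normally reaches them through AvroJob's public schema setters. A minimal driver sketch follows (the class name and paths are invented, and the exact internal wiring to these helpers is an assumption):

JobConf job = new JobConf(MyAvroDriver.class);          // hypothetical driver class
job.setJobName("avro-example");

FileInputFormat.setInputPaths(job, new Path("/in"));    // made-up paths
FileOutputFormat.setOutputPath(job, new Path("/out"));

// Setting the schemas lets AvroJob install the Avro input/output formats and
// the Hadoop mapper/reducer bridges, unless the job already overrode them,
// which is what the get(...) null checks above guard against.
AvroJob.setInputSchema(job, Schema.create(Schema.Type.STRING));
AvroJob.setOutputSchema(job,
        Pair.getPairSchema(Schema.create(Schema.Type.STRING),
                           Schema.create(Schema.Type.LONG)));

JobClient.runJob(job);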

From source file: org.apache.avro.mapred.AvroMultipleInputs.java

License: Apache License

/**
 * Add a {@link Path} with a custom {@link Schema} to the list of
 * inputs for the map-reduce job.
 *
 * @param conf The configuration of the job
 * @param path {@link Path} to be added to the list of inputs for the job
 * @param inputSchema {@link Schema} class to use for this path
 */
private static void addInputPath(JobConf conf, Path path, Schema inputSchema) {

    String schemaMapping = path.toString() + ";" + toBase64(inputSchema.toString());

    String schemas = conf.get(schemaKey);
    conf.set(schemaKey, schemas == null ? schemaMapping : schemas + "," + schemaMapping);

    conf.setInputFormat(DelegatingInputFormat.class);
}

From source file: org.apache.avro.mapred.AvroMultipleInputs.java

License: Apache License

/**
 * Add a {@link Path} with a custom {@link Schema} and
 * {@link AvroMapper} to the list of inputs for the map-reduce job.
 *
 * @param conf The configuration of the job
 * @param path {@link Path} to be added to the list of inputs for the job
 * @param inputSchema {@link Schema} to use for this path
 * @param mapperClass {@link AvroMapper} class to use for this path
 */
public static void addInputPath(JobConf conf, Path path, Class<? extends AvroMapper> mapperClass,
        Schema inputSchema) {

    addInputPath(conf, path, inputSchema);

    String mapperMapping = path.toString() + ";" + mapperClass.getName();
    System.out.println(mapperMapping);
    String mappers = conf.get(mappersKey);
    conf.set(mappersKey, mappers == null ? mapperMapping : mappers + "," + mapperMapping);

    conf.setMapperClass(DelegatingMapper.class);
}
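
A usage sketch for the public overload above (the paths, schemas, and AvroMapper subclasses are invented for illustration):

JobConf conf = new JobConf();

// Each input path gets its own schema and AvroMapper implementation;
// AvroMultipleInputs records the mappings in the JobConf and installs the
// delegating input format and mapper shown above.
AvroMultipleInputs.addInputPath(conf, new Path("/data/users"),
        UserMapper.class, userSchema);      // hypothetical mapper and schema
AvroMultipleInputs.addInputPath(conf, new Path("/data/events"),
        EventMapper.class, eventSchema);    // hypothetical mapper and schema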

From source file: org.apache.avro.mapred.AvroMultipleInputs.java

License: Apache License

/**
 * Retrieves a map of {@link Path}s to the {@link AvroMapper} class that
 * should be used for them.
 *
 * @param conf The configuration of the job
 * @see #addInputPath(JobConf, Path, Class, Schema)
 * @return A map of paths-to-mappers for the job
 */
@SuppressWarnings("unchecked")
static Map<Path, Class<? extends AvroMapper>> getMapperTypeMap(JobConf conf) {
    if (conf.get(mappersKey) == null) {
        return Collections.emptyMap();
    }
    Map<Path, Class<? extends AvroMapper>> m = new HashMap<Path, Class<? extends AvroMapper>>();
    String[] pathMappings = conf.get(mappersKey).split(",");
    for (String pathMapping : pathMappings) {
        String[] split = pathMapping.split(";");
        Class<? extends AvroMapper> mapClass;
        try {
            mapClass = (Class<? extends AvroMapper>) conf.getClassByName(split[1]);
        } catch (ClassNotFoundException e) {
            throw new RuntimeException(e);
        }
        m.put(new Path(split[0]), mapClass);
    }
    return m;
}

From source file: org.apache.avro.mapred.AvroMultipleInputs.java

License: Apache License

/**
 * Retrieves a map of {@link Path}s to the {@link Schema} that
 * should be used for them.
 *
 * @param conf The configuration of the job
 * @see #addInputPath(JobConf, Path, Class, Schema)
 * @return A map of paths to schemas for the job
 */
static Map<Path, Schema> getInputSchemaMap(JobConf conf) {
    if (conf.get(schemaKey) == null) {
        return Collections.emptyMap();
    }
    Map<Path, Schema> m = new HashMap<Path, Schema>();
    String[] schemaMappings = conf.get(schemaKey).split(",");
    Schema.Parser schemaParser = new Schema.Parser();
    for (String schemaMapping : schemaMappings) {
        String[] split = schemaMapping.split(";");
        String schemaString = fromBase64(split[1]);
        Schema inputSchema;
        try {
            inputSchema = schemaParser.parse(schemaString);
        } catch (SchemaParseException e) {
            throw new RuntimeException(e);
        }
        m.put(new Path(split[0]), inputSchema);
    }
    return m;
}

From source file: org.apache.avro.mapred.AvroOutputFormat.java

License: Apache License

/** This will select the correct compression codec from the JobConf.
 * The order of selection is as follows:
 * <ul>
 *   <li>If mapred.output.compress is true then look for codec otherwise no compression</li>
 *   <li>Use avro.output.codec if populated</li>
 *   <li>Next use mapred.output.compression.codec if populated</li>
 *   <li>If not default to Deflate Codec</li>
 * </ul>  
 */
static CodecFactory getCodecFactory(JobConf job) {
    CodecFactory factory = null;

    if (FileOutputFormat.getCompressOutput(job)) {
        int level = job.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = job.get(AvroJob.OUTPUT_CODEC);

        if (codecName == null) {
            String codecClassName = job.get("mapred.output.compression.codec", null);
            String avroCodecName = HadoopCodecFactory.getAvroCodecName(codecClassName);
            if (codecClassName != null && avroCodecName != null) {
                factory = HadoopCodecFactory.fromHadoopString(codecClassName);
                job.set(AvroJob.OUTPUT_CODEC, avroCodecName);
                return factory;
            } else {
                return CodecFactory.deflateCodec(level);
            }
        } else {
            if (codecName.equals(DEFLATE_CODEC)) {
                factory = CodecFactory.deflateCodec(level);
            } else {
                factory = CodecFactory.fromString(codecName);
            }
        }
    }

    return factory;
}
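
The selection order above is driven entirely by job configuration. A minimal sketch of the two ways to request a codec (the "snappy" value is only an example):

JobConf job = new JobConf();

// 1. Enable output compression; otherwise getCodecFactory returns null.
FileOutputFormat.setCompressOutput(job, true);

// 2a. Prefer the Avro-level setting...
job.set(AvroJob.OUTPUT_CODEC, "snappy");

// 2b. ...or rely on a Hadoop codec class, which getCodecFactory translates to
//     its Avro equivalent when avro.output.codec is not populated.
// job.set("mapred.output.compression.codec",
//         "org.apache.hadoop.io.compress.SnappyCodec");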

From source file: org.apache.avro.mapred.tether.TetheredProcess.java

License: Apache License

private Process startSubprocess(JobConf job) throws IOException, InterruptedException {
    // get the executable command
    List<String> command = new ArrayList<String>();

    String executable = "";
    if (job.getBoolean(TetherJob.TETHER_EXEC_CACHED, false)) {
        //we want to use the cached executable
        Path[] localFiles = DistributedCache.getLocalCacheFiles(job);
        if (localFiles == null) { // until MAPREDUCE-476
            URI[] files = DistributedCache.getCacheFiles(job);
            localFiles = new Path[] { new Path(files[0].toString()) };
        }
        executable = localFiles[0].toString();
        FileUtil.chmod(executable.toString(), "a+x");
    } else {
        executable = job.get(TetherJob.TETHER_EXEC);
    }

    command.add(executable);

    // Add the executable arguments. We assume the arguments are separated by
    // spaces so we split the argument string based on spaces and add each
    // token to command We need to do it this way because
    // TaskLog.captureOutAndError will put quote marks around each argument so
    // if we pass a single string containing all arguments we get quoted
    // incorrectly
    String args = job.get(TetherJob.TETHER_EXEC_ARGS);
    String[] aparams = args.split("\n");
    for (int i = 0; i < aparams.length; i++) {
        aparams[i] = aparams[i].trim();
        if (aparams[i].length() > 0) {
            command.add(aparams[i]);
        }
    }

    if (System.getProperty("hadoop.log.dir") == null && System.getenv("HADOOP_LOG_DIR") != null)
        System.setProperty("hadoop.log.dir", System.getenv("HADOOP_LOG_DIR"));

    // wrap the command in a stdout/stderr capture
    TaskAttemptID taskid = TaskAttemptID.forName(job.get("mapred.task.id"));
    File stdout = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDOUT);
    File stderr = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDERR);
    long logLength = TaskLog.getTaskLogLength(job);
    command = TaskLog.captureOutAndError(command, stdout, stderr, logLength);
    stdout.getParentFile().mkdirs();
    stderr.getParentFile().mkdirs();

    // add output server's port to env
    Map<String, String> env = new HashMap<String, String>();
    env.put("AVRO_TETHER_OUTPUT_PORT", Integer.toString(outputServer.getPort()));

    // start child process
    ProcessBuilder builder = new ProcessBuilder(command);
    System.out.println(command);
    builder.environment().putAll(env);
    return builder.start();
}

From source file: org.apache.avro.mapred.tether.TetherJob.java

License: Apache License

/** Get the URI of the application's executable. */
public static URI getExecutable(JobConf job) {
    try {
        return new URI(job.get("avro.tether.executable"));
    } catch (URISyntaxException e) {
        throw new RuntimeException(e);
    }
}
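
A counterpart sketch that populates the property read above (the path is made up; TetherJob also provides setter helpers, but setting the key directly mirrors the get call shown here):

JobConf job = new JobConf();

// Point the tether framework at its executable; getExecutable(job) parses
// this value back into a URI.
job.set("avro.tether.executable", "hdfs:///apps/tether/word-count-task");
URI exec = TetherJob.getExecutable(job);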

From source file: org.apache.blur.hive.BlurHiveStorageHandler.java

License: Apache License

@Override
public void configureJobConf(TableDesc tableDesc, JobConf jobConf) {
    if (BlurSerDe.shouldUseMRWorkingPath(jobConf)) {
        String loadId = UUID.randomUUID().toString();
        jobConf.set(BlurSerDe.BLUR_MR_LOAD_ID, loadId);
        jobConf.setOutputCommitter(BlurHiveMRLoaderOutputCommitter.class);
    } else {
        try {
            String bulkId = UUID.randomUUID().toString();
            String connectionStr = jobConf.get(BlurSerDe.BLUR_CONTROLLER_CONNECTION_STR);
            Iface client = BlurClient.getClient(connectionStr);
            client.bulkMutateStart(bulkId);
            BlurHiveOutputFormat.setBulkId(jobConf, bulkId);
            jobConf.setOutputCommitter(BlurHiveOutputCommitter.class);
        } catch (BlurException e) {
            throw new RuntimeException(e);
        } catch (TException e) {
            throw new RuntimeException(e);
        }
    }
}