Example usage for org.apache.hadoop.mapred JobConf get

List of usage examples for org.apache.hadoop.mapred JobConf get

Introduction

On this page you can find example usages of org.apache.hadoop.mapred JobConf get.

Prototype

public String get(String name, String defaultValue) 

Document

Get the value of the name property; if no such property exists, the supplied defaultValue is returned.
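
As a minimal sketch of this behaviour (the property names below are hypothetical and used only for illustration), get returns the stored value when the key has been set and the supplied default when it has not:

import org.apache.hadoop.mapred.JobConf;

public static void showDefaults() {
    JobConf conf = new JobConf();
    conf.set("example.input.separator", ",");

    // Key is present: returns the configured value ","
    String separator = conf.get("example.input.separator", "\t");

    // Key is absent: returns the supplied default "text"
    String format = conf.get("example.output.format", "text");

    System.out.println(separator + " " + format);
}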

Usage

From source file:explain.ExplainTask.java

License:Apache License

public int explain(String stageid, Task<? extends Serializable> rootTask, OutputStream outS, JobConf jobconf) {

    this.stageid = stageid;
    this.jobconf = jobconf;
    this.mrBlock = new MRBlockInfo();

    String sql = jobconf.get("hive.query.string", "").replace("\n", " ");

    if (!sql.equals(parseredSQL)) { // re-parse only when the SQL has changed
        try {
            queryBlock = parser.getQueryBlock(sql);
            parseredSQL = sql;
        } catch (ParseException e) {
            // parsing failed: log the error and keep the previously parsed query block
            e.printStackTrace();
        }
    }

    if (rootTask != null && rootTask instanceof MapRedTask) {
        mapredwork = ((MapRedTask) rootTask).getWork();
    }
    PrintStream out = null;
    try {
        out = new PrintStream(outS);
        List rootTasks = new ArrayList();
        rootTasks.add(rootTask);
        // Go over this task and dump out the plan
        outputStagePlans(out, rootTasks, 0);

        //output the sql this task will execute
        outputSQL(out);

        return (0);
    } catch (Exception e) {
        e.printStackTrace();
        return (1);
    } finally {
        IOUtils.closeStream(out);
    }
}

From source file:gobblin.hadoop.token.TokenUtils.java

License:Open Source License

private static Text getMRTokenRenewerInternal(JobConf jobConf) throws IOException {
    String servicePrincipal = jobConf.get(YARN_RESOURCEMANAGER_PRINCIPAL, jobConf.get(JTConfig.JT_USER_NAME));
    Text renewer;
    if (servicePrincipal != null) {
        String target = jobConf.get(YARN_RESOURCEMANAGER_ADDRESS, jobConf.get(MAPREDUCE_JOBTRACKER_ADDRESS));
        if (target == null) {
            target = jobConf.get(MAPRED_JOB_TRACKER);
        }

        String addr = NetUtils.createSocketAddr(target).getHostName();
        renewer = new Text(SecurityUtil.getServerPrincipal(servicePrincipal, addr));
    } else {
        // No security
        renewer = new Text("azkaban mr tokens");
    }

    return renewer;
}

From source file:hanb.elasticsearch.expert.hadoop.mr.PrintStreamOutputFormat.java

License:Apache License

@Override
public RecordWriter getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress)
        throws IOException {
    stream = Stream.valueOf(job.get(Stream.class.getName(), Stream.NULL.name()));
    return new PrintStreamRecordWriter();
}

From source file:init.UpdateDataMapper.java

License:Apache License

@Override
public void configure(JobConf job) {
    super.configure(job);
    // load the frequency map from the path configured under "freqPath"
    fmap = FMap.getFMap(job, job.get("freqPath", "data/freqs/1"));
}

From source file:io.prestosql.plugin.hive.HiveUtil.java

License:Apache License

public static RecordReader<?, ?> createRecordReader(Configuration configuration, Path path, long start,
        long length, Properties schema, List<HiveColumnHandle> columns) {
    // determine which hive columns we will read
    List<HiveColumnHandle> readColumns = ImmutableList
            .copyOf(filter(columns, column -> column.getColumnType() == REGULAR));
    List<Integer> readHiveColumnIndexes = ImmutableList
            .copyOf(transform(readColumns, HiveColumnHandle::getHiveColumnIndex));

    // Tell hive the columns we would like to read, this lets hive optimize reading column oriented files
    setReadColumns(configuration, readHiveColumnIndexes);

    InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, true);
    JobConf jobConf = toJobConf(configuration);
    FileSplit fileSplit = new FileSplit(path, start, length, (String[]) null);

    // propagate serialization configuration to getRecordReader
    schema.stringPropertyNames().stream().filter(name -> name.startsWith("serialization."))
            .forEach(name -> jobConf.set(name, schema.getProperty(name)));

    // add Airlift LZO and LZOP to head of codecs list so as to not override existing entries
    List<String> codecs = newArrayList(
            Splitter.on(",").trimResults().omitEmptyStrings().split(jobConf.get("io.compression.codecs", "")));
    if (!codecs.contains(LzoCodec.class.getName())) {
        codecs.add(0, LzoCodec.class.getName());
    }
    if (!codecs.contains(LzopCodec.class.getName())) {
        codecs.add(0, LzopCodec.class.getName());
    }
    jobConf.set("io.compression.codecs", codecs.stream().collect(joining(",")));

    try {
        RecordReader<WritableComparable, Writable> recordReader = (RecordReader<WritableComparable, Writable>) inputFormat
                .getRecordReader(fileSplit, jobConf, Reporter.NULL);

        int headerCount = getHeaderCount(schema);
        if (headerCount > 0) {
            Utilities.skipHeader(recordReader, headerCount, recordReader.createKey(),
                    recordReader.createValue());
        }

        int footerCount = getFooterCount(schema);
        if (footerCount > 0) {
            recordReader = new FooterAwareRecordReader<>(recordReader, footerCount, jobConf);
        }

        return recordReader;
    } catch (IOException e) {
        if (e instanceof TextLineLengthLimitExceededException) {
            throw new PrestoException(HIVE_BAD_DATA, "Line too long in text file: " + path, e);
        }

        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT,
                format("Error opening Hive split %s (offset=%s, length=%s) using %s: %s", path, start, length,
                        getInputFormatName(schema), firstNonNull(e.getMessage(), e.getClass().getName())),
                e);
    }
}

From source file:net.iponweb.hadoop.streaming.avro.AvroAsJsonOutputFormat.java

License:Apache License

static <K> void configureDataFileWriter(DataFileWriter<K> writer, JobConf job)
        throws UnsupportedEncodingException {

    if (FileOutputFormat.getCompressOutput(job)) {
        int level = job.getInt(org.apache.avro.mapred.AvroOutputFormat.DEFLATE_LEVEL_KEY,
                org.apache.avro.mapred.AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
        String codecName = job.get(AvroJob.OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC) ? CodecFactory.deflateCodec(level)
                : CodecFactory.fromString(codecName);
        writer.setCodec(factory);
    }

    writer.setSyncInterval(
            job.getInt(org.apache.avro.mapred.AvroOutputFormat.SYNC_INTERVAL_KEY, DEFAULT_SYNC_INTERVAL));

    // copy metadata from job
    for (Map.Entry<String, String> e : job) {
        if (e.getKey().startsWith(AvroJob.TEXT_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue());
        if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
                    URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1"));
    }
}

From source file:net.iponweb.hadoop.streaming.io.ByKeyOutputFormat.java

License:Apache License

private void initialize(JobConf job) throws IOException {

    SupportedOutputFormats.put("text", "org.apache.hadoop.mapred.TextOutputFormat");
    SupportedOutputFormats.put("sequence", "org.apache.hadoop.mapred.SequenceFileOutputFormat");
    SupportedOutputFormats.put("avrojson", "net.iponweb.hadoop.streaming.avro.AvroAsJsonOutputFormat");
    SupportedOutputFormats.put("avrotext", "net.iponweb.hadoop.streaming.avro.AvroAsTextOutputFormat");
    SupportedOutputFormats.put("parquettext", "net.iponweb.hadoop.streaming.parquet.ParquetAsTextOutputFormat");
    SupportedOutputFormats.put("parquetjson", "net.iponweb.hadoop.streaming.parquet.ParquetAsTextOutputFormat");

    String format = job.get("iow.streaming.bykeyoutputformat", "text");
    for (String f : SupportedOutputFormats.keySet())
        if (f.equals(format)) {

            try {
                internalOutputFormat = (OutputFormat<Text, Text>) Class.forName(SupportedOutputFormats.get(f))
                        .newInstance();
            } catch (Exception e) {
                e.printStackTrace();
                throw new IOException("Can't instantiate class '" + SupportedOutputFormats.get(f) + "'");
            }
        }

    if (internalOutputFormat == null)
        throw new IOException("Unknown result type: '" + format + "'");

    assumeFileNamesSorted = job.getBoolean("iow.streaming.bykeyoutputformat.keys.sorted", false);
    String delimiter = job.get("map.output.key.field.separator", "\t");
    splitter = new KeyValueSplitter(delimiter);

    LOG.info(getClass().getSimpleName() + " initialized, output format is: " + format);
}

From source file:org.apache.avro.mapred.AvroMultipleOutputs.java

License:Apache License

/**
 * Returns list of channel names.
 *
 * @param conf job conf
 * @return List of channel Names
 */
public static List<String> getNamedOutputsList(JobConf conf) {
    List<String> names = new ArrayList<String>();
    StringTokenizer st = new StringTokenizer(conf.get(NAMED_OUTPUTS, ""), " ");
    while (st.hasMoreTokens()) {
        names.add(st.nextToken());
    }
    return names;
}

From source file:org.apache.avro.mapred.AvroMultipleOutputs.java

License:Apache License

/**
 * Adds a named output for the job.
 * <p/>
 *
 * @param conf              job conf to add the named output
 * @param namedOutput       named output name, it has to be a word, letters
 *                          and numbers only, cannot be the word 'part' as
 *                          that is reserved for the
 *                          default output.
 * @param multi             indicates if the named output is multi
 * @param outputFormatClass OutputFormat class.
 * @param schema            Schema to used for this namedOutput
 */
private static void addNamedOutput(JobConf conf, String namedOutput, boolean multi,
        Class<? extends OutputFormat> outputFormatClass, Schema schema) {
    checkNamedOutputName(namedOutput);
    checkNamedOutput(conf, namedOutput, true);
    boolean isMapOnly = conf.getNumReduceTasks() == 0;
    if (schema != null)
        conf.set(MO_PREFIX + namedOutput + ".schema", schema.toString());
    conf.set(NAMED_OUTPUTS, conf.get(NAMED_OUTPUTS, "") + " " + namedOutput);
    conf.setClass(MO_PREFIX + namedOutput + FORMAT, outputFormatClass, OutputFormat.class);
    conf.setBoolean(MO_PREFIX + namedOutput + MULTI, multi);
}
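
Taken together, getNamedOutputsList and addNamedOutput rely on a common pattern built around get with a default value: the list of names is accumulated by appending to whatever is already stored (the "" default covers the very first insertion) and read back by tokenizing the stored string. A stripped-down sketch of that pattern, using a hypothetical key:

import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
import org.apache.hadoop.mapred.JobConf;

// Hypothetical key, used only to illustrate the accumulate-and-tokenize pattern.
static final String EXAMPLE_KEY = "example.named.outputs";

static void addName(JobConf conf, String name) {
    // Append to the current list; get(..., "") returns "" the first time round.
    conf.set(EXAMPLE_KEY, conf.get(EXAMPLE_KEY, "") + " " + name);
}

static List<String> getNames(JobConf conf) {
    List<String> names = new ArrayList<String>();
    StringTokenizer st = new StringTokenizer(conf.get(EXAMPLE_KEY, ""), " ");
    while (st.hasMoreTokens()) {
        names.add(st.nextToken());
    }
    return names;
}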

From source file:org.apache.avro.mapred.AvroTextOutputFormat.java

License:Apache License

@Override
public RecordWriter<K, V> getRecordWriter(FileSystem ignore, JobConf job, String name, Progressable prog)
        throws IOException {

    Schema schema = Schema.create(Schema.Type.BYTES);

    final byte[] keyValueSeparator = job.get("mapreduce.output.textoutputformat.separator", "\t")
            .getBytes(UTF8);

    final DataFileWriter<ByteBuffer> writer = new DataFileWriter<ByteBuffer>(
            new ReflectDatumWriter<ByteBuffer>());

    AvroOutputFormat.configureDataFileWriter(writer, job);

    Path path = FileOutputFormat.getTaskOutputPath(job, name + EXT);
    writer.create(schema, path.getFileSystem(job).create(path));

    return new AvroTextRecordWriter(writer, keyValueSeparator);
}