List of usage examples for org.apache.hadoop.mapred JobConf get
public String get(String name, String defaultValue)

Gets the value of the name property; if no such property exists, defaultValue is returned.
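Before the project examples, here is a minimal sketch of this lookup-with-fallback call; the property keys demo.greeting and demo.farewell and the driver class are made up for illustration and are not part of any project below.

import org.apache.hadoop.mapred.JobConf;

public class JobConfGetDemo {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        conf.set("demo.greeting", "hello");                    // hypothetical property, present in the conf
        String present = conf.get("demo.greeting", "hi");      // returns "hello" (the stored value)
        String missing = conf.get("demo.farewell", "goodbye"); // returns "goodbye" (the default)
        System.out.println(present + " / " + missing);
    }
}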
From source file:explain.ExplainTask.java
License:Apache License
public int explain(String stageid, Task<? extends Serializable> rootTask, OutputStream outS, JobConf jobconf) {
    this.stageid = stageid;
    this.jobconf = jobconf;
    this.mrBlock = new MRBlockInfo();
    String sql = jobconf.get("hive.query.string", "").replace("\n", " ");
    if (!sql.equals(parseredSQL)) { // re-parse only if the SQL changed
        try {
            queryBlock = parser.getQueryBlock(sql);
            parseredSQL = sql;
        } catch (ParseException e) {
            e.printStackTrace();
        }
    }
    if (rootTask != null && rootTask instanceof MapRedTask) {
        mapredwork = ((MapRedTask) rootTask).getWork();
    }
    PrintStream out = null;
    try {
        out = new PrintStream(outS);
        List rootTasks = new ArrayList();
        rootTasks.add(rootTask);
        // Go over this task and dump out the plan
        outputStagePlans(out, rootTasks, 0);
        // Output the SQL this task will execute
        outputSQL(out);
        return (0);
    } catch (Exception e) {
        e.printStackTrace();
        return (1);
    } finally {
        IOUtils.closeStream(out);
    }
}
From source file:gobblin.hadoop.token.TokenUtils.java
License:Open Source License
private static Text getMRTokenRenewerInternal(JobConf jobConf) throws IOException {
    String servicePrincipal = jobConf.get(YARN_RESOURCEMANAGER_PRINCIPAL, jobConf.get(JTConfig.JT_USER_NAME));
    Text renewer;
    if (servicePrincipal != null) {
        String target = jobConf.get(YARN_RESOURCEMANAGER_ADDRESS, jobConf.get(MAPREDUCE_JOBTRACKER_ADDRESS));
        if (target == null) {
            target = jobConf.get(MAPRED_JOB_TRACKER);
        }
        String addr = NetUtils.createSocketAddr(target).getHostName();
        renewer = new Text(SecurityUtil.getServerPrincipal(servicePrincipal, addr));
    } else {
        // No security
        renewer = new Text("azkaban mr tokens");
    }
    return renewer;
}
From source file:hanb.elasticsearch.expert.hadoop.mr.PrintStreamOutputFormat.java
License:Apache License
@Override
public RecordWriter getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress)
        throws IOException {
    stream = Stream.valueOf(job.get(Stream.class.getName(), Stream.NULL.name()));
    return new PrintStreamRecordWriter();
}
From source file:init.UpdateDataMapper.java
License:Apache License
@Override
public void configure(JobConf job) {
    super.configure(job);
    fmap = FMap.getFMap(job, job.get("freqPath", "data/freqs/1"));
}
From source file:io.prestosql.plugin.hive.HiveUtil.java
License:Apache License
public static RecordReader<?, ?> createRecordReader(Configuration configuration, Path path, long start,
        long length, Properties schema, List<HiveColumnHandle> columns) {
    // determine which hive columns we will read
    List<HiveColumnHandle> readColumns = ImmutableList
            .copyOf(filter(columns, column -> column.getColumnType() == REGULAR));
    List<Integer> readHiveColumnIndexes = ImmutableList
            .copyOf(transform(readColumns, HiveColumnHandle::getHiveColumnIndex));

    // Tell Hive which columns we would like to read; this lets Hive optimize reading column-oriented files
    setReadColumns(configuration, readHiveColumnIndexes);

    InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, true);
    JobConf jobConf = toJobConf(configuration);
    FileSplit fileSplit = new FileSplit(path, start, length, (String[]) null);

    // propagate serialization configuration to getRecordReader
    schema.stringPropertyNames().stream()
            .filter(name -> name.startsWith("serialization."))
            .forEach(name -> jobConf.set(name, schema.getProperty(name)));

    // add Airlift LZO and LZOP to the head of the codecs list so as to not override existing entries
    List<String> codecs = newArrayList(
            Splitter.on(",").trimResults().omitEmptyStrings().split(jobConf.get("io.compression.codecs", "")));
    if (!codecs.contains(LzoCodec.class.getName())) {
        codecs.add(0, LzoCodec.class.getName());
    }
    if (!codecs.contains(LzopCodec.class.getName())) {
        codecs.add(0, LzopCodec.class.getName());
    }
    jobConf.set("io.compression.codecs", codecs.stream().collect(joining(",")));

    try {
        RecordReader<WritableComparable, Writable> recordReader = (RecordReader<WritableComparable, Writable>) inputFormat
                .getRecordReader(fileSplit, jobConf, Reporter.NULL);

        int headerCount = getHeaderCount(schema);
        if (headerCount > 0) {
            Utilities.skipHeader(recordReader, headerCount, recordReader.createKey(), recordReader.createValue());
        }

        int footerCount = getFooterCount(schema);
        if (footerCount > 0) {
            recordReader = new FooterAwareRecordReader<>(recordReader, footerCount, jobConf);
        }

        return recordReader;
    } catch (IOException e) {
        if (e instanceof TextLineLengthLimitExceededException) {
            throw new PrestoException(HIVE_BAD_DATA, "Line too long in text file: " + path, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT,
                format("Error opening Hive split %s (offset=%s, length=%s) using %s: %s", path, start, length,
                        getInputFormatName(schema), firstNonNull(e.getMessage(), e.getClass().getName())),
                e);
    }
}
From source file:net.iponweb.hadoop.streaming.avro.AvroAsJsonOutputFormat.java
License:Apache License
static <K> void configureDataFileWriter(DataFileWriter<K> writer, JobConf job) throws UnsupportedEncodingException {
    if (FileOutputFormat.getCompressOutput(job)) {
        int level = job.getInt(org.apache.avro.mapred.AvroOutputFormat.DEFLATE_LEVEL_KEY,
                org.apache.avro.mapred.AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
        String codecName = job.get(AvroJob.OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC) ? CodecFactory.deflateCodec(level)
                : CodecFactory.fromString(codecName);
        writer.setCodec(factory);
    }

    writer.setSyncInterval(
            job.getInt(org.apache.avro.mapred.AvroOutputFormat.SYNC_INTERVAL_KEY, DEFAULT_SYNC_INTERVAL));

    // copy metadata from job
    for (Map.Entry<String, String> e : job) {
        if (e.getKey().startsWith(AvroJob.TEXT_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue());
        if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
                    URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1"));
    }
}
From source file:net.iponweb.hadoop.streaming.io.ByKeyOutputFormat.java
License:Apache License
private void initialize(JobConf job) throws IOException {
    SupportedOutputFormats.put("text", "org.apache.hadoop.mapred.TextOutputFormat");
    SupportedOutputFormats.put("sequence", "org.apache.hadoop.mapred.SequenceFileOutputFormat");
    SupportedOutputFormats.put("avrojson", "net.iponweb.hadoop.streaming.avro.AvroAsJsonOutputFormat");
    SupportedOutputFormats.put("avrotext", "net.iponweb.hadoop.streaming.avro.AvroAsTextOutputFormat");
    SupportedOutputFormats.put("parquettext", "net.iponweb.hadoop.streaming.parquet.ParquetAsTextOutputFormat");
    SupportedOutputFormats.put("parquetjson", "net.iponweb.hadoop.streaming.parquet.ParquetAsTextOutputFormat");

    String format = job.get("iow.streaming.bykeyoutputformat", "text");
    for (String f : SupportedOutputFormats.keySet()) {
        if (f.equals(format)) {
            try {
                internalOutputFormat = (OutputFormat<Text, Text>) Class.forName(SupportedOutputFormats.get(f))
                        .newInstance();
            } catch (Exception e) {
                e.printStackTrace();
                throw new IOException("Can't instantiate class '" + SupportedOutputFormats.get(f) + "'");
            }
        }
    }

    if (internalOutputFormat == null)
        throw new IOException("Unknown result type: '" + format + "'");

    assumeFileNamesSorted = job.getBoolean("iow.streaming.bykeyoutputformat.keys.sorted", false);

    String delimiter = job.get("map.output.key.field.separator", "\t");
    splitter = new KeyValueSplitter(delimiter);

    LOG.info(getClass().getSimpleName() + " initialized, output format is: " + format);
}
From source file:org.apache.avro.mapred.AvroMultipleOutputs.java
License:Apache License
/**
 * Returns list of channel names.
 *
 * @param conf job conf
 * @return List of channel names
 */
public static List<String> getNamedOutputsList(JobConf conf) {
    List<String> names = new ArrayList<String>();
    StringTokenizer st = new StringTokenizer(conf.get(NAMED_OUTPUTS, ""), " ");
    while (st.hasMoreTokens()) {
        names.add(st.nextToken());
    }
    return names;
}
From source file:org.apache.avro.mapred.AvroMultipleOutputs.java
License:Apache License
/**
 * Adds a named output for the job.
 * <p/>
 *
 * @param conf              job conf to add the named output to
 * @param namedOutput       named output name; it has to be a word, letters
 *                          and numbers only, and cannot be the word 'part' as
 *                          that is reserved for the default output
 * @param multi             indicates if the named output is multi
 * @param outputFormatClass OutputFormat class
 * @param schema            Schema to be used for this namedOutput
 */
private static void addNamedOutput(JobConf conf, String namedOutput, boolean multi,
        Class<? extends OutputFormat> outputFormatClass, Schema schema) {
    checkNamedOutputName(namedOutput);
    checkNamedOutput(conf, namedOutput, true);
    boolean isMapOnly = conf.getNumReduceTasks() == 0;
    if (schema != null)
        conf.set(MO_PREFIX + namedOutput + ".schema", schema.toString());
    conf.set(NAMED_OUTPUTS, conf.get(NAMED_OUTPUTS, "") + " " + namedOutput);
    conf.setClass(MO_PREFIX + namedOutput + FORMAT, outputFormatClass, OutputFormat.class);
    conf.setBoolean(MO_PREFIX + namedOutput + MULTI, multi);
}
From source file:org.apache.avro.mapred.AvroTextOutputFormat.java
License:Apache License
@Override
public RecordWriter<K, V> getRecordWriter(FileSystem ignore, JobConf job, String name, Progressable prog)
        throws IOException {
    Schema schema = Schema.create(Schema.Type.BYTES);
    final byte[] keyValueSeparator = job.get("mapreduce.output.textoutputformat.separator", "\t")
            .getBytes(UTF8);
    final DataFileWriter<ByteBuffer> writer = new DataFileWriter<ByteBuffer>(
            new ReflectDatumWriter<ByteBuffer>());
    AvroOutputFormat.configureDataFileWriter(writer, job);
    Path path = FileOutputFormat.getTaskOutputPath(job, name + EXT);
    writer.create(schema, path.getFileSystem(job).create(path));
    return new AvroTextRecordWriter(writer, keyValueSeparator);
}