List of usage examples for org.apache.hadoop.mapred.JobConf#get
public String get(String name)

Returns the value of the name property, or null if no such property exists.
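Before the project examples, here is a minimal sketch of the basic pattern (the class name and property names are illustrative only, not taken from the examples below): get(name) returns null when the property is unset, the two-argument overload get(name, defaultValue) supplies a fallback, and callers commonly check for null to catch missing required keys.

import org.apache.hadoop.mapred.JobConf;

public class JobConfGetSketch {
    public static void main(String[] args) {
        JobConf job = new JobConf();
        job.set("example.charset", "UTF-8");

        // get(name) returns null when the property is not set
        String delimiter = job.get("example.record.delimiter");

        // get(name, defaultValue) returns the default instead of null
        String charset = job.get("example.charset", "ISO-8859-1");

        // common pattern for a required property: check for null and report it
        String required = job.get("example.required.key");
        if (required == null) {
            System.err.println("Unconfigured parameter: example.required.key");
        }

        System.out.println("delimiter = " + delimiter + ", charset = " + charset);
    }
}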
From source file:com.cloudera.sqoop.testutil.ExplicitSetMapper.java
License:Apache License
public void configure(JobConf job) {
    String userTypeName = job.get(USER_TYPE_NAME_KEY);
    if (null == userTypeName) {
        throw new RuntimeException("Unconfigured parameter: " + USER_TYPE_NAME_KEY);
    }
    setCol = job.get(SET_COL_KEY);
    setVal = job.get(SET_VAL_KEY);
    LOG.info("User type name set to " + userTypeName);
    LOG.info("Will try to set col " + setCol + " to " + setVal);
    this.userRecord = null;
    try {
        Configuration conf = new Configuration();
        Class userClass = Class.forName(userTypeName, true, Thread.currentThread().getContextClassLoader());
        this.userRecord = (SqoopRecord) ReflectionUtils.newInstance(userClass, conf);
    } catch (ClassNotFoundException cnfe) {
        // handled by the next block.
        LOG.error("ClassNotFound exception: " + cnfe.toString());
    } catch (Exception e) {
        LOG.error("Got an exception reflecting user class: " + e.toString());
    }
    if (null == this.userRecord) {
        LOG.error("Could not instantiate user record of type " + userTypeName);
        throw new RuntimeException("Could not instantiate user record of type " + userTypeName);
    }
}
From source file:com.cloudera.sqoop.testutil.ReparseMapper.java
License:Apache License
public void configure(JobConf job) {
    String userTypeName = job.get(USER_TYPE_NAME_KEY);
    if (null == userTypeName) {
        throw new RuntimeException("Unconfigured parameter: " + USER_TYPE_NAME_KEY);
    }
    LOG.info("User type name set to " + userTypeName);
    this.userRecord = null;
    try {
        Configuration conf = new Configuration();
        Class userClass = Class.forName(userTypeName, true, Thread.currentThread().getContextClassLoader());
        this.userRecord = (SqoopRecord) ReflectionUtils.newInstance(userClass, conf);
    } catch (ClassNotFoundException cnfe) {
        // handled by the next block.
        LOG.error("ClassNotFound exception: " + cnfe.toString());
    } catch (Exception e) {
        LOG.error("Got an exception reflecting user class: " + e.toString());
    }
    if (null == this.userRecord) {
        LOG.error("Could not instantiate user record of type " + userTypeName);
        throw new RuntimeException("Could not instantiate user record of type " + userTypeName);
    }
}
From source file:com.conductor.s3.S3TextFileInputFormatMRV1.java
License:Apache License
@Override
public RecordReader<LongWritable, Text> getRecordReader(InputSplit genericSplit, JobConf job, Reporter reporter)
        throws IOException {
    reporter.setStatus(genericSplit.toString());
    final String delimiter = job.get("textinputformat.record.delimiter");
    byte[] recordDelimiterBytes = null;
    if (null != delimiter) {
        recordDelimiterBytes = delimiter.getBytes();
    }
    return new LineRecordReader(job, (FileSplit) genericSplit, recordDelimiterBytes);
}
From source file:com.dappervision.hbase.mapred.TypedBytesTableInputFormat.java
License:Apache License
/**
 * Builds a TableRecordReader. If no TableRecordReader was provided, uses
 * the default.
 *
 * @see org.apache.hadoop.mapred.InputFormat#getRecordReader(InputSplit,
 *      JobConf, Reporter)
 */
public void configure(JobConf job) {
    Path[] tableNames = FileInputFormat.getInputPaths(job);
    String colArg = job.get(COLUMN_LIST);
    String[] colNames = colArg.split(" ");
    byte[][] m_cols = new byte[colNames.length][];
    for (int i = 0; i < m_cols.length; i++) {
        m_cols[i] = Base64.decodeBase64(Bytes.toBytes(colNames[i]));
    }
    setInputColumns(m_cols);
    if (job.get(ROW_FILTER_REGEX) != null) {
        LOG.info("Row Regex Filter[" + job.get(ROW_FILTER_REGEX) + "]");
        setRowFilter(new RowFilter(CompareFilter.CompareOp.EQUAL,
                new RegexStringComparator(job.get(ROW_FILTER_REGEX))));
    }
    if (job.get(START_ROW) != null) {
        LOG.info("Start Row[" + job.get(START_ROW) + "]");
        try {
            setStartRow(Base64.decodeBase64(job.get(START_ROW).getBytes("US-ASCII")));
        } catch (UnsupportedEncodingException e) {
            LOG.error("Start Row[" + job.get(START_ROW) + "] - Error");
        }
    }
    if (job.get(STOP_ROW) != null) {
        LOG.info("Stop Row[" + job.get(STOP_ROW) + "]");
        try {
            setStopRow(Base64.decodeBase64(job.get(STOP_ROW).getBytes("US-ASCII")));
        } catch (UnsupportedEncodingException e) {
            LOG.error("Stop Row[" + job.get(STOP_ROW) + "] - Error");
        }
    }
    try {
        setHTable(new HTable(HBaseConfiguration.create(job), tableNames[0].getName()));
    } catch (Exception e) {
        LOG.error(StringUtils.stringifyException(e));
    }
    if (job.get(VALUE_FORMAT) != null && job.get(VALUE_FORMAT).equalsIgnoreCase("singlevalue")) {
        LOG.info("Value Format[" + job.get(VALUE_FORMAT) + "]");
        super.setTableRecordReader(new TypedBytesTableRecordReaderSingleValue());
    } else {
        LOG.info("Value Format[familiescolumns]");
        super.setTableRecordReader(new TypedBytesTableRecordReader());
    }
}
From source file:com.datasalt.utils.viewbuilder.ShardedSolrDocumentConverter.java
License:Apache License
@Override
@SuppressWarnings({ "rawtypes", "unchecked" })
public void setConf(Configuration conf) {
    super.setConf(conf);
    if (conf != null) {
        JobConf jobConf = (JobConf) conf;
        numShards = jobConf.getNumReduceTasks();
        shardNumberShift = (int) Math.ceil(numDigits(Long.MAX_VALUE)) - 1 - (int) Math.ceil(numDigits(numShards));
        log.info("Num shards : " + numShards);
        Class partitionerClass;
        try {
            partitionerClass = Class.forName(jobConf.get(PARTITIONER_CONF));
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
            throw new RuntimeException(
                    "Partitioner not set.Use conf.set(ShardedSolrDocument.PARTITIONER_CONF,partitionerClassName)", e);
        }
        log.info("Partitioner class : " + partitionerClass);
        partitioner = (Partitioner<KEY_TYPE, VALUE_TYPE>) ReflectionUtils.newInstance(partitionerClass, conf);
    }
}
From source file:com.datascience.hadoop.CsvInputFormat.java
License:Apache License
@Override
public RecordReader<LongWritable, ListWritable<Text>> getRecordReader(InputSplit inputSplit, JobConf conf,
        Reporter reporter) throws IOException {
    String charsetName = conf.get(CHARSET);
    Charset charset = charsetName != null ? Charset.forName(charsetName) : StandardCharsets.UTF_8;
    FileSplit split = (FileSplit) inputSplit;
    Path path = split.getPath();
    FileSystem fs = path.getFileSystem(conf);
    InputStream is = fs.open(path);
    // If the input is compressed, load the compression codec.
    CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
    CompressionCodec codec = codecFactory.getCodec(path);
    if (codec != null) {
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        is = codec.createInputStream(is, decompressor);
    }
    return new CsvRecordReader(new InputStreamReader(is, charset), createFormat(conf), split.getLength(),
            conf.getBoolean(STRICT_MODE, true));
}
From source file:com.datascience.hadoop.CsvOutputFormat.java
License:Apache License
@Override
public RecordWriter<LongWritable, ListWritable<Text>> getRecordWriter(FileSystem fileSystem, JobConf conf,
        String name, Progressable progress) throws IOException {
    String charsetName = conf.get(CHARSET);
    Charset charset = charsetName != null ? Charset.forName(charsetName) : StandardCharsets.UTF_8;
    Path path;
    if (FileOutputFormat.getCompressOutput(conf)) {
        Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(conf, GzipCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, conf);
        path = FileOutputFormat.getTaskOutputPath(conf, name + codec.getDefaultExtension());
    } else {
        path = FileOutputFormat.getTaskOutputPath(conf, name);
    }
    return new CsvRecordWriter(new OutputStreamWriter(path.getFileSystem(conf).create(path, progress), charset),
            createFormat(conf));
}
From source file:com.davidgildeh.hadoop.input.simpledb.SimpleDBDAO.java
License:Apache License
/**
 * Default Constructor, initialises SimpleDB Client
 *
 * @param jobConf Hadoop Job Configuration
 */
public SimpleDBDAO(JobConf jobConf) {
    // Load Configuration
    String awsAccessKey = jobConf.get(SIMPLEDB_AWS_ACCESSKEY);
    String awsSecretKey = jobConf.get(SIMPLEDB_AWS_SECRETKEY);
    // Default to US-EAST Region
    String simpleDBRegion = jobConf.get(SIMPLEDB_AWS_REGION, "sdb.amazonaws.com");
    sdb_domain = jobConf.get(SIMPLEDB_DOMAIN);
    whereQuery = jobConf.get(SIMPLEDB_WHERE_QUERY, null);
    // Initialise SimpleDB Client
    sdb = new AmazonSimpleDBClient(new BasicAWSCredentials(awsAccessKey, awsSecretKey));
    sdb.setEndpoint(simpleDBRegion);
}
From source file:com.digitalpebble.behemoth.gate.AbstractGATEMapper.java
License:Apache License
public void configure(JobConf job) {
    super.configure(job);
    config = job;
    // we try to load the gate application
    // using the gate.app file
    String application_path = job.get("gate.application.path");
    String gapp_file = job.get("gate.application.descriptor", "application.xgapp");
    URL applicationDescriptorURL = null;
    // the application will have been unzipped and put on the distributed
    // cache
    try {
        String applicationName = new File(application_path).getCanonicalFile().getName();
        // trim the zip
        if (applicationName.endsWith(".zip"))
            applicationName = applicationName.replaceAll(".zip", "");
        Path[] localArchives = DistributedCache.getLocalCacheArchives(job);
        // identify the right archive
        for (Path la : localArchives) {
            String localPath = la.toUri().toString();
            LOG.info("LocalCache : " + localPath);
            if (!localPath.endsWith(application_path))
                continue;
            // see if the gapp file is directly under the dir
            applicationDescriptorURL = new URL("file://" + localPath + "/" + gapp_file);
            File f = new File(applicationDescriptorURL.getFile());
            if (f.exists())
                break;
            // or for older versions of the zipped pipelines
            applicationDescriptorURL = new URL("file://" + localPath + "/" + applicationName + "/" + gapp_file);
            break;
        }
    } catch (Exception e) {
        throw new RuntimeException("Impossible to retrieve gate application from distributed cache", e);
    }
    if (applicationDescriptorURL == null)
        throw new RuntimeException("GATE app " + application_path + "not available in distributed cache");
    processor = new GATEProcessor(applicationDescriptorURL);
    processor.setConf(config);
}
From source file:com.digitalpebble.behemoth.solr.LucidWorksWriter.java
License:Apache License
public void open(JobConf job, String name) throws IOException {
    String zkHost = job.get("solr.zkhost");
    if (zkHost != null && zkHost.equals("") == false) {
        String collection = job.get("solr.zk.collection", "collection1");
        LOG.info("Indexing to collection: " + collection + " w/ ZK host: " + zkHost);
        solr = new CloudSolrServer(zkHost);
        ((CloudSolrServer) solr).setDefaultCollection(collection);
    } else {
        String solrURL = job.get("solr.server.url");
        int queueSize = job.getInt("solr.client.queue.size", 100);
        int threadCount = job.getInt("solr.client.threads", 1);
        solr = new StreamingUpdateSolrServer(solrURL, queueSize, threadCount);
    }
    includeMetadata = job.getBoolean("lw.metadata", false);
    includeAnnotations = job.getBoolean("lw.annotations", false);
    // get the Behemoth annotations types and features
    // to store as SOLR fields
    // solr.f.name = BehemothType.featureName
    // e.g. solr.f.person = Person.string
    Iterator<Entry<String, String>> iterator = job.iterator();
    while (iterator.hasNext()) {
        Entry<String, String> entry = iterator.next();
        if (entry.getKey().startsWith("solr.f.") == false)
            continue;
        String fieldName = entry.getKey().substring("solr.f.".length());
        String val = entry.getValue();
        // see if a feature has been specified
        // if not we'll use '*' to indicate that we want
        // the text covered by the annotation
        HashMap<String, String> featureValMap = new HashMap<String, String>();
        int separator = val.indexOf(".");
        String featureName = "*";
        if (separator != -1)
            featureName = val.substring(separator + 1);
        featureValMap.put(featureName, fieldName);
        fieldMapping.put(entry.getValue(), featureValMap);
        LOG.debug("Adding to mapping " + entry.getValue() + " " + featureName + " " + fieldName);
    }
}