List of usage examples for org.apache.hadoop.mapred JobConf get
public String get(String name)

Get the value of the name property, or null if no such property exists.

From source file:com.digitalpebble.behemoth.solr.SOLRWriter.java
License:Apache License
public void open(JobConf job, String name) throws IOException {
    String zkHost = job.get("solr.zkhost");
    if (zkHost != null && !zkHost.isEmpty()) {
        String collection = job.get("solr.zk.collection", "collection1");
        LOG.info("Indexing to collection: " + collection + " w/ ZK host: " + zkHost);
        solr = new CloudSolrServer(zkHost);
        ((CloudSolrServer) solr).setDefaultCollection(collection);
    } else {
        String solrURL = job.get("solr.server.url");
        int queueSize = job.getInt("solr.client.queue.size", 100);
        int threadCount = job.getInt("solr.client.threads", 1);
        solr = new ConcurrentUpdateSolrServer(solrURL, queueSize, threadCount);
    }
    String paramsString = job.get("solr.params");
    if (paramsString != null) {
        params = new ModifiableSolrParams();
        String[] pars = paramsString.trim().split("\\&");
        for (String kvs : pars) {
            String[] kv = kvs.split("=");
            if (kv.length < 2) {
                LOG.warn("Invalid Solr param " + kvs + ", skipping...");
                continue;
            }
            params.add(kv[0], kv[1]);
        }
        LOG.info("Using Solr params: " + params.toString());
    }
    includeMetadata = job.getBoolean("solr.metadata", false);
    includeAnnotations = job.getBoolean("solr.annotations", false);
    useMetadataPrefix = job.getBoolean("solr.metadata.use.prefix", false);
    metadataPrefix = job.get("solr.metadata.prefix", "attr_");
    annotationPrefix = job.get("solr.annotation.prefix", "annotate_");
    useAnnotationPrefix = job.getBoolean("solr.annotation.use.prefix", false);
    populateSolrFieldMappingsFromBehemothAnnotationsTypesAndFeatures(job);
}
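Distilled from the example above, a minimal standalone sketch of the lookup pattern (property names taken from the example, values hypothetical): get(name) returns null for an unset property, while get(name, default) and the typed getInt/getBoolean variants fall back to a default.

import org.apache.hadoop.mapred.JobConf;

public class JobConfGetDemo {
    public static void main(String[] args) {
        JobConf job = new JobConf();
        job.set("solr.server.url", "http://localhost:8983/solr"); // hypothetical value

        String zkHost = job.get("solr.zkhost");                            // null: never set
        String collection = job.get("solr.zk.collection", "collection1");  // falls back to default
        int queueSize = job.getInt("solr.client.queue.size", 100);
        boolean includeMetadata = job.getBoolean("solr.metadata", false);

        System.out.println(zkHost + " / " + collection + " / " + queueSize + " / " + includeMetadata);
    }
}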
From source file:com.digitalpebble.behemoth.solr.SOLRWriter.java
License:Apache License
protected void populateSolrFieldMappingsFromBehemothAnnotationsTypesAndFeatures(JobConf job) {
    // Read the Behemoth annotation types and features to store as Solr fields.
    // Convention: solr.f.name = BehemothType.featureName
    // e.g. solr.f.person = Person.string maps the "string" feature of
    // "Person" annotations onto the Solr field "person".
    Iterator<Entry<String, String>> iterator = job.iterator();
    while (iterator.hasNext()) {
        Entry<String, String> entry = iterator.next();
        if (!entry.getKey().startsWith("solr.f."))
            continue;
        String solrFieldName = entry.getKey().substring("solr.f.".length());
        populateMapping(solrFieldName, entry.getValue());
    }
    String list = job.get("solr.annotations.list");
    if (useAnnotationPrefix) {
        if (list == null || list.trim().length() == 0) {
            // Include all annotations if no annotations list is defined
            includeAllAnnotations = true;
        } else {
            // Include only the annotations named in "solr.annotations.list", with the prefix
            String[] names = list.split("\\s+");
            for (String name : names) {
                String solrFieldName = annotationPrefix + name;
                populateMapping(solrFieldName, name);
            }
        }
    } else {
        // Include the listed annotations without a prefix, if an annotations list is defined.
        // These fields must be declared explicitly in the Solr schema (or have a field
        // mapping) since solr.annotation.use.prefix is not set.
        if (list == null || list.trim().length() == 0) {
            return;
        }
        String[] names = list.split("\\s+");
        for (String name : names) {
            populateMapping(name, name);
        }
    }
}
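For context, a hypothetical driver-side configuration that the method above would pick up (the type, feature, and field names are invented for illustration):

import org.apache.hadoop.mapred.JobConf;

public class SolrMappingSetup {
    public static void main(String[] args) {
        JobConf job = new JobConf();
        // Explicit field mapping: the "string" feature of "Person" annotations
        // goes into the Solr field "person".
        job.set("solr.f.person", "Person.string");
        // Prefix-based inclusion of a whitespace-separated list of annotation types.
        job.setBoolean("solr.annotation.use.prefix", true);
        job.set("solr.annotation.prefix", "annotate_");
        job.set("solr.annotations.list", "Person Location");
    }
}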
From source file:com.digitalpebble.behemoth.tika.TikaMapper.java
License:Apache License
@Override
public void configure(JobConf job) {
    String handlerName = job.get(TikaConstants.TIKA_PROCESSOR_KEY);
    if (handlerName != null) {
        Class<?> handlerClass = job.getClass(handlerName, TikaProcessor.class);
        try {
            processor = (TikaProcessor) handlerClass.newInstance();
        } catch (InstantiationException e) {
            LOG.error("Exception", e);
            // TODO: what's the best way to do this?
            throw new RuntimeException(e);
        } catch (IllegalAccessException e) {
            LOG.error("Exception", e);
            throw new RuntimeException(e);
        }
    } else {
        processor = new TikaProcessor();
    }
    processor.setConf(job);
}
From source file:com.digitalpebble.behemoth.uima.UIMAMapper.java
License:Apache License
public void configure(JobConf conf) {
    this.config = conf;
    storeshortnames = config.getBoolean("uima.store.short.names", true);
    File pearpath = new File(conf.get("uima.pear.path"));
    String pearname = pearpath.getName();
    URL urlPEAR = null;
    try {
        Path[] localArchives = DistributedCache.getLocalCacheFiles(conf);
        // identify the right archive
        for (Path la : localArchives) {
            String localPath = la.toUri().toString();
            LOG.info("Inspecting local paths " + localPath);
            if (!localPath.endsWith(pearname))
                continue;
            urlPEAR = new URL("file://" + localPath);
            break;
        }
    } catch (IOException e) {
        throw new RuntimeException("Impossible to retrieve gate application from distributed cache", e);
    }
    if (urlPEAR == null)
        throw new RuntimeException("UIMA pear " + pearpath + " not available in distributed cache");
    File pearFile = new File(urlPEAR.getPath());
    // should check whether a different mapper has already unpacked it
    // but for now we just unpack in a different location for every mapper
    TaskAttemptID attempt = TaskAttemptID.forName(conf.get("mapred.task.id"));
    installDir = new File(pearFile.getParentFile(), attempt.toString());
    PackageBrowser instPear = PackageInstaller.installPackage(installDir, pearFile, true);
    // get the resources required for the AnalysisEngine
    org.apache.uima.resource.ResourceManager rsrcMgr = UIMAFramework.newDefaultResourceManager();
    // create an analysis engine from the installed PEAR package using
    // the created PEAR specifier
    XMLInputSource in;
    try {
        in = new XMLInputSource(instPear.getComponentPearDescPath());
        ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);
        tae = UIMAFramework.produceAnalysisEngine(specifier, rsrcMgr, null);
        cas = tae.newCAS();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    // the feature filters have the form Type:featureName;
    // we group them by annotation type
    String[] featuresFilters = this.config.get("uima.features.filter", "").split(",");
    for (String ff : featuresFilters) {
        String[] fp = ff.split(":");
        if (fp.length != 2)
            continue;
        Set<Feature> features = featfilts.get(fp[0]);
        if (features == null) {
            features = new HashSet<Feature>();
            featfilts.put(fp[0], features);
        }
        Feature f = cas.getTypeSystem().getFeatureByFullName(ff);
        if (f != null)
            features.add(f);
    }
    String[] annotTypes = this.config.get("uima.annotations.filter", "").split(",");
    uimatypes = new ArrayList<Type>(annotTypes.length);
    for (String type : annotTypes) {
        Type aType = cas.getTypeSystem().getType(type);
        uimatypes.add(aType);
    }
}
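The mapper above expects the PEAR archive to arrive via the distributed cache and its path to be recorded in the JobConf. A sketch of the matching driver-side setup (paths and type names are assumptions, not taken from the Behemoth source):

import java.net.URI;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.mapred.JobConf;

public class UimaJobSetup {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // Ship the PEAR to every task; configure() matches it by file name.
        DistributedCache.addCacheFile(URI.create("hdfs:///apps/uima/MyEngine.pear"), conf);
        conf.set("uima.pear.path", "/apps/uima/MyEngine.pear");
        // Comma-separated filters read back via conf.get(..., "") in configure().
        conf.set("uima.annotations.filter", "org.example.Person,org.example.Location");
        conf.set("uima.features.filter", "org.example.Person:name");
    }
}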
From source file:com.dnb.daas.monitoring.common.mr.taps.DeleteTableOutputFormat.java
public RecordWriter getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress)
        throws IOException {
    String tableName = job.get("hbase.mapred.outputtable");
    HTable table;
    try {
        table = new HTable(HBaseConfiguration.create(job), tableName);
    } catch (IOException e) {
        LOGGER.error("Error: ", e);
        throw e;
    }
    // the table is handed to the RecordWriter and must stay open;
    // it is closed by the writer's own close()
    table.setAutoFlush(false, true);
    return new DeleteTableOutputFormat.TableRecordWriter(table);
}
From source file:com.dumbomr.mapred.TypedBytesTableOutputFormat.java
License:Apache License
@Override
@SuppressWarnings("unchecked")
public RecordWriter getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress)
        throws IOException {
    // expecting exactly one path
    String tableName = job.get(OUTPUT_TABLE);
    String zkConfig = job.get(ZK_HOST);
    HTable table = null;
    Configuration config = HBaseConfiguration.create(job);
    if (zkConfig != null) {
        config.set(ZK_HOST, zkConfig);
    }
    try {
        table = new HTable(config, tableName);
    } catch (IOException e) {
        LOG.error(e);
        throw e;
    }
    table.setAutoFlush(false, true);
    return new TableRecordWriter(table);
}
From source file:com.dumbomr.mapred.TypedBytesTableOutputFormat.java
License:Apache License
@Override
public void checkOutputSpecs(FileSystem ignored, JobConf job)
        throws FileAlreadyExistsException, InvalidJobConfException, IOException {
    String tableName = job.get(OUTPUT_TABLE);
    if (tableName == null) {
        throw new IOException("Must specify table name");
    }
}
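checkOutputSpecs fails fast when the table name property is missing, so the job driver must set it before submission. A minimal sketch (table name hypothetical; in the stock org.apache.hadoop.hbase.mapred.TableOutputFormat, OUTPUT_TABLE is the key "hbase.mapred.outputtable", matching the DeleteTableOutputFormat example above):

import org.apache.hadoop.mapred.JobConf;

public class TableOutputSetup {
    public static void main(String[] args) {
        JobConf job = new JobConf();
        // Without this property, checkOutputSpecs() throws
        // IOException("Must specify table name").
        job.set("hbase.mapred.outputtable", "my_table");
    }
}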
From source file:com.ebay.erl.mobius.core.mapred.MobiusInputSampler.java
License:Apache License
private AbstractMobiusMapper getMapper(InputFormat inf, InputSplit split, JobConf conf) throws IOException {
    AbstractMobiusMapper mapper = null;
    if (inf instanceof MobiusDelegatingInputFormat) {
        Class<AbstractMobiusMapper> mapperClass = ((MobiusDelegatingInputFormat) inf).getMapper(split, conf);
        mapper = ReflectionUtils.newInstance(mapperClass, conf);
    } else {
        Class<? extends AbstractMobiusMapper> mapperClass = (Class<? extends AbstractMobiusMapper>) Util
                .getClass(conf.get(ConfigureConstants.MAPPER_CLASS));
        mapper = ReflectionUtils.newInstance(mapperClass, conf);
    }
    return mapper;
}
From source file:com.facebook.presto.hive.HiveWriterFactory.java
License:Apache License
public static String getFileExtension(JobConf conf, StorageFormat storageFormat) {
    // text format files must have the correct extension when compressed
    if (!HiveConf.getBoolVar(conf, COMPRESSRESULT)
            || !HiveIgnoreKeyTextOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return "";
    }
    String compressionCodecClass = conf.get("mapred.output.compression.codec");
    if (compressionCodecClass == null) {
        return new DefaultCodec().getDefaultExtension();
    }
    try {
        Class<? extends CompressionCodec> codecClass = conf.getClassByName(compressionCodecClass)
                .asSubclass(CompressionCodec.class);
        return ReflectionUtil.newInstance(codecClass, conf).getDefaultExtension();
    } catch (ClassNotFoundException e) {
        throw new PrestoException(HIVE_UNSUPPORTED_FORMAT,
                "Compression codec not found: " + compressionCodecClass, e);
    } catch (RuntimeException e) {
        throw new PrestoException(HIVE_UNSUPPORTED_FORMAT,
                "Failed to load compression codec: " + compressionCodecClass, e);
    }
}
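The same codec-to-extension resolution can be exercised directly against a JobConf; a small sketch using the standard Hadoop GzipCodec (the codec choice is illustrative):

import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.ReflectionUtils;

public class CodecExtensionDemo {
    public static void main(String[] args) throws ClassNotFoundException {
        JobConf conf = new JobConf();
        conf.set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");

        // Resolve the configured class name to a codec and ask it for its extension.
        Class<? extends CompressionCodec> codecClass = conf
                .getClassByName(conf.get("mapred.output.compression.codec"))
                .asSubclass(CompressionCodec.class);
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, conf);
        System.out.println(codec.getDefaultExtension()); // prints ".gz"
    }
}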
From source file:com.facebook.presto.hive.OrcFileWriterFactory.java
License:Apache License
private static CompressionKind getCompression(Properties schema, JobConf configuration) {
    String compressionName = schema.getProperty(OrcTableProperties.COMPRESSION.getPropName());
    if (compressionName == null) {
        compressionName = configuration.get("hive.exec.orc.default.compress");
    }
    if (compressionName == null) {
        return CompressionKind.ZLIB;
    }
    CompressionKind compression;
    try {
        compression = CompressionKind.valueOf(compressionName.toUpperCase(ENGLISH));
    } catch (IllegalArgumentException e) {
        throw new PrestoException(HIVE_UNSUPPORTED_FORMAT, "Unknown ORC compression type " + compressionName);
    }
    return compression;
}