List of usage examples for org.apache.hadoop.mapred JobConf getInt
public int getInt(String name, int defaultValue)
Gets the value of the name property as an int; if no such property exists, the supplied defaultValue is returned.
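Before the examples taken from real projects below, here is a minimal sketch of the typical pattern: read a numeric tuning parameter from the job configuration with a sensible fallback. The property name my.example.buffer.size and the surrounding driver code are hypothetical and not taken from any of the sources listed here.

import org.apache.hadoop.mapred.JobConf;

public class GetIntSketch {
    public static void main(String[] args) {
        JobConf job = new JobConf();
        // Returns 4096 unless "my.example.buffer.size" has been set
        // (e.g. via job.setInt(...) or -D on the command line).
        int bufferSize = job.getInt("my.example.buffer.size", 4096);
        System.out.println("buffer size = " + bufferSize);
    }
}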
From source file:com.clojurewerkz.cascading.cassandra.hadoop.ColumnFamilyInputFormat.java
License:Apache License
public org.apache.hadoop.mapred.RecordReader<ByteBuffer, SortedMap<ByteBuffer, IColumn>> getRecordReader(
        org.apache.hadoop.mapred.InputSplit split, JobConf jobConf, final Reporter reporter) throws IOException {
    TaskAttemptContext tac = new TaskAttemptContext(jobConf, TaskAttemptID.forName(jobConf.get(MAPRED_TASK_ID))) {
        @Override
        public void progress() {
            reporter.progress();
        }
    };

    ColumnFamilyRecordReader recordReader = new ColumnFamilyRecordReader(
            jobConf.getInt(CASSANDRA_HADOOP_MAX_KEY_SIZE, CASSANDRA_HADOOP_MAX_KEY_SIZE_DEFAULT));
    recordReader.initialize((org.apache.hadoop.mapreduce.InputSplit) split, tac);
    return recordReader;
}
From source file:com.cloudera.science.avro.streaming.AvroAsJSONOutputFormat.java
License:Open Source License
@Override
public RecordWriter<Text, Text> getRecordWriter(FileSystem ignored, JobConf job, String name,
        Progressable progress) throws IOException {
    if (schema == null) {
        SchemaLoader loader = new SchemaLoader(job);
        this.schema = loader.load(job.get(SCHEMA_LITERAL), job.get(SCHEMA_URL), job.get(SCHEMA_TYPE_NAME));
        this.converter = new JsonConverter(schema);
        this.readKey = job.getBoolean(READ_KEY, true);
    }
    DataFileWriter<GenericRecord> writer = new DataFileWriter<GenericRecord>(
            new GenericDatumWriter<GenericRecord>(schema));
    if (getCompressOutput(job)) {
        int level = job.getInt(AvroOutputFormat.DEFLATE_LEVEL_KEY, AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
        String codecName = job.get(AvroJob.CONF_OUTPUT_CODEC,
                org.apache.avro.file.DataFileConstants.DEFLATE_CODEC);
        CodecFactory codec = codecName.equals(DataFileConstants.DEFLATE_CODEC)
                ? CodecFactory.deflateCodec(level)
                : CodecFactory.fromString(codecName);
        writer.setCodec(codec);
    }
    writer.setSyncInterval(job.getInt(AvroOutputFormat.SYNC_INTERVAL_KEY, DataFileConstants.DEFAULT_SYNC_INTERVAL));
    Path path = FileOutputFormat.getTaskOutputPath(job, name + AvroOutputFormat.EXT);
    writer.create(schema, path.getFileSystem(job).create(path));
    return new AvroAsJSONRecordWriter(writer, converter, readKey);
}
From source file:com.dataartisans.flink.cascading.runtime.util.FlinkFlowProcess.java
License:Apache License
@Override
public TupleEntryCollector openTrapForWrite(Tap trap) throws IOException {
    if (trap instanceof Hfs) {
        JobConf jobConf = new JobConf(this.getConfigCopy());

        int stepNum = jobConf.getInt("cascading.flow.step.num", 0);
        int nodeNum = jobConf.getInt("cascading.flow.node.num", 0);

        String partname = String.format("-%05d-%05d-%05d", stepNum, nodeNum, this.getCurrentSliceNum());
        jobConf.set("cascading.tapcollector.partname", "%s%spart" + partname);

        String value = String.format("attempt_%012d_0000_m_%06d_0", (int) Math.rint(System.currentTimeMillis()),
                this.getCurrentSliceNum());
        jobConf.set("mapred.task.id", value);
        jobConf.set("mapreduce.task.id", value);

        return trap.openForWrite(new FlinkFlowProcess(jobConf), null);
    } else {
        throw new UnsupportedOperationException("Only Hfs taps are supported as traps");
    }
}
From source file:com.digitalpebble.behemoth.solr.LucidWorksWriter.java
License:Apache License
public void open(JobConf job, String name) throws IOException {
    String zkHost = job.get("solr.zkhost");
    if (zkHost != null && zkHost.equals("") == false) {
        String collection = job.get("solr.zk.collection", "collection1");
        LOG.info("Indexing to collection: " + collection + " w/ ZK host: " + zkHost);
        solr = new CloudSolrServer(zkHost);
        ((CloudSolrServer) solr).setDefaultCollection(collection);
    } else {
        String solrURL = job.get("solr.server.url");
        int queueSize = job.getInt("solr.client.queue.size", 100);
        int threadCount = job.getInt("solr.client.threads", 1);
        solr = new StreamingUpdateSolrServer(solrURL, queueSize, threadCount);
    }
    includeMetadata = job.getBoolean("lw.metadata", false);
    includeAnnotations = job.getBoolean("lw.annotations", false);

    // get the Behemoth annotations types and features
    // to store as SOLR fields
    // solr.f.name = BehemothType.featureName
    // e.g. solr.f.person = Person.string
    Iterator<Entry<String, String>> iterator = job.iterator();
    while (iterator.hasNext()) {
        Entry<String, String> entry = iterator.next();
        if (entry.getKey().startsWith("solr.f.") == false)
            continue;
        String fieldName = entry.getKey().substring("solr.f.".length());
        String val = entry.getValue();
        // see if a feature has been specified
        // if not we'll use '*' to indicate that we want
        // the text covered by the annotation
        HashMap<String, String> featureValMap = new HashMap<String, String>();
        int separator = val.indexOf(".");
        String featureName = "*";
        if (separator != -1)
            featureName = val.substring(separator + 1);
        featureValMap.put(featureName, fieldName);
        fieldMapping.put(entry.getValue(), featureValMap);
        LOG.debug("Adding to mapping " + entry.getValue() + " " + featureName + " " + fieldName);
    }
}
From source file:com.digitalpebble.behemoth.solr.SOLRWriter.java
License:Apache License
public void open(JobConf job, String name) throws IOException {
    String zkHost = job.get("solr.zkhost");
    if (zkHost != null && zkHost.equals("") == false) {
        String collection = job.get("solr.zk.collection", "collection1");
        LOG.info("Indexing to collection: " + collection + " w/ ZK host: " + zkHost);
        solr = new CloudSolrServer(zkHost);
        ((CloudSolrServer) solr).setDefaultCollection(collection);
    } else {
        String solrURL = job.get("solr.server.url");
        int queueSize = job.getInt("solr.client.queue.size", 100);
        int threadCount = job.getInt("solr.client.threads", 1);
        solr = new ConcurrentUpdateSolrServer(solrURL, queueSize, threadCount);
    }
    String paramsString = job.get("solr.params");
    if (paramsString != null) {
        params = new ModifiableSolrParams();
        String[] pars = paramsString.trim().split("\\&");
        for (String kvs : pars) {
            String[] kv = kvs.split("=");
            if (kv.length < 2) {
                LOG.warn("Invalid Solr param " + kvs + ", skipping...");
                continue;
            }
            params.add(kv[0], kv[1]);
        }
        LOG.info("Using Solr params: " + params.toString());
    }
    includeMetadata = job.getBoolean("solr.metadata", false);
    includeAnnotations = job.getBoolean("solr.annotations", false);
    useMetadataPrefix = job.getBoolean("solr.metadata.use.prefix", false);
    metadataPrefix = job.get("solr.metadata.prefix", "attr_");
    annotationPrefix = job.get("solr.annotation.prefix", "annotate_");
    useAnnotationPrefix = job.getBoolean("solr.annotation.use.prefix", false);
    populateSolrFieldMappingsFromBehemothAnnotationsTypesAndFeatures(job);
}
From source file:com.ebay.erl.mobius.core.datajoin.EvenlyPartitioner.java
License:Apache License
/**
 * Read in the partition file and build indexing data structures.
 * If the keytype is {@link org.apache.hadoop.io.BinaryComparable} and
 * <tt>total.order.partitioner.natural.order</tt> is not false, a trie
 * of the first <tt>total.order.partitioner.max.trie.depth</tt>(2) + 1 bytes
 * will be built. Otherwise, keys will be located using a binary search of
 * the partition keyset using the {@link org.apache.hadoop.io.RawComparator}
 * defined for this job. The input file must be sorted with the same
 * comparator and contain {@link org.apache.hadoop.mapred.JobConf#getNumReduceTasks} - 1 keys.
 */
@SuppressWarnings("unchecked") // keytype from conf not static
public void configure(JobConf job) {
    try {
        String parts = getPartitionFile(job);
        final Path partFile = new Path(parts);
        final FileSystem fs = (DEFAULT_PATH.equals(parts)) ? FileSystem.getLocal(job) // assume in DistributedCache
                : partFile.getFileSystem(job);

        //Class<K> keyClass = (Class<K>)job.getMapOutputKeyClass();
        K[] splitPoints = readPartitions(fs, partFile, (Class<K>) Tuple.class, job);
        if (splitPoints.length != job.getNumReduceTasks() - 1) {
            throw new IOException("Wrong number of partitions in keyset");
        }
        RawComparator<K> comparator = (RawComparator<K>) job.getOutputKeyComparator();
        for (int i = 0; i < splitPoints.length - 1; ++i) {
            if (comparator.compare(splitPoints[i], splitPoints[i + 1]) >= 0) {
                throw new IOException("Split points are out of order");
            }
        }
        boolean natOrder = job.getBoolean("total.order.partitioner.natural.order", true);
        if (natOrder && BinaryComparable.class.isAssignableFrom(Tuple.class)) {
            partitions = buildTrie((BinaryComparable[]) splitPoints, 0, splitPoints.length, new byte[0],
                    job.getInt("total.order.partitioner.max.trie.depth", 2));
        } else {
            partitions = new BinarySearchNode(splitPoints, comparator);
        }
    } catch (IOException e) {
        throw new IllegalArgumentException("Can't read partitions file", e);
    }
}
From source file:com.ebay.erl.mobius.core.mapred.ConfigurableJob.java
License:Apache License
@Override
protected synchronized void submit() {
    JobConf jobConf = this.getJobConf();
    boolean isLocalHadoop = jobConf.get("mapred.job.tracker", "local").equals("local");

    // the default partitioner is {@link com.ebay.erl.mobius.core.datajoin.DataJoinKeyPartitioner}
    // which is hash based.
    //
    // If user choose to use even partitioner, Mobius will use
    // {@link com.ebay.erl.mobius.core.datajoin.EvenlyPartitioner} which
    // is sampling based partitioner of attempting to balance the load
    // for each reducer.
    String partitioner = jobConf.get("mobius.partitioner", "default");

    if (!isLocalHadoop && jobConf.getNumReduceTasks() != 0 && partitioner.equals("even")) {
        // this job needs reducer, perform sampling on the keys to
        // make load on reducers are almost evenly distributed.
        double freq = jobConf.getFloat("mobius.sampler.freq", 0.1F);
        int numSamples = jobConf.getInt("mobius.sampler.num.samples", 50000);
        int maxSplits = jobConf.getInt("mobius.sampler.max.slipts.sampled", 5);

        // log sampling parameters so that user knows.
        LOGGER.info("Sampling parameters { " + "mobius.sampler.freq:" + format.format(freq) + ", "
                + "mobius.sampler.num.samples:" + numSamples + ", " + "mobius.sampler.max.slipts.sampled:"
                + maxSplits + "}");

        InputSampler.Sampler<?, ?> sampler = new MobiusInputSampler(freq, numSamples, maxSplits);
        writePartitionFile(jobConf, sampler);

        // add to distributed cache
        try {
            URI partitionUri = new URI(TotalOrderPartitioner.getPartitionFile(jobConf) + "#_partitions");
            LOGGER.info("Adding partition uri to distributed cache:" + partitionUri.toString());

            DistributedCache.addCacheFile(partitionUri, jobConf);
            DistributedCache.createSymlink(jobConf);
            jobConf.setPartitionerClass(EvenlyPartitioner.class);

            LOGGER.info("Using " + EvenlyPartitioner.class.getCanonicalName()
                    + " to partiton the keys evenly among reducers.");
        } catch (URISyntaxException e) {
            LOGGER.error(e.getMessage(), e);
            throw new RuntimeException(e);
        }

        // adding -XX:-UseParallelOldGC, this will automatically set -XX:-UseParallelGC
        // according to Oracle's specification
        String jvmOpts = jobConf.get("mapred.child.java.opts", "");
        if (jvmOpts.isEmpty()) {
            jvmOpts = "-XX:-UseParallelOldGC";
        } else {
            if (jvmOpts.indexOf("-XX:-UseParallelOldGC") < 0) {
                // remove "
                jvmOpts = jvmOpts.replaceAll("\"", "");
                jvmOpts = jvmOpts.concat(" -XX:-UseParallelOldGC");
            }
        }
        jobConf.set("mapred.child.java.opts", jvmOpts);

        this.setJobConf(jobConf);
    }
    LOGGER.info("Submiting job:" + jobConf.getJobName());
    super.submit();
}
From source file:com.flaptor.hounder.crawler.Nutch9Fetcher.java
License:Apache License
/**
 * Initialize the fetcher.
 */
public Nutch9Fetcher() {
    Config config = Config.getConfig("nutchfetcher.properties");
    segmentsDir = config.getString("fetchlist.dir");
    keepUrl = config.getBoolean("keep.original.url.on.redirect");
    fetcher = new Fetcher();
    Configuration conf = new Configuration();
    // conf.addDefaultResource("crawl-tool.xml");
    conf.addDefaultResource("nutch-default.xml");
    conf.addDefaultResource("nutch-site.xml");
    JobConf job = new NutchJob(conf);
    threads = job.getInt("fetcher.threads.fetch", 10);
    fetcher.setConf(conf);
}
From source file:com.github.gaoyangthu.demo.mapred.dancing.DistributedPentomino.java
License:Apache License
public int run(String[] args) throws Exception {
    JobConf conf;
    int depth = 5;
    int width = 9;
    int height = 10;
    Class<? extends Pentomino> pentClass;
    if (args.length == 0) {
        System.out.println("Usage: pentomino <output> [-depth #] [-height #] [-width #]");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    conf = new JobConf(getConf());

    // Pick up the parameters, should the user set these
    width = conf.getInt("pent.width", width);
    height = conf.getInt("pent.height", height);
    depth = conf.getInt("pent.depth", depth);
    pentClass = conf.getClass("pent.class", OneSidedPentomino.class, Pentomino.class);

    for (int i = 0; i < args.length; i++) {
        if (args[i].equalsIgnoreCase("-depth")) {
            depth = Integer.parseInt(args[++i].trim());
        } else if (args[i].equalsIgnoreCase("-height")) {
            height = Integer.parseInt(args[++i].trim());
        } else if (args[i].equalsIgnoreCase("-width")) {
            width = Integer.parseInt(args[++i].trim());
        }
    }

    // Set parameters for MR tasks to pick up either which way the user sets
    // them or not
    conf.setInt("pent.width", width);
    conf.setInt("pent.height", height);
    conf.setInt("pent.depth", depth);

    Path output = new Path(args[0]);
    Path input = new Path(output + "_input");
    FileSystem fileSys = FileSystem.get(conf);
    try {
        FileInputFormat.setInputPaths(conf, input);
        FileOutputFormat.setOutputPath(conf, output);
        conf.setJarByClass(PentMap.class);

        conf.setJobName("dancingElephant");
        Pentomino pent = ReflectionUtils.newInstance(pentClass, conf);
        pent.initialize(width, height);
        createInputDirectory(fileSys, input, pent, depth);

        // the keys are the prefix strings
        conf.setOutputKeyClass(Text.class);
        // the values are puzzle solutions
        conf.setOutputValueClass(Text.class);

        conf.setMapperClass(PentMap.class);
        conf.setReducerClass(IdentityReducer.class);

        conf.setNumMapTasks(2000);
        conf.setNumReduceTasks(1);

        JobClient.runJob(conf);
    } finally {
        fileSys.delete(input, true);
    }
    return 0;
}
From source file:com.hadoopilluminated.examples.dancing.DistributedPentomino.java
License:Apache License
public int run(String[] args) throws Exception {
    JobConf conf;
    int depth = 5;
    int width = 9;
    int height = 10;
    Class<? extends Pentomino> pentClass;
    if (args.length == 0) {
        System.out.println("pentomino <output>");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    conf = new JobConf(getConf());
    width = conf.getInt("pent.width", width);
    height = conf.getInt("pent.height", height);
    depth = conf.getInt("pent.depth", depth);
    pentClass = conf.getClass("pent.class", OneSidedPentonimo.class, Pentomino.class);

    Path output = new Path(args[0]);
    Path input = new Path(output + "_input");
    FileSystem fileSys = FileSystem.get(conf);
    try {
        FileInputFormat.setInputPaths(conf, input);
        FileOutputFormat.setOutputPath(conf, output);
        conf.setJarByClass(PentMap.class);

        conf.setJobName("dancingElephant");
        Pentomino pent = ReflectionUtils.newInstance(pentClass, conf);
        pent.initialize(width, height);
        createInputDirectory(fileSys, input, pent, depth);

        // the keys are the prefix strings
        conf.setOutputKeyClass(Text.class);
        // the values are puzzle solutions
        conf.setOutputValueClass(Text.class);

        conf.setMapperClass(PentMap.class);
        conf.setReducerClass(IdentityReducer.class);

        conf.setNumMapTasks(2000);
        conf.setNumReduceTasks(1);

        JobClient.runJob(conf);
    } finally {
        fileSys.delete(input, true);
    }
    return 0;
}