List of usage examples for org.apache.hadoop.mapreduce Job getConfiguration
public Configuration getConfiguration()
From source file:com.junz.hadoop.custom.SytsLogInputFormat.java
License:Apache License
/**
 * Records {@code number} in the job's configuration under the
 * {@code NUMBER_MAP_PROPERTY} key.
 *
 * @param job
 *            the job whose configuration is updated
 * @param number
 *            the value stored for {@code NUMBER_MAP_PROPERTY}
 */
public static void setNumberOfMaps(Job job, long number) {
    final String propertyName = NUMBER_MAP_PROPERTY;
    job.getConfiguration().setLong(propertyName, number);
}
From source file:com.jyz.study.hadoop.hbase.mapreduce.HFileOutputFormatBase.java
License:Apache License
/** * Configure a MapReduce Job to perform an incremental load into the given * table. This//w w w . ja v a 2 s.c o m * <ul> * <li>Inspects the table to configure a total order partitioner</li> * <li>Uploads the partitions file to the cluster and adds it to the * DistributedCache</li> * <li>Sets the number of reduce tasks to match the current number of * regions</li> * <li>Sets the output key/value class to match HFileOutputFormat's * requirements</li> * <li>Sets the reducer up to perform the appropriate sorting (either * KeyValueSortReducer or PutSortReducer)</li> * </ul> * The user should be sure to set the map output value class to either * KeyValue or Put before running this function. */ public static void configureIncrementalLoad(Job job, HTable table, Class<? extends HFileOutputFormatBase> hfileOutputFormatBase) throws IOException { Configuration conf = job.getConfiguration(); job.setOutputKeyClass(ImmutableBytesWritable.class); job.setOutputValueClass(KeyValue.class); job.setOutputFormatClass(hfileOutputFormatBase); // Based on the configured map output class, set the correct reducer to // properly // sort the incoming values. // TODO it would be nice to pick one or the other of these formats. if (KeyValue.class.equals(job.getMapOutputValueClass())) { job.setReducerClass(KeyValueSortReducer.class); } else if (Put.class.equals(job.getMapOutputValueClass())) { job.setReducerClass(PutSortReducer.class); } else if (Text.class.equals(job.getMapOutputValueClass())) { job.setReducerClass(TextSortReducer.class); } else { LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass()); } conf.setStrings("io.serializations", conf.get("io.serializations"), MutationSerialization.class.getName(), ResultSerialization.class.getName(), KeyValueSerialization.class.getName()); // Use table's region boundaries for TOP split points. 
LOG.info("Looking up current regions for table " + Bytes.toString(table.getTableName())); List<ImmutableBytesWritable> startKeys = getRegionStartKeys(table); LOG.info("Configuring " + startKeys.size() + " reduce partitions " + "to match current region count"); job.setNumReduceTasks(startKeys.size()); configurePartitioner(job, startKeys); // Set compression algorithms based on column families configureCompression(table, conf); configureBloomType(table, conf); configureBlockSize(table, conf); TableMapReduceUtil.addDependencyJars(job); TableMapReduceUtil.initCredentials(job); LOG.info("Incremental table " + Bytes.toString(table.getTableName()) + " output configured."); }
From source file:com.knewton.mapreduce.io.SSTableInputFormat.java
License:Apache License
/** * Comparator class name for columns./* w w w .java2s . c o m*/ * * @param value * The value of the property * @param job * The current job */ public static void setComparatorClass(String value, Job job) { job.getConfiguration().set(PropertyConstants.COLUMN_COMPARATOR.txt, value); }
From source file:com.knewton.mapreduce.io.SSTableInputFormat.java
License:Apache License
/** * This is not required if the column family type is standard. * * @param value/*from w ww .j a v a2 s . co m*/ * The value of the property * @param job * The current job */ public static void setSubComparatorClass(String value, Job job) { job.getConfiguration().set(PropertyConstants.COLUMN_SUBCOMPARATOR.txt, value); }
From source file:com.knewton.mapreduce.io.SSTableInputFormat.java
License:Apache License
/** * Partitioner for decorating keys./*w ww . jav a2s . co m*/ * * @param value * The value of the property * @param job * The current job */ public static void setPartitionerClass(String value, Job job) { job.getConfiguration().set(PropertyConstants.PARTITIONER.txt, value); }
From source file:com.knewton.mapreduce.io.SSTableInputFormat.java
License:Apache License
/**
 * Sets the sparse column property by writing {@code value} as a boolean into
 * the job's configuration under {@code PropertyConstants.SPARSE_COLUMN.txt}.
 *
 * @param value
 *            The value of the property
 * @param job
 *            The current job
 */
public static void setIsSparse(boolean value, Job job) {
    final String propertyName = PropertyConstants.SPARSE_COLUMN.txt;
    job.getConfiguration().setBoolean(propertyName, value);
}
From source file:com.knewton.mapreduce.io.SSTableInputFormat.java
License:Apache License
/**
 * Stores the column family type in the job's configuration under
 * {@code PropertyConstants.COLUMN_FAMILY_TYPE.txt}. Column family type needs
 * to be set if the column family type is Super.
 *
 * @param value
 *            The value of the property
 * @param job
 *            The current job
 */
public static void setColumnFamilyType(String value, Job job) {
    final String propertyName = PropertyConstants.COLUMN_FAMILY_TYPE.txt;
    job.getConfiguration().set(propertyName, value);
}
From source file:com.knewton.mapreduce.io.SSTableInputFormat.java
License:Apache License
/** * Set the name of the column family to read. This is optional. If not set all the data tables * under the given input directory will be collected and processed. * * @param value/*from w w w . j av a 2 s. c o m*/ * The value of the property * @param job * The current job */ public static void setColumnFamilyName(String value, Job job) { job.getConfiguration().set(PropertyConstants.COLUMN_FAMILY_NAME.txt, value); }
From source file:com.knewton.mapreduce.io.SSTableInputFormat.java
License:Apache License
/**
 * Stores the name of the keyspace to read in the job's configuration under
 * {@code PropertyConstants.KEYSPACE_NAME.txt}. This is optional. If not set all the data
 * tables under the given input directory will be collected and processed.
 *
 * @param value
 *            The value of the property
 * @param job
 *            The current job
 */
public static void setKeyspaceName(String value, Job job) {
    final String propertyName = PropertyConstants.KEYSPACE_NAME.txt;
    job.getConfiguration().set(propertyName, value);
}
From source file:com.knewton.mapreduce.io.SSTableInputFormatTest.java
License:Apache License
/** * Tests to see if when given an input directory the {@link SSTableInputFormat} correctly * expands all sub directories and picks up all the data tables corresponding to a specific * column family./* w w w . ja v a 2 s. c o m*/ */ @Test public void testListStatusWithColumnFamilyName() throws IOException { String cfName = "col_fam"; Job job = Job.getInstance(new Configuration(false)); Configuration conf = job.getConfiguration(); SSTableInputFormat.setColumnFamilyName(cfName, job); List<FileStatus> result = testListStatus(conf, "./src/test/resources/input/"); assertEquals(NUM_TABLES * NUM_TOKENS, result.size()); }