Usage examples for org.apache.hadoop.mapred.JobConf.getBoolean
public boolean getBoolean(String name, boolean defaultValue)
Gets the value of the name property as a boolean. If no such property is set, or the stored value is not a valid boolean, defaultValue is returned.
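A minimal sketch of the call pattern, for orientation only. The property key "mapreduce.job.example.flag" is hypothetical, used purely to illustrate the lookup and the default fallback:

import org.apache.hadoop.mapred.JobConf;

public class GetBooleanExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // Hypothetical property key, not one defined by Hadoop.
        conf.setBoolean("mapreduce.job.example.flag", true);

        // Returns the stored value; falls back to the default (false) when
        // the property is unset or is not a valid boolean.
        boolean flag = conf.getBoolean("mapreduce.job.example.flag", false);
        System.out.println("flag = " + flag);
    }
}

The examples below all follow this pattern: a well-known property key plus a conservative default.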
From source file:it.crs4.pydoop.pipes.PipesReducer.java
License:Apache License
public void configure(JobConf job) {
    this.job = job;
    // Disable the auto increment of the counter. For pipes, the number of processed
    // records could be different (equal or less) than the number of input records.
    SkipBadRecords.setAutoIncrReducerProcCount(job, false);
    skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);
}
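The configure method above only reads the skip-records flag; whether skipping mode is active for a given attempt is decided by the framework at runtime. As a hedged sketch, a job that wants record skipping would typically set the standard SkipBadRecords knobs before submission (the thresholds here are illustrative, not recommendations):

JobConf conf = new JobConf();
// Start considering skip mode after two failed attempts of a task.
SkipBadRecords.setAttemptsToStartSkipping(conf, 2);
// Tolerate up to 10 bad key groups per reducer before failing the task.
SkipBadRecords.setReducerMaxSkipGroups(conf, 10L);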
From source file:it.crs4.pydoop.pipes.Submitter.java
License:Apache License
/**
 * Check whether the job is using a Java RecordReader.
 * @param conf the configuration to check
 * @return is it a Java RecordReader?
 */
public static boolean getIsJavaRecordReader(JobConf conf) {
    return conf.getBoolean(Submitter.IS_JAVA_RR, false);
}
From source file:it.crs4.pydoop.pipes.Submitter.java
License:Apache License
/**
 * Check whether the job is using a Java Mapper.
 * @param conf the configuration to check
 * @return is it a Java Mapper?
 */
public static boolean getIsJavaMapper(JobConf conf) {
    return conf.getBoolean(Submitter.IS_JAVA_MAP, false);
}
From source file:it.crs4.pydoop.pipes.Submitter.java
License:Apache License
/**
 * Check whether the job is using a Java Reducer.
 * @param conf the configuration to check
 * @return is it a Java Reducer?
 */
public static boolean getIsJavaReducer(JobConf conf) {
    return conf.getBoolean(Submitter.IS_JAVA_REDUCE, false);
}
From source file:it.crs4.pydoop.pipes.Submitter.java
License:Apache License
/**
 * Will the reduce use a Java RecordWriter?
 * @param conf the configuration to check
 * @return true, if the output of the job will be written by Java
 */
public static boolean getIsJavaRecordWriter(JobConf conf) {
    return conf.getBoolean(Submitter.IS_JAVA_RW, false);
}
From source file:it.crs4.pydoop.pipes.Submitter.java
License:Apache License
/**
 * Does the user want to keep the command file for debugging? If this is
 * true, pipes will write a copy of the command data to a file in the
 * task directory named "downlink.data", which may be used to run the C++
 * program under the debugger. You probably also want to set
 * JobConf.setKeepFailedTaskFiles(true) to keep the entire directory from
 * being deleted.
 * To run using the data file, set the environment variable
 * "mapreduce.pipes.commandfile" to point to the file.
 * @param conf the configuration to check
 * @return will the framework save the command file?
 */
public static boolean getKeepCommandFile(JobConf conf) {
    return conf.getBoolean(Submitter.PRESERVE_COMMANDFILE, false);
}
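A hedged sketch of how a job might opt in before submission, matching the key read above. It assumes Submitter.PRESERVE_COMMANDFILE is accessible to the caller; if the constant is not public, the underlying property name would have to be set directly:

JobConf conf = new JobConf();
// Keep a copy of the pipes command data ("downlink.data") in the task directory.
conf.setBoolean(Submitter.PRESERVE_COMMANDFILE, true);
// Also keep failed task directories so the command file survives cleanup.
conf.setKeepFailedTaskFiles(true);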
From source file:ivory.preprocess.BuildIntDocVectorsForwardIndex.java
License:Apache License
public int runTool() throws Exception {
    JobConf conf = new JobConf(getConf(), BuildIntDocVectorsForwardIndex.class);
    FileSystem fs = FileSystem.get(conf);

    String indexPath = conf.get("Ivory.IndexPath");
    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);

    int mapTasks = conf.getInt("Ivory.NumMapTasks", 0);
    String collectionName = env.readCollectionName();
    boolean buildWeighted = conf.getBoolean("Ivory.BuildWeighted", false);

    sLogger.info("Tool: BuildIntDocVectorsIndex");
    sLogger.info(" - IndexPath: " + indexPath);
    sLogger.info(" - CollectionName: " + collectionName);
    sLogger.info(" - BuildWeighted: " + buildWeighted);
    sLogger.info(" - NumMapTasks: " + mapTasks);

    String intDocVectorsPath;
    String forwardIndexPath;
    if (buildWeighted) {
        intDocVectorsPath = env.getWeightedIntDocVectorsDirectory();
        forwardIndexPath = env.getWeightedIntDocVectorsForwardIndex();
    } else {
        intDocVectorsPath = env.getIntDocVectorsDirectory();
        forwardIndexPath = env.getIntDocVectorsForwardIndex();
    }

    if (!fs.exists(new Path(intDocVectorsPath))) {
        sLogger.info("Error: IntDocVectors don't exist!");
        return 0;
    }

    if (fs.exists(new Path(forwardIndexPath))) {
        sLogger.info("IntDocVectorIndex already exists: skipping!");
        return 0;
    }

    conf.setJobName("BuildIntDocVectorsForwardIndex:" + collectionName);

    Path inputPath = new Path(intDocVectorsPath);
    FileInputFormat.setInputPaths(conf, inputPath);

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(1);

    conf.set("mapred.child.java.opts", "-Xmx2048m");

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputFormat(NullOutputFormat.class);

    conf.setMapRunnerClass(MyMapRunner.class);
    conf.setReducerClass(MyReducer.class);

    JobClient.runJob(conf);

    return 0;
}
From source file:net.iponweb.hadoop.streaming.avro.AvroAsJsonOutputFormat.java
License:Apache License
@Override
public RecordWriter<Text, NullWritable> getRecordWriter(FileSystem ignore, JobConf job, String name,
        Progressable prog) throws IOException {

    Schema schema;
    Schema.Parser p = new Schema.Parser();
    String strSchema = job.get("iow.streaming.output.schema");

    if (strSchema == null) {
        String schemaFile = job.get("iow.streaming.output.schema.file", "streaming_output_schema");

        if (job.getBoolean("iow.streaming.schema.use.prefix", false)) {
            // guess schema from file name
            // format is: schema:filename
            // with special keyword default - 'default:filename'
            String str[] = name.split(":");
            if (!str[0].equals("default"))
                schemaFile = str[0];
            name = str[1];
        }

        LOG.info(this.getClass().getSimpleName() + ": Using schema from file: " + schemaFile);
        File f = new File(schemaFile);
        schema = p.parse(f);
    } else {
        LOG.info(this.getClass().getSimpleName() + ": Using schema from jobconf.");
        schema = p.parse(strSchema);
    }

    if (schema == null) {
        throw new IOException("Can't find proper output schema");
    }

    DataFileWriter<GenericRecord> writer = new DataFileWriter<GenericRecord>(
            new GenericDatumWriter<GenericRecord>());

    configureDataFileWriter(writer, job);

    Path path = FileOutputFormat.getTaskOutputPath(job, name + org.apache.avro.mapred.AvroOutputFormat.EXT);
    writer.create(schema, path.getFileSystem(job).create(path));

    return createRecordWriter(writer, schema);
}
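The method above resolves the Avro schema either from the "iow.streaming.output.schema" property, from a schema file, or from a "schemafile:outputname" prefix on the task output name. A hedged configuration sketch using the property keys from the code (the file names are made up for illustration):

JobConf job = new JobConf();
// Read the schema from a file rather than embedding it in the job configuration.
job.set("iow.streaming.output.schema.file", "streaming_output_schema");
// Allow a per-output override via a "schema:filename" prefix on the output name.
job.setBoolean("iow.streaming.schema.use.prefix", true);
// With the prefix enabled, an output name such as "my_schema.avsc:part-00000"
// loads the schema from "my_schema.avsc" and writes to "part-00000";
// "default:part-00000" keeps the configured schema file.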
From source file:net.iponweb.hadoop.streaming.io.ByKeyOutputFormat.java
License:Apache License
private void initialize(JobConf job) throws IOException {

    SupportedOutputFormats.put("text", "org.apache.hadoop.mapred.TextOutputFormat");
    SupportedOutputFormats.put("sequence", "org.apache.hadoop.mapred.SequenceFileOutputFormat");
    SupportedOutputFormats.put("avrojson", "net.iponweb.hadoop.streaming.avro.AvroAsJsonOutputFormat");
    SupportedOutputFormats.put("avrotext", "net.iponweb.hadoop.streaming.avro.AvroAsTextOutputFormat");
    SupportedOutputFormats.put("parquettext", "net.iponweb.hadoop.streaming.parquet.ParquetAsTextOutputFormat");
    SupportedOutputFormats.put("parquetjson", "net.iponweb.hadoop.streaming.parquet.ParquetAsTextOutputFormat");

    String format = job.get("iow.streaming.bykeyoutputformat", "text");

    for (String f : SupportedOutputFormats.keySet())
        if (f.equals(format)) {
            try {
                internalOutputFormat = (OutputFormat<Text, Text>) Class.forName(SupportedOutputFormats.get(f))
                        .newInstance();
            } catch (Exception e) {
                e.printStackTrace();
                throw new IOException("Can't instantiate class '" + SupportedOutputFormats.get(f) + "'");
            }
        }

    if (internalOutputFormat == null)
        throw new IOException("Unknown result type: '" + format + "'");

    assumeFileNamesSorted = job.getBoolean("iow.streaming.bykeyoutputformat.keys.sorted", false);

    String delimiter = job.get("map.output.key.field.separator", "\t");
    splitter = new KeyValueSplitter(delimiter);

    LOG.info(getClass().getSimpleName() + " initialized, output format is: " + format);
}
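The initializer selects one of several internal formats by the "iow.streaming.bykeyoutputformat" key. A hedged configuration sketch using the keys read above (the values are illustrative):

JobConf job = new JobConf();
// One of: text, sequence, avrojson, avrotext, parquettext, parquetjson.
job.set("iow.streaming.bykeyoutputformat", "sequence");
// Declare that records arrive grouped by key, if the job can guarantee it.
job.setBoolean("iow.streaming.bykeyoutputformat.keys.sorted", true);
// Separator used to split map output lines into key and value.
job.set("map.output.key.field.separator", "\t");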
From source file:net.iponweb.hadoop.streaming.parquet.ParquetAsTextOutputFormat.java
License:Apache License
public RecordWriter<Text, Text> getRecordWriter(FileSystem fs, JobConf job, String name, Progressable progress)
        throws IOException {

    // find and load schema
    String writeSchema = job.get("iow.streaming.output.schema");
    MessageType s;

    if (writeSchema == null) {
        String schemaFile = job.get("iow.streaming.output.schema.file", "streaming_output_schema");

        if (job.getBoolean("iow.streaming.schema.use.prefix", false)) {
            // guess schema from file name
            // format is: schema:filename
            // with special keyword default - 'default:filename'
            String str[] = name.split(":");
            if (!str[0].equals("default"))
                schemaFile = str[0];
            name = str[1];
        }

        LOG.info("Using schema: " + schemaFile);
        File f = new File(schemaFile);

        try {
            BufferedReader reader = new BufferedReader(new FileReader(f));
            StringBuilder r = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null)
                r.append(line);
            writeSchema = r.toString();
        } catch (Throwable e) {
            LOG.error("Can't read schema file " + schemaFile);
            Throwables.propagateIfPossible(e, IOException.class);
            throw new RuntimeException(e);
        }
    }

    s = MessageTypeParser.parseMessageType(writeSchema);

    setWriteSupportClass(job, GroupWriteSupport.class);
    GroupWriteSupport.setSchema(s, job);

    CompressionCodecName codec = getCodec(job);
    String extension = codec.getExtension() + ".parquet";
    Path file = getDefaultWorkFile(job, name, extension);

    ParquetRecordWriter<SimpleGroup> realWriter;
    try {
        realWriter = (ParquetRecordWriter<SimpleGroup>) realOutputFormat.getRecordWriter(job, file, codec);
    } catch (InterruptedException e) {
        Thread.interrupted();
        throw new IOException(e);
    }

    return createRecordWriter(realWriter, fs, job, name, progress);
}