List of usage examples for org.apache.hadoop.mapred JobConf setBoolean
public void setBoolean(String name, boolean value)
Sets the value of the name property to a boolean.
Parameters: name - the property name to set; value - the boolean value to store.
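Before the library-specific examples below, a minimal self-contained sketch of the basic pattern: setBoolean stores a flag under a property name, and getBoolean reads it back with a default when the key is unset. The property key "example.feature.enabled" is made up purely for illustration.

import org.apache.hadoop.mapred.JobConf;

public class SetBooleanExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // "example.feature.enabled" is a made-up key used only for illustration.
        conf.setBoolean("example.feature.enabled", true);
        // Read the flag back; the second argument is the default used when the key is absent.
        boolean enabled = conf.getBoolean("example.feature.enabled", false);
        System.out.println("example.feature.enabled = " + enabled);
    }
}

The value is stored as the string "true" or "false", so the same flag can also be supplied at launch time, for example through Hadoop's generic -D option when the job is run via ToolRunner.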
From source file:io.prestosql.plugin.hive.parquet.ParquetTester.java
License:Apache License
void assertRoundTrip(List<ObjectInspector> objectInspectors, Iterable<?>[] writeValues,
        Iterable<?>[] readValues, List<String> columnNames, List<Type> columnTypes,
        Optional<MessageType> parquetSchema, boolean singleLevelArray)
        throws Exception
{
    for (WriterVersion version : versions) {
        for (CompressionCodecName compressionCodecName : compressions) {
            for (ConnectorSession session : sessions) {
                try (TempFile tempFile = new TempFile("test", "parquet")) {
                    JobConf jobConf = new JobConf();
                    jobConf.setEnum(COMPRESSION, compressionCodecName);
                    jobConf.setBoolean(ENABLE_DICTIONARY, true);
                    jobConf.setEnum(WRITER_VERSION, version);
                    writeParquetColumn(jobConf, tempFile.getFile(), compressionCodecName,
                            createTableProperties(columnNames, objectInspectors),
                            getStandardStructObjectInspector(columnNames, objectInspectors),
                            getIterators(writeValues), parquetSchema, singleLevelArray);
                    assertFileContents(session, tempFile.getFile(), getIterators(readValues),
                            columnNames, columnTypes);
                }
            }
        }
    }
}
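The test above mixes setBoolean with setEnum to shape the Parquet writer. A stripped-down sketch of just that configuration step, assuming the statically imported constants are the usual ones from org.apache.parquet.hadoop.ParquetOutputFormat; this is an illustration, not the Presto test itself.

import org.apache.hadoop.mapred.JobConf;
import org.apache.parquet.column.ParquetProperties.WriterVersion;
import org.apache.parquet.hadoop.ParquetOutputFormat;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;

public class ParquetJobConfSketch {
    public static void main(String[] args) {
        JobConf jobConf = new JobConf();
        // Assumed to be the same keys the test imports statically from ParquetOutputFormat.
        jobConf.setEnum(ParquetOutputFormat.COMPRESSION, CompressionCodecName.SNAPPY);
        jobConf.setBoolean(ParquetOutputFormat.ENABLE_DICTIONARY, true);   // enable dictionary encoding
        jobConf.setEnum(ParquetOutputFormat.WRITER_VERSION, WriterVersion.PARQUET_1_0);
    }
}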
From source file:io.prestosql.plugin.hive.parquet.ParquetTester.java
License:Apache License
void assertMaxReadBytes(List<ObjectInspector> objectInspectors, Iterable<?>[] writeValues,
        Iterable<?>[] readValues, List<String> columnNames, List<Type> columnTypes,
        Optional<MessageType> parquetSchema, DataSize maxReadBlockSize)
        throws Exception
{
    WriterVersion version = PARQUET_1_0;
    CompressionCodecName compressionCodecName = UNCOMPRESSED;
    HiveClientConfig config = new HiveClientConfig()
            .setHiveStorageFormat(HiveStorageFormat.PARQUET)
            .setUseParquetColumnNames(false)
            .setParquetMaxReadBlockSize(maxReadBlockSize);
    ConnectorSession session = new TestingConnectorSession(
            new HiveSessionProperties(config, new OrcFileWriterConfig(), new ParquetFileWriterConfig())
                    .getSessionProperties());

    try (TempFile tempFile = new TempFile("test", "parquet")) {
        JobConf jobConf = new JobConf();
        jobConf.setEnum(COMPRESSION, compressionCodecName);
        jobConf.setBoolean(ENABLE_DICTIONARY, true);
        jobConf.setEnum(WRITER_VERSION, version);
        writeParquetColumn(jobConf, tempFile.getFile(), compressionCodecName,
                createTableProperties(columnNames, objectInspectors),
                getStandardStructObjectInspector(columnNames, objectInspectors),
                getIterators(writeValues), parquetSchema, false);

        Iterator<?>[] expectedValues = getIterators(readValues);
        try (ConnectorPageSource pageSource = getFileFormat().createFileFormatReader(
                session, HDFS_ENVIRONMENT, tempFile.getFile(), columnNames, columnTypes)) {
            assertPageSource(columnTypes, expectedValues, pageSource,
                    Optional.of(getParquetMaxReadBlockSize(session).toBytes()));
            assertFalse(stream(expectedValues).allMatch(Iterator::hasNext));
        }
    }
}
From source file:io.prestosql.rcfile.RcFileTester.java
License:Apache License
private static <K extends LongWritable, V extends BytesRefArrayWritable> void assertFileContentsOld(
        Type type, TempFile tempFile, Format format, Iterable<?> expectedValues)
        throws Exception
{
    JobConf configuration = new JobConf(new Configuration(false));
    configuration.set(READ_COLUMN_IDS_CONF_STR, "0");
    configuration.setBoolean(READ_ALL_COLUMNS, false);

    Properties schema = new Properties();
    schema.setProperty(META_TABLE_COLUMNS, "test");
    schema.setProperty(META_TABLE_COLUMN_TYPES, getJavaObjectInspector(type).getTypeName());

    Deserializer deserializer;
    if (format == Format.BINARY) {
        deserializer = new LazyBinaryColumnarSerDe();
    }
    else {
        deserializer = new ColumnarSerDe();
    }
    deserializer.initialize(configuration, schema);
    configuration.set(SERIALIZATION_LIB, deserializer.getClass().getName());

    InputFormat<K, V> inputFormat = new RCFileInputFormat<>();
    RecordReader<K, V> recordReader = inputFormat.getRecordReader(
            new FileSplit(new Path(tempFile.getFile().getAbsolutePath()), 0, tempFile.getFile().length(), (String[]) null),
            configuration,
            NULL);

    K key = recordReader.createKey();
    V value = recordReader.createValue();

    StructObjectInspector rowInspector = (StructObjectInspector) deserializer.getObjectInspector();
    StructField field = rowInspector.getStructFieldRef("test");

    Iterator<?> iterator = expectedValues.iterator();
    while (recordReader.next(key, value)) {
        Object expectedValue = iterator.next();

        Object rowData = deserializer.deserialize(value);
        Object actualValue = rowInspector.getStructFieldData(rowData, field);
        actualValue = decodeRecordReaderValue(type, actualValue);
        assertColumnValueEquals(type, actualValue, expectedValue);
    }
    assertFalse(iterator.hasNext());
}
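The method above uses setBoolean to disable whole-row reads so only the projected column ids are materialized. A minimal sketch of just that projection step, assuming READ_COLUMN_IDS_CONF_STR and READ_ALL_COLUMNS are the statically imported constants from Hive's ColumnProjectionUtils; an illustration only, not the Presto test.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.mapred.JobConf;

public class ColumnProjectionSketch {
    public static void main(String[] args) {
        // Start from an empty Configuration so no site defaults leak in, as in the test.
        JobConf conf = new JobConf(new Configuration(false));
        // Read only column 0 ...
        conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
        // ... and do not materialize every column of each row.
        conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
        System.out.println("read all columns: "
                + conf.getBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, true));
    }
}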
From source file:it.crs4.pydoop.pipes.Submitter.java
License:Apache License
/**
 * Set whether the job is using a Java RecordReader.
 * @param conf the configuration to modify
 * @param value the new value
 */
public static void setIsJavaRecordReader(JobConf conf, boolean value) {
    conf.setBoolean(Submitter.IS_JAVA_RR, value);
}
From source file:it.crs4.pydoop.pipes.Submitter.java
License:Apache License
/**
 * Set whether the Mapper is written in Java.
 * @param conf the configuration to modify
 * @param value the new value
 */
public static void setIsJavaMapper(JobConf conf, boolean value) {
    conf.setBoolean(Submitter.IS_JAVA_MAP, value);
}
From source file:it.crs4.pydoop.pipes.Submitter.java
License:Apache License
/**
 * Set whether the Reducer is written in Java.
 * @param conf the configuration to modify
 * @param value the new value
 */
public static void setIsJavaReducer(JobConf conf, boolean value) {
    conf.setBoolean(Submitter.IS_JAVA_REDUCE, value);
}
From source file:it.crs4.pydoop.pipes.Submitter.java
License:Apache License
/**
 * Set whether the job will use a Java RecordWriter.
 * @param conf the configuration to modify
 * @param value the new value to set
 */
public static void setIsJavaRecordWriter(JobConf conf, boolean value) {
    conf.setBoolean(Submitter.IS_JAVA_RW, value);
}
From source file:it.crs4.pydoop.pipes.Submitter.java
License:Apache License
/**
 * Set whether to keep the command file for debugging
 * @param conf the configuration to modify
 * @param keep the new value
 */
public static void setKeepCommandFile(JobConf conf, boolean keep) {
    conf.setBoolean(Submitter.PRESERVE_COMMANDFILE, keep);
}
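Taken together, the setters above simply record boolean flags on the JobConf; a caller typically sets the whole group while assembling a pipes job. A hedged sketch of that calling pattern, using the it.crs4.pydoop.pipes.Submitter methods listed above; the specific combination of flags is illustrative, not taken from the pydoop sources.

import org.apache.hadoop.mapred.JobConf;

import it.crs4.pydoop.pipes.Submitter;

public class PipesFlagsSketch {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // Keep the record reader and writer on the Java side ...
        Submitter.setIsJavaRecordReader(conf, true);
        Submitter.setIsJavaRecordWriter(conf, true);
        // ... while the map and reduce logic run in the external pipes executable.
        Submitter.setIsJavaMapper(conf, false);
        Submitter.setIsJavaReducer(conf, false);
        // Keep the generated command file around for debugging.
        Submitter.setKeepCommandFile(conf, true);
    }
}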
From source file:it.isislab.sof.core.engine.hadoop.sshclient.utils.simulation.executor.SOF.java
License:Apache License
public static void main(String[] args) {
    /**
     * aids /home/michele/Scrivania/aids netlogo /home/michele/Scrivania/aids/aids.nlogo
     * /home/michele/Scrivania/aids/input.tmp /home/michele/Scrivania/aids/output
     * /home/michele/Scrivania/aids/output.xml false pepp ciao
     */
    /*
    try {
        //Runtime.getRuntime().exec("rm -r /home/lizard87/Desktop/mason_test/output");
        Runtime.getRuntime().exec("rm -r /home/michele/Scrivania/aids/output");
    } catch (IOException e) {e.printStackTrace();}
    */

    if (args.length < 9 || args.length == 11 || args.length == 12 || args.length >= 15) {
        System.out.println("Usage:");
        System.out.println("java -jar SCUD.jar "
                + "<simulation_name> "
                + "<simulation_path_home> "
                + "<simulation_type[mason |netlogo |generic]>"
                + "<simulation_generic_interpreter_path>"
                + "<simultion_program_path> "
                + "<simulation_mapper_input_path> "
                + "<simulation_mapper_output_path> "
                + "<simulation_output_domain_xmlfile> "
                + "<simulation_input_path> "
                + "<<simulation_rating_path>>"
                + "<oneshot[one|loop]> "
                + "<author_name> "
                + "<simulation_description> "
                + "<path_interpreter_evaluate_file> "
                + "<evaluate_file_path>");
        System.exit(-1);
    }

    Configuration conf = null;
    JobConf job = null;
    String AUTHOR = null;                              /* author name */
    String SIMULATION_NAME = null;                     /* simulation name */
    String SIMULATION_HOME = null;                     /* path simulation */
    String SIM_TYPE = null;                            /* mason, netlogo, generic */
    String SIM_EXECUTABLE_SIMULATION_INTERPRETER_PATH = null;
    String SIM_EXECUTABLE_SIMULATION_PROGRAM = null;   /* executable program *.jar | *.nlogo */
    String SIM_EXECUTION_INPUT_DATA_MAPPER = null;     /* input.data path */
    String SIM_EXECUTION_OUTPUT_MAPPER = null;         /* output loop(i) path */
    String SIM_DESCRIPTION_OUTPUT_XML_DOMAIN = null;   /* path of domain file */
    String SIM_EXECUTION_INPUT_XML = null;             /* execution input path */
    boolean ISLOOP = false;                            /* false[one] | true[loop] */
    //String DESCRIPTION=null;                         /* simulations' description */
    String INTERPRETER_REMOTE_PATH_EVALUATION = null;  /* remote program bin path for executing EvalFoo */
    String EXECUTABLE_RATING_FILE = null;              /* path of rating file */
    String SIM_RATING_PATH = null;

    // aids /home/michele/Scrivania/aids netlogo /home/michele/Scrivania/aids/aids.nlogo
    // /home/michele/Scrivania/aids/input.tmp /home/michele/Scrivania/aids/output
    // /home/michele/Scrivania/aids/domain.xml /home/michele/Scrivania/aids/input loop pepp ciao
    // /usr/bin/python /home/michele/Scrivania/aids/evaluate.py

    if (args.length == 13) {
        SIMULATION_NAME = args[0];
        SIMULATION_HOME = args[1];
        SIM_TYPE = args[2];
        SIM_EXECUTABLE_SIMULATION_PROGRAM = args[3];
        SIM_EXECUTION_INPUT_DATA_MAPPER = args[4];
        SIM_EXECUTION_OUTPUT_MAPPER = args[5];
        SIM_DESCRIPTION_OUTPUT_XML_DOMAIN = args[6];
        SIM_EXECUTION_INPUT_XML = args[7];
        SIM_RATING_PATH = args[8];
        ISLOOP = Boolean.parseBoolean(args[9]);
        AUTHOR = args[10];
        //DESCRIPTION=args[11];
        INTERPRETER_REMOTE_PATH_EVALUATION = args[11];
        EXECUTABLE_RATING_FILE = args[12];
        //System.out.println(DESCRIPTION);
        //System.out.println(INTERPRETER_REMOTE_PATH_EVALUATION);
    } else if (args.length == 9) {
        SIMULATION_NAME = args[0];
        SIMULATION_HOME = args[1];
        SIM_TYPE = args[2];
        SIM_EXECUTABLE_SIMULATION_PROGRAM = args[3];
        SIM_EXECUTION_INPUT_DATA_MAPPER = args[4];
        SIM_EXECUTION_OUTPUT_MAPPER = args[5];
        SIM_DESCRIPTION_OUTPUT_XML_DOMAIN = args[6];
        ISLOOP = Boolean.parseBoolean(args[7]);
        AUTHOR = args[8];
        //DESCRIPTION=args[9];
    } else if (args.length == 14) {
        SIMULATION_NAME = args[0];
        SIMULATION_HOME = args[1];
        SIM_TYPE = args[2];
        SIM_EXECUTABLE_SIMULATION_INTERPRETER_PATH = args[3];
        SIM_EXECUTABLE_SIMULATION_PROGRAM = args[4];
        SIM_EXECUTION_INPUT_DATA_MAPPER = args[5];
        SIM_EXECUTION_OUTPUT_MAPPER = args[6];
        SIM_DESCRIPTION_OUTPUT_XML_DOMAIN = args[7];
        SIM_EXECUTION_INPUT_XML = args[8];
        SIM_RATING_PATH = args[9];
        ISLOOP = Boolean.parseBoolean(args[10]);
        AUTHOR = args[11];
        //DESCRIPTION=args[12];
        INTERPRETER_REMOTE_PATH_EVALUATION = args[12];
        EXECUTABLE_RATING_FILE = args[13];
    } else if (args.length == 10) {
        SIMULATION_NAME = args[0];
        SIMULATION_HOME = args[1];
        SIM_TYPE = args[2];
        SIM_EXECUTABLE_SIMULATION_INTERPRETER_PATH = args[3];
        SIM_EXECUTABLE_SIMULATION_PROGRAM = args[4];
        SIM_EXECUTION_INPUT_DATA_MAPPER = args[5];
        SIM_EXECUTION_OUTPUT_MAPPER = args[6];
        SIM_DESCRIPTION_OUTPUT_XML_DOMAIN = args[7];
        ISLOOP = Boolean.parseBoolean(args[8]);
        AUTHOR = args[9];
        //DESCRIPTION=args[10];
    }

    if (!(SIM_TYPE.equalsIgnoreCase("mason") || SIM_TYPE.equalsIgnoreCase("netlogo")
            || SIM_TYPE.equalsIgnoreCase("generic"))) {
        System.exit(-2);
    }

    conf = new Configuration();
    job = new JobConf(conf, SOF.class);
    job.setJobName(SIMULATION_NAME /* SIMULATION NAME */);
    job.set("simulation.home", SIMULATION_HOME);
    job.set("simulation.name", SIMULATION_NAME);
    job.set("simulation.type", SIM_TYPE);
    if (SIM_TYPE.equalsIgnoreCase("generic")) {
        job.set("simulation.interpreter.genericsim", SIM_EXECUTABLE_SIMULATION_INTERPRETER_PATH);
    }
    job.set("simulation.program.simulation", SIM_EXECUTABLE_SIMULATION_PROGRAM);
    job.set("simulation.executable.input", SIM_EXECUTION_INPUT_DATA_MAPPER);
    job.set("simulation.executable.output", SIM_EXECUTION_OUTPUT_MAPPER);
    job.setBoolean("simulation.executable.mode", ISLOOP);
    //job.set("simulation.executable.mode", ISLOOP);
    job.set("simulation.executable.author", AUTHOR);
    //job.set("simulation.executable.description", DESCRIPTION);
    job.set("simulation.description.output.domain", SIM_DESCRIPTION_OUTPUT_XML_DOMAIN);

    /**
     * GENERATES THE .TMP
     * COMMENT OUT THE LINE
     * LOCAL TEST
     * FOR LOCAL USE ONLY
     */
    //XmlToText.convertXmlFileToFileText(conf,"/home/lizard87/Desktop/mason_test/input.xml");
    //XmlToText.convertXmlFileToFileText(conf,"/home/lizard87/Desktop/input.xml");
    //XmlToText.convertXmlFileToFileText(conf,"/home/lizard87/Desktop/aids/input.xml");

    if (ISLOOP) {
        job.set("simulation.description.input", SIM_EXECUTION_INPUT_XML);
        job.set("simulation.program.rating", EXECUTABLE_RATING_FILE);
        //job.set("simulation.interpreter.selection", INTERPRETER_REMOTE_PATH_SELECTION);
        job.set("simulation.interpreter.rating", INTERPRETER_REMOTE_PATH_EVALUATION);
        job.set("simulation.executable.loop.rating", SIM_RATING_PATH);
    }

    FileInputFormat.addInputPath(job, new Path(SIM_EXECUTION_INPUT_DATA_MAPPER) /* DIRECTORY INPUT */);
    FileOutputFormat.setOutputPath(job, new Path(SIM_EXECUTION_OUTPUT_MAPPER));

    if (SIM_TYPE.equalsIgnoreCase("mason")) {
        job.setMapperClass(SOFMapperMason.class);
        job.setReducerClass(SOFReducerMason.class);
    } else if (SIM_TYPE.equalsIgnoreCase("netlogo")) {
        job.setMapperClass(SOFMapperNetLogo.class);
        job.setReducerClass(SOFReducerNetLogo.class);
    } else if (SIM_TYPE.equalsIgnoreCase("generic")) {
        job.setMapperClass(SOFMapperGeneric.class);
        job.setReducerClass(SOFReducerGeneric.class);
    }

    job.setOutputKeyClass(org.apache.hadoop.io.Text.class);
    job.setOutputValueClass(org.apache.hadoop.io.Text.class);

    JobClient jobc;
    try {
        jobc = new JobClient(job);
        System.out.println(jobc + " " + job);
        RunningJob runjob;
        runjob = JobClient.runJob(job);
        while (runjob.getJobStatus().equals(JobStatus.SUCCEEDED)) {
        }
        System.exit(0);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
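The SOF driver above stores the loop flag with job.setBoolean("simulation.executable.mode", ISLOOP). On the task side such a flag is normally read back through getBoolean in the mapper's configure(JobConf) hook. The mapper below is a hypothetical sketch of that read-back, not part of the SOF sources; only the property key is taken from the driver.

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

// Hypothetical mapper: shows the getBoolean read-back side of the driver's setBoolean call.
public class LoopAwareMapper extends MapReduceBase implements Mapper<Text, Text, Text, Text> {
    private boolean isLoop;

    @Override
    public void configure(JobConf job) {
        // Same key the driver wrote; default to false if the flag is missing.
        isLoop = job.getBoolean("simulation.executable.mode", false);
    }

    @Override
    public void map(Text key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
            throws IOException {
        if (isLoop) {
            // loop-mode handling would go here
        }
        output.collect(key, value);
    }
}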
From source file:ivory.core.preprocess.BuildTargetLangWeightedIntDocVectors.java
License:Apache License
@SuppressWarnings("deprecation")
public int runTool() throws Exception {
    // sLogger.setLevel(Level.DEBUG);

    sLogger.info("PowerTool: GetTargetLangWeightedIntDocVectors");

    JobConf conf = new JobConf(BuildTargetLangWeightedIntDocVectors.class);
    FileSystem fs = FileSystem.get(conf);

    String indexPath = getConf().get("Ivory.IndexPath");
    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);

    String outputPath = env.getWeightedIntDocVectorsDirectory();
    int mapTasks = getConf().getInt("Ivory.NumMapTasks", 0);
    int minSplitSize = getConf().getInt("Ivory.MinSplitSize", 0);
    String collectionName = getConf().get("Ivory.CollectionName");

    sLogger.info("Characteristics of the collection:");
    sLogger.info(" - CollectionName: " + collectionName);
    sLogger.info("Characteristics of the job:");
    sLogger.info(" - NumMapTasks: " + mapTasks);
    sLogger.info(" - MinSplitSize: " + minSplitSize);

    String vocabFile = getConf().get("Ivory.FinalVocab");
    DistributedCache.addCacheFile(new URI(vocabFile), conf);

    Path inputPath = new Path(PwsimEnvironment.getFileNameWithPars(indexPath, "TermDocs"));
    Path weightedVectorsPath = new Path(outputPath);

    if (fs.exists(weightedVectorsPath)) {
        sLogger.info("Output path already exists!");
        return -1;
    }

    conf.setJobName("GetWeightedIntDocVectors:" + collectionName);
    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(0);
    conf.setInt("mapred.min.split.size", minSplitSize);
    conf.set("mapred.child.java.opts", "-Xmx2048m");
    conf.setBoolean("Ivory.Normalize", getConf().getBoolean("Ivory.Normalize", false));

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, weightedVectorsPath);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(WeightedIntDocVector.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(WeightedIntDocVector.class);
    conf.setMapperClass(MyMapper.class);

    long startTime = System.currentTimeMillis();
    RunningJob rj = JobClient.runJob(conf);
    sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    Counters counters = rj.getCounters();
    long numOfDocs = (long) counters.findCounter(Docs.Total).getCounter();

    return (int) numOfDocs;
}