List of usage examples for org.apache.hadoop.mapred.JobConf.setBoolean

public void setBoolean(String name, boolean value)

Sets the value of the name property to a boolean.
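Before the examples, a minimal sketch of the call itself (the property name "example.flag" below is hypothetical, used only for illustration): setBoolean stores the flag in the job configuration, and getBoolean reads it back, returning the supplied default when the property is unset.

import org.apache.hadoop.mapred.JobConf;

public class SetBooleanSketch {
    public static void main(String[] args) {
        JobConf job = new JobConf();
        // "example.flag" is a hypothetical property name, not one used by the examples below
        job.setBoolean("example.flag", true);
        // getBoolean returns the stored value, or the given default if the property is unset
        boolean flag = job.getBoolean("example.flag", false);
        System.out.println(flag); // prints: true
    }
}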
From source file:edu.umn.cs.spatialHadoop.operations.Repartition.java
License:Open Source License
public static void repartitionMapReduce(Path inFile, Path outPath, CellInfo[] cellInfos,
        OperationsParams params) throws IOException, InterruptedException {
    String sindex = params.get("sindex");
    boolean overwrite = params.getBoolean("overwrite", false);
    Shape stockShape = params.getShape("shape");
    FileSystem outFs = outPath.getFileSystem(params);

    // Calculate number of partitions in output file
    // Copy blocksize from source file if it's globally indexed
    @SuppressWarnings("deprecation")
    final long blockSize = outFs.getDefaultBlockSize();

    // Calculate the dimensions of each partition based on gindex type
    if (cellInfos == null) {
        if (sindex.equals("grid")) {
            Rectangle input_mbr = FileMBR.fileMBR(inFile, params);
            long inFileSize = FileMBR.sizeOfLastProcessedFile;
            int num_partitions = calculateNumberOfPartitions(new Configuration(), inFileSize, outFs,
                    outPath, blockSize);
            GridInfo gridInfo = new GridInfo(input_mbr.x1, input_mbr.y1, input_mbr.x2, input_mbr.y2);
            gridInfo.calculateCellDimensions(num_partitions);
            cellInfos = gridInfo.getAllCells();
        } else if (sindex.equals("rtree") || sindex.equals("r+tree") || sindex.equals("str")
                || sindex.equals("str+")) {
            // Pack in rectangles using an RTree
            cellInfos = packInRectangles(inFile, outPath, params);
        } else {
            throw new RuntimeException("Unsupported spatial index: " + sindex);
        }
    }

    JobConf job = new JobConf(params, Repartition.class);
    job.setJobName("Repartition");

    // Overwrite output file
    if (outFs.exists(outPath)) {
        if (overwrite)
            outFs.delete(outPath, true);
        else
            throw new RuntimeException(
                    "Output file '" + outPath + "' already exists and overwrite flag is not set");
    }

    // Decide which map function to use depending on the type of global index
    if (sindex.equals("rtree") || sindex.equals("str")) {
        // Repartition without replication
        job.setMapperClass(RepartitionMapNoReplication.class);
    } else {
        // Repartition with replication (grid, str+, and r+tree)
        job.setMapperClass(RepartitionMap.class);
    }
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(stockShape.getClass());
    ShapeInputFormat.setInputPaths(job, inFile);
    job.setInputFormat(ShapeInputFormat.class);

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));

    FileOutputFormat.setOutputPath(job, outPath);
    if (sindex.equals("grid") || sindex.equals("str") || sindex.equals("str+")) {
        job.setOutputFormat(GridOutputFormat.class);
    } else if (sindex.equals("rtree") || sindex.equals("r+tree")) {
        // For now, the two types of local index are the same
        job.setOutputFormat(RTreeGridOutputFormat.class);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    SpatialSite.setCells(job, cellInfos);
    job.setBoolean(SpatialSite.OVERWRITE, overwrite);

    // Set reduce function
    job.setReducerClass(RepartitionReduce.class);
    job.setNumReduceTasks(
            Math.max(1, Math.min(cellInfos.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10)));

    // Set output committer that combines output files together
    job.setOutputCommitter(RepartitionOutputCommitter.class);

    JobClient.runJob(job);
}
From source file:edu.umn.cs.spatialHadoop.operations.Repartition.java
License:Open Source License
/**
 * Repartitions an input file according to the given list of cells.
 * @param inFile The input raw file that needs to be indexed.
 * @param outPath The output path where the index will be written.
 * @param stockShape An instance of the shapes stored in the input file.
 * @param blockSize The block size for the constructed index.
 * @param cellInfos A predefined set of cells to use as a global index.
 * @param sindex The type of index to build.
 * @param overwrite Whether to overwrite the output or not.
 * @throws IOException If an exception happens while preparing the job.
 */
public static void repartitionMapReduce(Path inFile, Path outPath, Shape stockShape, long blockSize,
        CellInfo[] cellInfos, String sindex, boolean overwrite) throws IOException {
    JobConf job = new JobConf(Repartition.class);
    job.setJobName("Repartition");
    FileSystem outFs = outPath.getFileSystem(job);

    // Overwrite output file
    if (outFs.exists(outPath)) {
        if (overwrite)
            outFs.delete(outPath, true);
        else
            throw new RuntimeException(
                    "Output file '" + outPath + "' already exists and overwrite flag is not set");
    }

    // Decide which map function to use depending on the type of global index
    if (sindex.equals("rtree") || sindex.equals("str")) {
        // Repartition without replication
        job.setMapperClass(RepartitionMapNoReplication.class);
    } else {
        // Repartition with replication (grid and r+tree)
        job.setMapperClass(RepartitionMap.class);
    }
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(stockShape.getClass());
    ShapeInputFormat.setInputPaths(job, inFile);
    job.setInputFormat(ShapeInputFormat.class);

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));

    FileOutputFormat.setOutputPath(job, outPath);
    if (sindex.equals("grid") || sindex.equals("str") || sindex.equals("str+")) {
        job.setOutputFormat(GridOutputFormat.class);
    } else if (sindex.equals("rtree") || sindex.equals("r+tree")) {
        // For now, the two types of local index are the same
        job.setOutputFormat(RTreeGridOutputFormat.class);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    SpatialSite.setCells(job, cellInfos);
    job.setBoolean(SpatialSite.OVERWRITE, overwrite);

    // Set reduce function
    job.setReducerClass(RepartitionReduce.class);
    job.setNumReduceTasks(
            Math.max(1, Math.min(cellInfos.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10)));

    // Set output committer that combines output files together
    job.setOutputCommitter(RepartitionOutputCommitter.class);

    if (blockSize != 0) {
        job.setLong("dfs.block.size", blockSize);
        job.setLong("fs.local.block.size", blockSize);
    }

    JobClient.runJob(job);
}
From source file:edu.umn.cs.spatialHadoop.temporal.RepartitionTemporal.java
License:Apache License
public static void repartitionMapReduce(Path[] inputPaths, Path outputPath, OperationsParams params)
        throws IOException, InterruptedException {
    String sindex = params.get("sindex");
    boolean overwrite = params.getBoolean("overwrite", false);
    Shape stockShape = params.getShape("shape");
    FileSystem outFs = outputPath.getFileSystem(params);
    @SuppressWarnings("deprecation")
    final long blockSize = outFs.getDefaultBlockSize();

    // Calculate the dimensions of each partition based on gindex type
    CellInfo[] cellInfos;
    if (sindex.equals("grid")) {
        Rectangle inputMBR = FileMBR.fileMBR(inputPaths[0], params);
        long inputFileSize = FileMBR.sizeOfLastProcessedFile;
        for (int i = 1; i < inputPaths.length; i++) {
            Rectangle currentInputMBR = FileMBR.fileMBR(inputPaths[i], params);
            inputMBR.expand(currentInputMBR);
            inputFileSize = inputFileSize + FileMBR.sizeOfLastProcessedFile;
        }

        int num_partitions = calculateNumberOfPartitions(new Configuration(), inputFileSize, outFs,
                outputPath, blockSize);
        GridInfo gridInfo = new GridInfo(inputMBR.x1, inputMBR.y1, inputMBR.x2, inputMBR.y2);
        gridInfo.calculateCellDimensions(num_partitions);
        cellInfos = gridInfo.getAllCells();
    } else if (sindex.equals("rtree") || sindex.equals("r+tree") || sindex.equals("str")
            || sindex.equals("str+")) {
        // Pack in rectangles using an RTree
        cellInfos = packInRectangles(inputPaths, outputPath, params, null);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    JobConf job = new JobConf(params, RepartitionTemporal.class);
    job.setJobName("RepartitionTemporal");

    // Overwrite output file
    if (outFs.exists(outputPath)) {
        if (overwrite)
            outFs.delete(outputPath, true);
        else
            throw new RuntimeException(
                    "Output file '" + outputPath + "' already exists and overwrite flag is not set");
    }

    // Decide which map function to use depending on the type of global index
    if (sindex.equals("rtree") || sindex.equals("str")) {
        // Repartition without replication
        job.setMapperClass(RepartitionMapNoReplication.class);
    } else {
        // Repartition with replication (grid, str+, and r+tree)
        job.setMapperClass(RepartitionMap.class);
    }
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(stockShape.getClass());
    CombinedSpatialInputFormat.setInputPaths(job, inputPaths);
    job.setInputFormat(CombinedSpatialInputFormat.class);

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));

    FileOutputFormat.setOutputPath(job, outputPath);
    if (sindex.equals("grid") || sindex.equals("str") || sindex.equals("str+")) {
        job.setOutputFormat(GridOutputFormat.class);
    } else if (sindex.equals("rtree") || sindex.equals("r+tree")) {
        // For now, the two types of local index are the same
        job.setOutputFormat(RTreeGridOutputFormat.class);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    SpatialSite.setCells(job, cellInfos);
    job.setBoolean(SpatialSite.OVERWRITE, overwrite);

    // Set reduce function
    job.setReducerClass(RepartitionReduce.class);
    job.setNumReduceTasks(
            Math.max(1, Math.min(cellInfos.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10)));

    // Set output committer that combines output files together
    job.setOutputCommitter(RepartitionOutputCommitter.class);

    JobClient.runJob(job);
}
From source file:edu.umn.cs.spatialHadoop.temporal.RepartitionTemporal.java
License:Apache License
public static void repartitionMapReduce(Path[] inputPaths, Path outputPath, Shape stockShape, long blockSize,
        CellInfo[] cellInfos, String sindex, boolean overwrite) throws IOException {
    JobConf job = new JobConf(Repartition.class);
    job.setJobName("RepartitionTemporal");
    FileSystem outFs = outputPath.getFileSystem(job);

    // Overwrite output file
    if (outFs.exists(outputPath)) {
        if (overwrite)
            outFs.delete(outputPath, true);
        else
            throw new RuntimeException(
                    "Output file '" + outputPath + "' already exists and overwrite flag is not set");
    }

    // Decide which map function to use depending on the type of global index
    if (sindex.equals("rtree") || sindex.equals("str")) {
        // Repartition without replication
        job.setMapperClass(RepartitionMapNoReplication.class);
    } else {
        // Repartition with replication (grid and r+tree)
        job.setMapperClass(RepartitionMap.class);
    }
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(stockShape.getClass());
    CombinedSpatialInputFormat.setInputPaths(job, inputPaths);
    job.setInputFormat(CombinedSpatialInputFormat.class);

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));

    FileOutputFormat.setOutputPath(job, outputPath);
    if (sindex.equals("grid") || sindex.equals("str") || sindex.equals("str+")) {
        job.setOutputFormat(GridOutputFormat.class);
    } else if (sindex.equals("rtree") || sindex.equals("r+tree")) {
        // For now, the two types of local index are the same
        job.setOutputFormat(RTreeGridOutputFormat.class);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    SpatialSite.setCells(job, cellInfos);
    job.setBoolean(SpatialSite.OVERWRITE, overwrite);

    // Set reduce function
    job.setReducerClass(RepartitionReduce.class);
    job.setNumReduceTasks(
            Math.max(1, Math.min(cellInfos.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10)));

    // Set output committer that combines output files together
    job.setOutputCommitter(RepartitionOutputCommitter.class);

    if (blockSize != 0) {
        job.setLong("dfs.block.size", blockSize);
        job.setLong("fs.local.block.size", blockSize);
    }

    JobClient.runJob(job);
}
From source file:edu.umn.cs.sthadoop.operations.STJoin.java
License:Open Source License
/**
 * @param inputPath
 * @param outputPath
 * @param params
 * @return
 * @throws IOException
 * @throws Exception
 * @throws InterruptedException
 */
private static long stJoin(Path inputPath, Path outputPath, OperationsParams params)
        throws IOException, Exception, InterruptedException {
    JobConf conf = new JobConf(new Configuration(), STJoin.class);
    FileSystem outfs = outputPath.getFileSystem(conf);
    outfs.delete(outputPath, true);
    conf.setJobName("STJoin");

    // pass params to the join map-reduce
    conf.set("timedistance", params.get("timedistance"));
    conf.set("spacedistance", params.get("spacedistance"));
    // conf.setMapOutputKeyClass(LongWritable.class);
    // conf.setMapOutputValueClass(Text.class);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);

    // Mapper settings
    conf.setMapperClass(STJoinMap.class);
    // conf.setReducerClass(STJoinReduce.class);
    // conf.setCombinerClass(STJoinReduce.class);
    conf.setBoolean("mapreduce.input.fileinputformat.input.dir.recursive", true);
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);
    conf.setNumReduceTasks(0);

    JobClient.runJob(conf).waitForCompletion();

    outfs = inputPath.getFileSystem(conf);
    outfs.delete(inputPath);
    return 0;
}
From source file:edu.yale.cs.hadoopdb.exec.DBJobBase.java
License:Apache License
/**
 * Job config initialization (command-line params etc).
 */
protected JobConf initConf(String[] args) throws Exception {

    List<String> other_args = new ArrayList<String>();
    Path configuration_file = null;
    boolean replication = false;

    for (int i = 0; i < args.length; ++i) {
        if (("-" + DBConst.DB_CONFIG_FILE).equals(args[i])) {
            configuration_file = new Path(args[++i]);
        } else if ("-replication".equals(args[i])) {
            replication = true;
        } else {
            other_args.add(args[i]);
        }
    }

    JobConf conf = null;
    conf = configureJob(other_args.toArray(new String[0]));
    LOG.info(conf.getJobName());
    LOG.info(conf.get(DBConst.DB_SQL_QUERY));

    if (conf.get(DBConst.DB_RELATION_ID) == null || conf.get(DBConst.DB_SQL_QUERY) == null
            || conf.get(DBConst.DB_RECORD_READER) == null) {
        throw new Exception(
                "ERROR: DB Job requires a relation, an SQL Query and a Record Reader class to be configured.\n"
                        + "Please specify using: conf.set(\"" + DBConst.DB_RELATION_ID
                        + "\", <relation name>), conf.set(\"" + DBConst.DB_SQL_QUERY + "\", <SQL QUERY>)\n"
                        + "and code an appropriate Record Reader and specify conf.set(\""
                        + DBConst.DB_RECORD_READER + "\", <Record reader class name>)\n");
    }

    if (replication) {
        conf.setBoolean(DBConst.DB_REPLICATION, true);
    }

    if (configuration_file == null) {
        if (conf.get(DBConst.DB_CONFIG_FILE) == null) {
            throw new Exception("No HadoopDB config file!");
        }
    } else {
        conf.set(DBConst.DB_CONFIG_FILE, configuration_file.toString());
    }

    setInputFormat(conf);

    return conf;
}
From source file:eu.larkc.iris.Main.java
License:Apache License
private JobConf setupJob(Configuration conf) {
    JobConf jobConf = new JobConf(conf, Main.class);

    // run the job here.

    /* REAL CLUSTER */
    jobConf.set("dfs.blocksize", "536870912");
    jobConf.set("dfs.namenode.handler.count", "40");
    //jobConf.set("dfs.replication", "1");
    jobConf.set("mapreduce.reduce.shuffle.parallelcopies", "10");
    jobConf.set("mapreduce.task.io.sort.factor", "100");
    jobConf.set("mapreduce.task.io.sort.mb", "1024");
    jobConf.set("io.file.buffer.size", "131072");
    jobConf.set("mapred.child.java.opts", "-Xmx2560m");
    jobConf.set("mapred.child.ulimit", "4194304");
    jobConf.set("mapred.min.split.size", "536870912");
    jobConf.set("mapreduce.input.fileinputformat.split.minsize", "536870912");
    jobConf.set("mapreduce.reduce.merge.inmem.threshold", "0");

    /* compression settings
    jobConf.set("mapreduce.map.output.compress", "false");
    jobConf.set("mapreduce.output.fileoutputformat.compress", "true");
    jobConf.set("mapreduce.output.fileoutputformat.compression.type", "BLOCK");
    ~~~ */

    //!!!IMPORTANT, if not : Caused by: java.io.FileNotFoundException: File does not exist: hdfs://ec2-50-19-191-200.compute-1.amazonaws.com:8020/user/root/lubm/facts/lubm50/data
    jobConf.setBoolean("mapred.input.dir.recursive", true);

    jobConf.set("cascading.serialization.tokens",
            "130=eu.larkc.iris.storage.IRIWritable,131=eu.larkc.iris.storage.StringTermWritable");
    defaultConfiguration.flowProperties.put("cascading.serialization.tokens",
            "130=eu.larkc.iris.storage.IRIWritable,131=eu.larkc.iris.storage.StringTermWritable");

    /*
    if( System.getProperty("log4j.logger") != null )
        defaultConfiguration.flowProperties.put( "log4j.logger", System.getProperty("log4j.logger") );
    */

    //jobConf.set("mapred.min.split.size", "134217728");
    //jobConf.set("mapred.child.java.opts", "-Xms64m -Xmx512m");

    jobConf.setMapSpeculativeExecution(false);
    jobConf.setReduceSpeculativeExecution(false);

    //FIXME
    //jobConf.setNumMapTasks(8);
    jobConf.setNumReduceTasks(32);

    FlowConnector.setDebugLevel(defaultConfiguration.flowProperties, DebugLevel.VERBOSE);
    MultiMapReducePlanner.setJobConf(defaultConfiguration.flowProperties, jobConf);

    //Flow.setJobPollingInterval(defaultConfiguration.flowProperties, 500);

    return jobConf;
}
From source file:fr.ens.biologie.genomique.eoulsan.modules.mgmt.hadoop.DistCp.java
License:LGPL
/**
 * Initialize DFSCopyFileMapper specific job-configuration.
 * @param conf : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args Arguments
 */
private static void setup(final Configuration conf, final JobConf jobConf, final Arguments args)
        throws IOException {
    jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());

    // set boolean values
    final boolean update = args.flags.contains(Options.UPDATE);
    final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE);
    jobConf.setBoolean(Options.UPDATE.propertyname, update);
    jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite);
    jobConf.setBoolean(Options.IGNORE_READ_FAILURES.propertyname,
            args.flags.contains(Options.IGNORE_READ_FAILURES));
    jobConf.setBoolean(Options.PRESERVE_STATUS.propertyname, args.flags.contains(Options.PRESERVE_STATUS));

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobConf);
    Path jobDirectory = new Path(jClient.getSystemDir(), NAME + "_" + randomId);
    jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

    long maxBytesPerMap = conf.getLong(BYTES_PER_MAP_LABEL, BYTES_PER_MAP);

    FileSystem dstfs = args.dst.getFileSystem(conf);
    boolean dstExists = dstfs.exists(args.dst);
    boolean dstIsDir = false;
    if (dstExists) {
        dstIsDir = dstfs.getFileStatus(args.dst).isDir();
    }

    // default logPath
    Path logPath = args.log;
    if (logPath == null) {
        String filename = "_distcp_logs_" + randomId;
        if (!dstExists || !dstIsDir) {
            Path parent = args.dst.getParent();
            if (null == parent) {
                // If dst is '/' on S3, it might not exist yet, but dst.getParent()
                // will return null. In this case, use '/' as its own parent to prevent
                // NPE errors below.
                parent = args.dst;
            }
            if (!dstfs.exists(parent)) {
                dstfs.mkdirs(parent);
            }
            logPath = new Path(parent, filename);
        } else {
            logPath = new Path(args.dst, filename);
        }
    }
    FileOutputFormat.setOutputPath(jobConf, logPath);

    // create src list, dst list
    FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

    Path srcfilelist = new Path(jobDirectory, "_distcp_src_files");
    jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
    SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist,
            LongWritable.class, FilePair.class, SequenceFile.CompressionType.NONE);

    Path dstfilelist = new Path(jobDirectory, "_distcp_dst_files");
    SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist, Text.class,
            Text.class, SequenceFile.CompressionType.NONE);

    Path dstdirlist = new Path(jobDirectory, "_distcp_dst_dirs");
    jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
    SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist, Text.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    // handle the case where the destination directory doesn't exist
    // and we've only a single src directory OR we're updating/overwriting
    // the contents of the destination directory.
    final boolean special = (args.srcs.size() == 1 && !dstExists) || update || overwrite;
    int srcCount = 0, cnsyncf = 0, dirsyn = 0;
    long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
    try {
        for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
            final Path src = srcItr.next();
            FileSystem srcfs = src.getFileSystem(conf);
            FileStatus srcfilestat = srcfs.getFileStatus(src);
            Path root = special && srcfilestat.isDir() ? src : src.getParent();
            if (srcfilestat.isDir()) {
                ++srcCount;
            }

            Stack<FileStatus> pathstack = new Stack<>();
            for (pathstack.push(srcfilestat); !pathstack.empty();) {
                FileStatus cur = pathstack.pop();
                FileStatus[] children = srcfs.listStatus(cur.getPath());
                for (int i = 0; i < children.length; i++) {
                    boolean skipfile = false;
                    final FileStatus child = children[i];
                    final String dst = makeRelative(root, child.getPath());
                    ++srcCount;

                    if (child.isDir()) {
                        pathstack.push(child);
                    } else {
                        // skip file if the src and the dst files are the same.
                        skipfile = update && sameFile(srcfs, child, dstfs, new Path(args.dst, dst));
                        // skip file if it exceed file limit or size limit
                        skipfile |= fileCount == args.filelimit || byteCount + child.getLen() > args.sizelimit;

                        if (!skipfile) {
                            ++fileCount;
                            byteCount += child.getLen();

                            // if (LOG.isTraceEnabled()) {
                            // LOG.trace("adding file " + child.getPath());
                            // }

                            ++cnsyncf;
                            cbsyncs += child.getLen();
                            if (cnsyncf > SYNC_FILE_MAX || cbsyncs > maxBytesPerMap) {
                                src_writer.sync();
                                dst_writer.sync();
                                cnsyncf = 0;
                                cbsyncs = 0L;
                            }
                        }
                    }

                    if (!skipfile) {
                        src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                                new FilePair(child, dst));
                    }

                    dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
                }

                if (cur.isDir()) {
                    String dst = makeRelative(root, cur.getPath());
                    dir_writer.append(new Text(dst), new FilePair(cur, dst));
                    if (++dirsyn > SYNC_FILE_MAX) {
                        dirsyn = 0;
                        dir_writer.sync();
                    }
                }
            }
        }
    } finally {
        checkAndClose(src_writer);
        checkAndClose(dst_writer);
        checkAndClose(dir_writer);
    }

    FileStatus dststatus = null;
    try {
        dststatus = dstfs.getFileStatus(args.dst);
    } catch (FileNotFoundException fnfe) {
        getLogger().info(args.dst + " does not exist.");
    }

    // create dest path dir if copying > 1 file
    if (dststatus == null) {
        if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
            throw new IOException("Failed to create" + args.dst);
        }
    }

    final Path sorted = new Path(jobDirectory, "_distcp_sorted");
    checkDuplication(jobfs, dstfilelist, sorted, conf);

    if (dststatus != null && args.flags.contains(Options.DELETE)) {
        deleteNonexisting(dstfs, dststatus, sorted, jobfs, jobDirectory, jobConf, conf);
    }

    Path tmpDir = new Path(
            (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
            "_distcp_tmp_" + randomId);
    jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());

    // Explicitly create the tmpDir to ensure that it can be cleaned
    // up by fullyDelete() later.
    tmpDir.getFileSystem(conf).mkdirs(tmpDir);

    getLogger().info("srcCount=" + srcCount);
    jobConf.setInt(SRC_COUNT_LABEL, srcCount);
    jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
    setMapCount(byteCount, jobConf);
}
From source file:io.prestosql.orc.OrcTester.java
License:Apache License
private static void assertFileContentsOrcHive(Type type, TempFile tempFile, Iterable<?> expectedValues)
        throws Exception {
    JobConf configuration = new JobConf(new Configuration(false));
    configuration.set(READ_COLUMN_IDS_CONF_STR, "0");
    configuration.setBoolean(READ_ALL_COLUMNS, false);

    Reader reader = OrcFile.createReader(new Path(tempFile.getFile().getAbsolutePath()),
            new ReaderOptions(configuration));
    org.apache.hadoop.hive.ql.io.orc.RecordReader recordReader = reader.rows();

    StructObjectInspector rowInspector = (StructObjectInspector) reader.getObjectInspector();
    StructField field = rowInspector.getStructFieldRef("test");

    Iterator<?> iterator = expectedValues.iterator();
    Object rowData = null;
    while (recordReader.hasNext()) {
        rowData = recordReader.next(rowData);
        Object expectedValue = iterator.next();

        Object actualValue = rowInspector.getStructFieldData(rowData, field);
        actualValue = decodeRecordReaderValue(type, actualValue);
        assertColumnValueEquals(type, actualValue, expectedValue);
    }
    assertFalse(iterator.hasNext());
}
From source file:io.prestosql.orc.OrcTester.java
License:Apache License
private static void assertFileContentsDwrfHive(Type type, TempFile tempFile, Iterable<?> expectedValues)
        throws Exception {
    JobConf configuration = new JobConf(new Configuration(false));
    configuration.set(READ_COLUMN_IDS_CONF_STR, "0");
    configuration.setBoolean(READ_ALL_COLUMNS, false);

    Path path = new Path(tempFile.getFile().getAbsolutePath());
    com.facebook.hive.orc.Reader reader = com.facebook.hive.orc.OrcFile
            .createReader(path.getFileSystem(configuration), path, configuration);

    boolean[] include = new boolean[reader.getTypes().size() + 100000];
    Arrays.fill(include, true);
    com.facebook.hive.orc.RecordReader recordReader = reader.rows(include);

    StructObjectInspector rowInspector = (StructObjectInspector) reader.getObjectInspector();
    StructField field = rowInspector.getStructFieldRef("test");

    Iterator<?> iterator = expectedValues.iterator();
    Object rowData = null;
    while (recordReader.hasNext()) {
        rowData = recordReader.next(rowData);
        Object expectedValue = iterator.next();

        Object actualValue = rowInspector.getStructFieldData(rowData, field);
        actualValue = decodeRecordReaderValue(type, actualValue);
        assertColumnValueEquals(type, actualValue, expectedValue);
    }
    assertFalse(iterator.hasNext());
}