List of usage examples for org.apache.hadoop.mapreduce.Job#getCounters
public Counters getCounters() throws IOException
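
Every example below follows the same pattern: build and configure a Job, wait for it to complete, then call getCounters() on the finished job. As a minimal sketch of that pattern (the job setup here is a placeholder, not taken from any of the sources below; TaskCounter.MAP_INPUT_RECORDS is a built-in Hadoop counter):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCounter;

public class GetCountersSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "counter example");
    // ... set mapper, input/output formats, and paths here ...
    boolean success = job.waitForCompletion(true);
    // getCounters() returns the aggregated counters for the job; it may
    // involve an RPC to the cluster and throws IOException on failure.
    Counters counters = job.getCounters();
    // Built-in task counters live in the TaskCounter enum group.
    Counter mapInputRecords = counters.findCounter(TaskCounter.MAP_INPUT_RECORDS);
    System.out.println("Map input records: " + mapInputRecords.getValue());
    System.exit(success ? 0 : 1);
  }
}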
From source file: org.apache.blur.mapreduce.lib.BlurInputFormatTest.java
License: Apache License
private void runTest(String tableName, boolean disableFast, Path fileCache)
    throws IOException, BlurException, TException, InterruptedException, ClassNotFoundException {
  FileSystem fileSystem = miniCluster.getFileSystem();
  Path root = new Path(fileSystem.getUri() + "/");
  creatTable(tableName, new Path(root, "tables"), disableFast);
  loadTable(tableName, 100, 100);
  Iface client = getClient();
  TableDescriptor tableDescriptor = client.describe(tableName);

  Job job = Job.getInstance(conf, "Read Data");
  job.setJarByClass(BlurInputFormatTest.class);
  job.setMapperClass(TestMapper.class);
  job.setInputFormatClass(BlurInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setNumReduceTasks(0);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(TableBlurRecord.class);

  Path output = new Path(new Path(root, "output"), tableName);

  String snapshot = UUID.randomUUID().toString();
  client.createSnapshot(tableName, snapshot);

  if (fileCache != null) {
    BlurInputFormat.setLocalCachePath(job, fileCache);
  }

  BlurInputFormat.setMaxNumberOfMaps(job, 1);
  BlurInputFormat.setZooKeeperConnectionStr(job, miniCluster.getZkConnectionString());
  BlurInputFormat.addTable(job, tableDescriptor, snapshot);
  FileOutputFormat.setOutputPath(job, output);

  try {
    assertTrue(job.waitForCompletion(true));
    Counters counters = job.getCounters();
    assertMapTask(1, counters);
  } finally {
    client.removeSnapshot(tableName, snapshot);
  }

  final Map<Text, TableBlurRecord> results = new TreeMap<Text, TableBlurRecord>();
  walkOutput(output, conf, new ResultReader() {
    @Override
    public void read(Text rowId, TableBlurRecord tableBlurRecord) {
      results.put(new Text(rowId), new TableBlurRecord(tableBlurRecord));
    }
  });
  int rowId = 100;
  for (Entry<Text, TableBlurRecord> e : results.entrySet()) {
    Text r = e.getKey();
    assertEquals(new Text("row-" + rowId), r);
    BlurRecord blurRecord = new BlurRecord();
    blurRecord.setRowId("row-" + rowId);
    blurRecord.setRecordId("record-" + rowId);
    blurRecord.setFamily("fam0");
    blurRecord.addColumn("col0", "value-" + rowId);
    TableBlurRecord tableBlurRecord = new TableBlurRecord(new Text(tableName), blurRecord);
    assertEquals(tableBlurRecord, e.getValue());
    rowId++;
  }
  assertEquals(200, rowId);
}
From source file: org.apache.blur.mapreduce.lib.BlurOutputFormatMiniClusterTest.java
License: Apache License
@Test
public void testBlurOutputFormat() throws IOException, InterruptedException, ClassNotFoundException,
    BlurException, TException {
  fileSystem.delete(inDir, true);
  String tableName = "testBlurOutputFormat";
  writeRecordsFile("in/part1", 1, 1, 1, 1, "cf1");
  writeRecordsFile("in/part2", 1, 1, 2, 1, "cf1");

  Job job = Job.getInstance(conf, "blur index");
  job.setJarByClass(BlurOutputFormatMiniClusterTest.class);
  job.setMapperClass(CsvBlurMapper.class);
  job.setInputFormatClass(TextInputFormat.class);

  FileInputFormat.addInputPath(job, new Path(TEST_ROOT_DIR + "/in"));
  String tableUri = new Path(TEST_ROOT_DIR + "/blur/" + tableName)
      .makeQualified(fileSystem.getUri(), fileSystem.getWorkingDirectory()).toString();
  CsvBlurMapper.addColumns(job, "cf1", "col");

  TableDescriptor tableDescriptor = new TableDescriptor();
  tableDescriptor.setShardCount(1);
  tableDescriptor.setTableUri(tableUri);
  tableDescriptor.setName(tableName);

  Iface client = getClient();
  client.createTable(tableDescriptor);

  BlurOutputFormat.setupJob(job, tableDescriptor);
  Path output = new Path(TEST_ROOT_DIR + "/out");
  BlurOutputFormat.setOutputPath(job, output);

  Path tablePath = new Path(tableUri);
  Path shardPath = new Path(tablePath, ShardUtil.getShardName(0));
  FileStatus[] listStatus = fileSystem.listStatus(shardPath);
  assertEquals(3, listStatus.length);
  System.out.println("======" + listStatus.length);
  for (FileStatus fileStatus : listStatus) {
    System.out.println(fileStatus.getPath());
  }

  assertTrue(job.waitForCompletion(true));
  Counters ctrs = job.getCounters();
  System.out.println("Counters: " + ctrs);

  client.loadData(tableName, output.toString());

  while (true) {
    TableStats tableStats = client.tableStats(tableName);
    System.out.println(tableStats);
    if (tableStats.getRowCount() > 0) {
      break;
    }
    Thread.sleep(100);
  }

  assertTrue(fileSystem.exists(tablePath));
  assertFalse(fileSystem.isFile(tablePath));

  FileStatus[] listStatusAfter = fileSystem.listStatus(shardPath);
  assertEquals(11, listStatusAfter.length);
}
From source file: org.apache.blur.mapreduce.lib.BlurOutputFormatTest.java
License: Apache License
@Test
public void testBlurOutputFormat() throws IOException, InterruptedException, ClassNotFoundException {
  Path input = getInDir();
  Path output = getOutDir();
  _fileSystem.delete(input, true);
  _fileSystem.delete(output, true);
  writeRecordsFile(new Path(input, "part1"), 1, 1, 1, 1, "cf1");
  writeRecordsFile(new Path(input, "part2"), 1, 1, 2, 1, "cf1");

  Job job = Job.getInstance(_conf, "blur index");
  job.setJarByClass(BlurOutputFormatTest.class);
  job.setMapperClass(CsvBlurMapper.class);
  job.setInputFormatClass(TextInputFormat.class);

  FileInputFormat.addInputPath(job, input);
  CsvBlurMapper.addColumns(job, "cf1", "col");

  Path tablePath = new Path(new Path(_root, "table"), "test");

  TableDescriptor tableDescriptor = new TableDescriptor();
  tableDescriptor.setShardCount(1);
  tableDescriptor.setTableUri(tablePath.toString());
  tableDescriptor.setName("test");

  createShardDirectories(tablePath, 1);

  BlurOutputFormat.setupJob(job, tableDescriptor);
  BlurOutputFormat.setOutputPath(job, output);

  assertTrue(job.waitForCompletion(true));
  Counters ctrs = job.getCounters();
  System.out.println("Counters: " + ctrs);

  Path path = new Path(output, ShardUtil.getShardName(0));
  dump(path, _conf);
  Collection<Path> commitedTasks = getCommitedTasks(path);
  assertEquals(1, commitedTasks.size());

  DirectoryReader reader = DirectoryReader.open(new HdfsDirectory(_conf, commitedTasks.iterator().next()));
  assertEquals(2, reader.numDocs());
  reader.close();
}
From source file: org.apache.blur.mapreduce.lib.BlurOutputFormatTest.java
License: Apache License
@Test
public void testBlurOutputFormatOverFlowTest()
    throws IOException, InterruptedException, ClassNotFoundException {
  Path input = getInDir();
  Path output = getOutDir();
  _fileSystem.delete(input, true);
  _fileSystem.delete(output, true);
  // 1500 * 50 = 75,000
  writeRecordsFile(new Path(input, "part1"), 1, 50, 1, 1500, "cf1");
  // 100 * 50 = 5,000
  writeRecordsFile(new Path(input, "part2"), 1, 50, 2000, 100, "cf1");

  Job job = Job.getInstance(_conf, "blur index");
  job.setJarByClass(BlurOutputFormatTest.class);
  job.setMapperClass(CsvBlurMapper.class);
  job.setInputFormatClass(TextInputFormat.class);

  FileInputFormat.addInputPath(job, input);
  CsvBlurMapper.addColumns(job, "cf1", "col");

  Path tablePath = new Path(new Path(_root, "table"), "test");

  TableDescriptor tableDescriptor = new TableDescriptor();
  tableDescriptor.setShardCount(1);
  tableDescriptor.setTableUri(tablePath.toString());
  tableDescriptor.setName("test");

  createShardDirectories(tablePath, 1);

  BlurOutputFormat.setupJob(job, tableDescriptor);
  BlurOutputFormat.setOutputPath(job, output);
  BlurOutputFormat.setIndexLocally(job, true);
  BlurOutputFormat.setOptimizeInFlight(job, false);

  assertTrue(job.waitForCompletion(true));
  Counters ctrs = job.getCounters();
  System.out.println("Counters: " + ctrs);

  Path path = new Path(output, ShardUtil.getShardName(0));
  Collection<Path> commitedTasks = getCommitedTasks(path);
  assertEquals(1, commitedTasks.size());

  DirectoryReader reader = DirectoryReader.open(new HdfsDirectory(_conf, commitedTasks.iterator().next()));
  assertEquals(80000, reader.numDocs());
  reader.close();
}
From source file: org.apache.blur.mapreduce.lib.BlurOutputFormatTest.java
License: Apache License
@Test
public void testBlurOutputFormatOverFlowMultipleReducersTest()
    throws IOException, InterruptedException, ClassNotFoundException {
  Path input = getInDir();
  Path output = getOutDir();
  _fileSystem.delete(input, true);
  _fileSystem.delete(output, true);
  // 1500 * 50 = 75,000
  writeRecordsFile(new Path(input, "part1"), 1, 50, 1, 1500, "cf1");
  // 100 * 50 = 5,000
  writeRecordsFile(new Path(input, "part2"), 1, 50, 2000, 100, "cf1");

  Job job = Job.getInstance(_conf, "blur index");
  job.setJarByClass(BlurOutputFormatTest.class);
  job.setMapperClass(CsvBlurMapper.class);
  job.setInputFormatClass(TextInputFormat.class);

  FileInputFormat.addInputPath(job, input);
  CsvBlurMapper.addColumns(job, "cf1", "col");

  Path tablePath = new Path(new Path(_root, "table"), "test");

  TableDescriptor tableDescriptor = new TableDescriptor();
  tableDescriptor.setShardCount(2);
  tableDescriptor.setTableUri(tablePath.toString());
  tableDescriptor.setName("test");

  createShardDirectories(output, 2);

  BlurOutputFormat.setupJob(job, tableDescriptor);
  BlurOutputFormat.setOutputPath(job, output);
  BlurOutputFormat.setIndexLocally(job, false);
  BlurOutputFormat.setDocumentBufferStrategy(job, DocumentBufferStrategyHeapSize.class);
  BlurOutputFormat.setMaxDocumentBufferHeapSize(job, 128 * 1024);

  assertTrue(job.waitForCompletion(true));
  Counters ctrs = job.getCounters();
  System.out.println("Counters: " + ctrs);

  long total = 0;
  for (int i = 0; i < tableDescriptor.getShardCount(); i++) {
    Path path = new Path(output, ShardUtil.getShardName(i));
    Collection<Path> commitedTasks = getCommitedTasks(path);
    assertEquals(1, commitedTasks.size());

    DirectoryReader reader = DirectoryReader.open(new HdfsDirectory(_conf, commitedTasks.iterator().next()));
    total += reader.numDocs();
    reader.close();
  }
  assertEquals(80000, total);
}
From source file: org.apache.blur.mapreduce.lib.BlurOutputFormatTest.java
License: Apache License
@Test
public void testBlurOutputFormatOverFlowMultipleReducersWithReduceMultiplierTest()
    throws IOException, InterruptedException, ClassNotFoundException {
  Path input = getInDir();
  Path output = getOutDir();
  _fileSystem.delete(input, true);
  _fileSystem.delete(output, true);
  // 1500 * 50 = 75,000
  writeRecordsFile(new Path(input, "part1"), 1, 50, 1, 1500, "cf1");
  // 100 * 50 = 5,000
  writeRecordsFile(new Path(input, "part2"), 1, 50, 2000, 100, "cf1");

  Job job = Job.getInstance(_conf, "blur index");
  job.setJarByClass(BlurOutputFormatTest.class);
  job.setMapperClass(CsvBlurMapper.class);
  job.setInputFormatClass(TextInputFormat.class);

  FileInputFormat.addInputPath(job, input);
  CsvBlurMapper.addColumns(job, "cf1", "col");

  Path tablePath = new Path(new Path(_root, "table"), "test");

  TableDescriptor tableDescriptor = new TableDescriptor();
  tableDescriptor.setShardCount(7);
  tableDescriptor.setTableUri(tablePath.toString());
  tableDescriptor.setName("test");

  createShardDirectories(output, 7);

  BlurOutputFormat.setupJob(job, tableDescriptor);
  BlurOutputFormat.setOutputPath(job, output);
  int multiple = 2;
  BlurOutputFormat.setReducerMultiplier(job, multiple);

  assertTrue(job.waitForCompletion(true));
  Counters ctrs = job.getCounters();
  System.out.println("Counters: " + ctrs);

  long total = 0;
  for (int i = 0; i < tableDescriptor.getShardCount(); i++) {
    Path path = new Path(output, ShardUtil.getShardName(i));
    Collection<Path> commitedTasks = getCommitedTasks(path);
    assertTrue(commitedTasks.size() >= multiple);
    for (Path p : commitedTasks) {
      DirectoryReader reader = DirectoryReader.open(new HdfsDirectory(_conf, p));
      total += reader.numDocs();
      reader.close();
    }
  }
  assertEquals(80000, total);
}
From source file: org.apache.blur.mapreduce.lib.update.Driver.java
License: Apache License
@Override
public int run(String[] args) throws Exception {
  int c = 0;
  if (args.length < 5) {
    System.err.println(
        "Usage Driver <table> <mr inc working path> <output path> <zk connection> <reducer multipler> <extra config files...>");
  }
  String table = args[c++];
  String mrIncWorkingPathStr = args[c++];
  String outputPathStr = args[c++];
  String blurZkConnection = args[c++];
  int reducerMultipler = Integer.parseInt(args[c++]);
  for (; c < args.length; c++) {
    String externalConfigFileToAdd = args[c];
    getConf().addResource(new Path(externalConfigFileToAdd));
  }

  Path outputPath = new Path(outputPathStr);
  Path mrIncWorkingPath = new Path(mrIncWorkingPathStr);
  FileSystem fileSystem = mrIncWorkingPath.getFileSystem(getConf());

  Path newData = new Path(mrIncWorkingPath, NEW);
  Path inprogressData = new Path(mrIncWorkingPath, INPROGRESS);
  Path completeData = new Path(mrIncWorkingPath, COMPLETE);
  Path fileCache = new Path(mrIncWorkingPath, CACHE);

  fileSystem.mkdirs(newData);
  fileSystem.mkdirs(inprogressData);
  fileSystem.mkdirs(completeData);
  fileSystem.mkdirs(fileCache);

  List<Path> srcPathList = new ArrayList<Path>();
  for (FileStatus fileStatus : fileSystem.listStatus(newData)) {
    srcPathList.add(fileStatus.getPath());
  }
  if (srcPathList.isEmpty()) {
    return 0;
  }

  List<Path> inprogressPathList = new ArrayList<Path>();
  boolean success = false;
  Iface client = null;
  try {
    inprogressPathList = movePathList(fileSystem, inprogressData, srcPathList);

    Job job = Job.getInstance(getConf(), "Blur Row Updater for table [" + table + "]");
    client = BlurClient.getClientFromZooKeeperConnectionStr(blurZkConnection);
    waitForOtherSnapshotsToBeRemoved(client, table, MRUPDATE_SNAPSHOT);
    client.createSnapshot(table, MRUPDATE_SNAPSHOT);
    TableDescriptor descriptor = client.describe(table);
    Path tablePath = new Path(descriptor.getTableUri());

    BlurInputFormat.setLocalCachePath(job, fileCache);
    BlurInputFormat.addTable(job, descriptor, MRUPDATE_SNAPSHOT);
    MultipleInputs.addInputPath(job, tablePath, BlurInputFormat.class, MapperForExistingData.class);
    for (Path p : inprogressPathList) {
      FileInputFormat.addInputPath(job, p);
      MultipleInputs.addInputPath(job, p, SequenceFileInputFormat.class, MapperForNewData.class);
    }

    BlurOutputFormat.setOutputPath(job, outputPath);
    BlurOutputFormat.setupJob(job, descriptor);

    job.setReducerClass(UpdateReducer.class);
    job.setMapOutputKeyClass(IndexKey.class);
    job.setMapOutputValueClass(IndexValue.class);
    job.setPartitionerClass(IndexKeyPartitioner.class);
    job.setGroupingComparatorClass(IndexKeyWritableComparator.class);

    BlurOutputFormat.setReducerMultiplier(job, reducerMultipler);

    success = job.waitForCompletion(true);
    Counters counters = job.getCounters();
    LOG.info("Counters [" + counters + "]");
  } finally {
    if (success) {
      LOG.info("Indexing job succeeded!");
      movePathList(fileSystem, completeData, inprogressPathList);
    } else {
      LOG.error("Indexing job failed!");
      movePathList(fileSystem, newData, inprogressPathList);
    }
    if (client != null) {
      client.removeSnapshot(table, MRUPDATE_SNAPSHOT);
    }
  }

  if (success) {
    return 0;
  } else {
    return 1;
  }
}
From source file: org.apache.druid.indexer.updater.HadoopConverterJob.java
License: Apache License
public List<DataSegment> run() throws IOException {
  final JobConf jobConf = new JobConf();
  jobConf.setKeepFailedTaskFiles(false);
  for (Map.Entry<String, String> entry : converterConfig.getHadoopProperties().entrySet()) {
    jobConf.set(entry.getKey(), entry.getValue(), "converterConfig.getHadoopProperties()");
  }
  final List<DataSegment> segments = converterConfig.getSegments();
  if (segments.isEmpty()) {
    throw new IAE("No segments found for datasource [%s]", converterConfig.getDataSource());
  }
  converterConfigIntoConfiguration(converterConfig, segments, jobConf);

  jobConf.setNumReduceTasks(0); // Map only. Number of map tasks determined by input format
  jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache()));

  setJobName(jobConf, segments);

  if (converterConfig.getJobPriority() != null) {
    jobConf.setJobPriority(JobPriority.valueOf(converterConfig.getJobPriority()));
  }

  final Job job = Job.getInstance(jobConf);

  job.setInputFormatClass(ConfigInputFormat.class);
  job.setMapperClass(ConvertingMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setMapSpeculativeExecution(false);
  job.setOutputFormatClass(ConvertingOutputFormat.class);

  JobHelper.setupClasspath(JobHelper.distributedClassPath(jobConf.getWorkingDirectory()),
      JobHelper.distributedClassPath(getJobClassPathDir(job.getJobName(), jobConf.getWorkingDirectory())),
      job);

  Throwable throwable = null;
  try {
    job.submit();
    log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
    final boolean success = job.waitForCompletion(true);
    if (!success) {
      final TaskReport[] reports = job.getTaskReports(TaskType.MAP);
      if (reports != null) {
        for (final TaskReport report : reports) {
          log.error("Error in task [%s] : %s", report.getTaskId(), Arrays.toString(report.getDiagnostics()));
        }
      }
      return null;
    }
    try {
      loadedBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_LOADED).getValue();
      writtenBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_WRITTEN).getValue();
    } catch (IOException ex) {
      log.error(ex, "Could not fetch counters");
    }
    final JobID jobID = job.getJobID();

    final Path jobDir = getJobPath(jobID, job.getWorkingDirectory());
    final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
    final RemoteIterator<LocatedFileStatus> it = fs.listFiles(jobDir, true);
    final List<Path> goodPaths = new ArrayList<>();
    while (it.hasNext()) {
      final LocatedFileStatus locatedFileStatus = it.next();
      if (locatedFileStatus.isFile()) {
        final Path myPath = locatedFileStatus.getPath();
        if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) {
          goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY));
        }
      }
    }
    if (goodPaths.isEmpty()) {
      log.warn("No good data found at [%s]", jobDir);
      return null;
    }
    final List<DataSegment> returnList = ImmutableList
        .copyOf(Lists.transform(goodPaths, new Function<Path, DataSegment>() {
          @Nullable
          @Override
          public DataSegment apply(final Path input) {
            try {
              if (!fs.exists(input)) {
                throw new ISE("Somehow [%s] was found but [%s] is missing at [%s]",
                    ConvertingOutputFormat.DATA_SUCCESS_KEY, ConvertingOutputFormat.DATA_FILE_KEY, jobDir);
              }
            } catch (final IOException e) {
              throw Throwables.propagate(e);
            }
            try (final InputStream stream = fs.open(input)) {
              return HadoopDruidConverterConfig.jsonMapper.readValue(stream, DataSegment.class);
            } catch (final IOException e) {
              throw Throwables.propagate(e);
            }
          }
        }));
    if (returnList.size() == segments.size()) {
      return returnList;
    } else {
      throw new ISE("Tasks reported success but result length did not match! Expected %d found %d at path [%s]",
          segments.size(), returnList.size(), jobDir);
    }
  } catch (InterruptedException | ClassNotFoundException e) {
    RuntimeException exception = Throwables.propagate(e);
    throwable = exception;
    throw exception;
  } catch (Throwable t) {
    throwable = t;
    throw t;
  } finally {
    try {
      cleanup(job);
    } catch (IOException e) {
      if (throwable != null) {
        throwable.addSuppressed(e);
      } else {
        log.error(e, "Could not clean up job [%s]", job.getJobID());
      }
    }
  }
}
From source file: org.apache.falcon.job.JobCounters.java
License: Apache License
public void obtainJobCounters(Configuration conf, Job job, boolean isDistCp) throws IOException {
  try {
    long timeTaken = job.getFinishTime() - job.getStartTime();
    countersMap.put(ReplicationJobCountersList.TIMETAKEN.getName(), timeTaken);
    Counters jobCounters = job.getCounters();
    parseJob(job, jobCounters, isDistCp);
  } catch (Exception e) {
    LOG.info("Exception occurred while obtaining job counters: {}", e);
  }
}
From source file: org.apache.giraph.utils.CounterUtils.java
License: Apache License
/**
 * Wait for a counter to appear in a group and then return the name of that
 * counter. If job finishes before counter appears, return null.
 *
 * @param job   Job
 * @param group Name of the counter group
 * @return Name of the counter inside of the group, or null if job finishes
 *         before counter appears
 */
public static String waitAndGetCounterNameFromGroup(Job job, String group) {
  try {
    while (job.getCounters().getGroup(group).size() == 0) {
      if (job.isComplete()) {
        return null;
      }
      Thread.sleep(SLEEP_MSECS);
    }
    return job.getCounters().getGroup(group).iterator().next().getName();
  } catch (IOException | InterruptedException e) {
    throw new IllegalStateException("waitAndGetCounterNameFromGroup: Exception occurred", e);
  }
}
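
As the Giraph helper above relies on, Counters is iterable by group and each CounterGroup is iterable by counter. A minimal sketch for dumping every counter of a finished job follows; it assumes a completed org.apache.hadoop.mapreduce.Job named job.

// Minimal sketch, assuming `job` is a completed org.apache.hadoop.mapreduce.Job.
Counters counters = job.getCounters();
for (CounterGroup group : counters) {
  System.out.println(group.getDisplayName());
  for (Counter counter : group) {
    System.out.println("  " + counter.getDisplayName() + " = " + counter.getValue());
  }
}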