List of usage examples for org.apache.hadoop.mapreduce.Job.getCounters()
public Counters getCounters() throws IOException
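Before the project-specific examples below, here is a minimal sketch of the typical call pattern: submit the job, wait for completion, then read an aggregated counter from the returned Counters object. The built-in TaskCounter.MAP_OUTPUT_RECORDS counter is used purely for illustration, and the mapper/reducer and input/output configuration are assumed to happen elsewhere; this is a sketch, not one of the listed examples.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCounter;

public class GetCountersSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "counter-example");
        // ... set mapper/reducer classes and input/output paths here (omitted in this sketch) ...

        if (job.waitForCompletion(true)) {
            // counters are fully populated once the job has completed
            Counters counters = job.getCounters();
            // a built-in framework counter; user-defined enums or (group, name) pairs also work
            Counter mapOutputRecords = counters.findCounter(TaskCounter.MAP_OUTPUT_RECORDS);
            System.out.println("Map output records: " + mapOutputRecords.getValue());
        }
    }
}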
From source file:org.apache.phoenix.end2end.IndexScrutinyToolIT.java
License:Apache License
/**
 * Tests an index where the index pk is correct (indexed col values are indexed correctly),
 * but a covered index value is incorrect. Scrutiny should report the invalid row.
 */
@Test
public void testCoveredValueIncorrect() throws Exception {
    // insert one valid row
    upsertRow(dataTableUpsertStmt, 1, "name-1", 94010);
    conn.commit();

    // disable index and insert another data row
    disableIndex();
    upsertRow(dataTableUpsertStmt, 2, "name-2", 95123);
    conn.commit();

    // insert a bad index row for the above data row
    upsertIndexRow("name-2", 2, 9999);
    conn.commit();

    // scrutiny should report the bad row
    List<Job> completedJobs = runScrutiny(schemaName, dataTableName, indexTableName);
    Job job = completedJobs.get(0);
    assertTrue(job.isSuccessful());
    Counters counters = job.getCounters();
    assertEquals(1, getCounterValue(counters, VALID_ROW_COUNT));
    assertEquals(1, getCounterValue(counters, INVALID_ROW_COUNT));
    assertEquals(1, getCounterValue(counters, BAD_COVERED_COL_VAL_COUNT));
}
From source file:org.apache.phoenix.end2end.IndexScrutinyToolIT.java
License:Apache License
/**
 * Tests batching of row comparisons. Inserts 1001 rows, randomly deletes some of the
 * corresponding index rows, and runs scrutiny with a batch size of 10.
 */
@Test
public void testBatching() throws Exception {
    // insert 1001 data and index rows
    int numTestRows = 1001;
    for (int i = 0; i < numTestRows; i++) {
        upsertRow(dataTableUpsertStmt, i, "name-" + i, i + 1000);
    }
    conn.commit();
    disableIndex();

    // randomly delete some rows from the index
    Random random = new Random();
    for (int i = 0; i < 100; i++) {
        int idToDelete = random.nextInt(numTestRows);
        deleteRow(indexTableFullName, "WHERE \":ID\"=" + idToDelete);
    }
    conn.commit();
    int numRows = countRows(indexTableFullName);
    int numDeleted = numTestRows - numRows;

    // run scrutiny with batch size of 10
    List<Job> completedJobs =
            runScrutiny(schemaName, dataTableName, indexTableName, System.currentTimeMillis(), 10L);
    Job job = completedJobs.get(0);
    assertTrue(job.isSuccessful());
    Counters counters = job.getCounters();
    assertEquals(numTestRows - numDeleted, getCounterValue(counters, VALID_ROW_COUNT));
    assertEquals(numDeleted, getCounterValue(counters, INVALID_ROW_COUNT));
    assertEquals(numTestRows / 10 + numTestRows % 10, getCounterValue(counters, BATCHES_PROCESSED_COUNT));
}
From source file:org.apache.phoenix.end2end.IndexScrutinyToolIT.java
License:Apache License
/**
 * Tests when there are more data table rows than index table rows.
 * Scrutiny should report the number of incorrect rows.
 */
@Test
public void testMoreDataRows() throws Exception {
    upsertRow(dataTableUpsertStmt, 1, "name-1", 95123);
    conn.commit();
    disableIndex();

    // these rows won't have a corresponding index row
    upsertRow(dataTableUpsertStmt, 2, "name-2", 95124);
    upsertRow(dataTableUpsertStmt, 3, "name-3", 95125);
    conn.commit();

    List<Job> completedJobs = runScrutiny(schemaName, dataTableName, indexTableName);
    Job job = completedJobs.get(0);
    assertTrue(job.isSuccessful());
    Counters counters = job.getCounters();
    assertEquals(1, getCounterValue(counters, VALID_ROW_COUNT));
    assertEquals(2, getCounterValue(counters, INVALID_ROW_COUNT));
}
From source file:org.apache.phoenix.end2end.IndexScrutinyToolIT.java
License:Apache License
/**
 * Tests when there are more index table rows than data table rows.
 * Scrutiny should report the number of incorrect rows when run with the index as the source table.
 */
@Test
public void testMoreIndexRows() throws Exception {
    upsertRow(dataTableUpsertStmt, 1, "name-1", 95123);
    conn.commit();
    disableIndex();

    // these index rows won't have a corresponding data row
    upsertIndexRow("name-2", 2, 95124);
    upsertIndexRow("name-3", 3, 95125);
    conn.commit();

    List<Job> completedJobs = runScrutiny(schemaName, dataTableName, indexTableName,
            System.currentTimeMillis(), 10L, SourceTable.INDEX_TABLE_SOURCE);
    Job job = completedJobs.get(0);
    assertTrue(job.isSuccessful());
    Counters counters = job.getCounters();
    assertEquals(1, getCounterValue(counters, VALID_ROW_COUNT));
    assertEquals(2, getCounterValue(counters, INVALID_ROW_COUNT));
}
From source file:org.apache.phoenix.end2end.IndexScrutinyToolIT.java
License:Apache License
/**
 * Tests running with both the index and data tables as the source table.
 * If we have an incorrectly indexed row, it should be reported in each direction.
 */
@Test
public void testBothDataAndIndexAsSource() throws Exception {
    // insert one valid row
    upsertRow(dataTableUpsertStmt, 1, "name-1", 94010);
    conn.commit();

    // disable the index and insert another row which is not indexed
    disableIndex();
    upsertRow(dataTableUpsertStmt, 2, "name-2", 95123);
    conn.commit();

    // insert a bad row into the index
    upsertIndexRow("badName", 2, 9999);
    conn.commit();

    List<Job> completedJobs = runScrutiny(schemaName, dataTableName, indexTableName,
            System.currentTimeMillis(), 10L, SourceTable.BOTH);
    assertEquals(2, completedJobs.size());
    for (Job job : completedJobs) {
        assertTrue(job.isSuccessful());
        Counters counters = job.getCounters();
        assertEquals(1, getCounterValue(counters, VALID_ROW_COUNT));
        assertEquals(1, getCounterValue(counters, INVALID_ROW_COUNT));
    }
}
From source file:org.apache.phoenix.mapreduce.index.IndexScrutinyTableOutput.java
License:Apache License
/**
 * Writes the results of the given jobs to the metadata table.
 * @param conn connection to use
 * @param cmdLineArgs arguments the {@code IndexScrutinyTool} was run with
 * @param completedJobs completed MR jobs
 * @throws IOException
 * @throws SQLException
 */
public static void writeJobResults(Connection conn, String[] cmdLineArgs, List<Job> completedJobs)
        throws IOException, SQLException {
    PreparedStatement pStmt = conn.prepareStatement(UPSERT_METADATA_SQL);
    for (Job job : completedJobs) {
        Configuration conf = job.getConfiguration();
        String qDataTable = PhoenixConfigurationUtil.getScrutinyDataTableName(conf);
        final PTable pdataTable = PhoenixRuntime.getTable(conn, qDataTable);
        final String qIndexTable = PhoenixConfigurationUtil.getScrutinyIndexTableName(conf);
        final PTable pindexTable = PhoenixRuntime.getTable(conn, qIndexTable);
        SourceTable sourceTable = PhoenixConfigurationUtil.getScrutinySourceTable(conf);
        long scrutinyExecuteTime = PhoenixConfigurationUtil.getScrutinyExecuteTimestamp(conf);
        SourceTargetColumnNames columnNames = SourceTable.DATA_TABLE_SOURCE.equals(sourceTable)
                ? new DataSourceColNames(pdataTable, pindexTable)
                : new IndexSourceColNames(pdataTable, pindexTable);

        Counters counters = job.getCounters();
        int index = 1;
        pStmt.setString(index++, columnNames.getQualifiedSourceTableName());
        pStmt.setString(index++, columnNames.getQualifiedTargetTableName());
        pStmt.setLong(index++, scrutinyExecuteTime);
        pStmt.setString(index++, sourceTable.name());
        pStmt.setString(index++, Arrays.toString(cmdLineArgs));
        pStmt.setLong(index++, counters.findCounter(PhoenixJobCounters.INPUT_RECORDS).getValue());
        pStmt.setLong(index++, counters.findCounter(PhoenixJobCounters.FAILED_RECORDS).getValue());
        pStmt.setLong(index++, counters.findCounter(PhoenixScrutinyJobCounters.VALID_ROW_COUNT).getValue());
        pStmt.setLong(index++, counters.findCounter(PhoenixScrutinyJobCounters.INVALID_ROW_COUNT).getValue());
        pStmt.setLong(index++, counters.findCounter(PhoenixScrutinyJobCounters.BAD_COVERED_COL_VAL_COUNT).getValue());
        pStmt.setLong(index++, counters.findCounter(PhoenixScrutinyJobCounters.BATCHES_PROCESSED_COUNT).getValue());
        pStmt.setString(index++, Arrays.toString(columnNames.getSourceDynamicCols().toArray()));
        pStmt.setString(index++, Arrays.toString(columnNames.getTargetDynamicCols().toArray()));
        pStmt.setString(index++, getSqlQueryAllInvalidRows(conn, columnNames, scrutinyExecuteTime));
        pStmt.setString(index++, getSqlQueryMissingTargetRows(conn, columnNames, scrutinyExecuteTime));
        pStmt.setString(index++, getSqlQueryBadCoveredColVal(conn, columnNames, scrutinyExecuteTime));
        pStmt.addBatch();
    }
    pStmt.executeBatch();
    conn.commit();
}
From source file:org.apache.rya.accumulo.mr.tools.RdfFileInputTool.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    init();
    Job job = Job.getInstance(conf, "Rdf File Input");
    job.setJarByClass(RdfFileInputTool.class);

    String inputPath = conf.get(MRUtils.INPUT_PATH, args[0]);
    setupFileInput(job, inputPath, RDFFormat.RDFXML);
    setupRyaOutput(job);
    job.setNumReduceTasks(0);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int exitCode = job.waitForCompletion(true) ? 0 : 1;
    if (exitCode == 0) {
        Date end_time = new Date();
        System.out.println("Job ended: " + end_time);
        System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
        long n = job.getCounters()
                .findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_OUTPUT_RECORDS").getValue();
        System.out.println(n + " statement(s) inserted to Rya.");
    } else {
        System.out.println("Job Failed!!!");
    }
    return exitCode;
}
From source file:org.apache.rya.accumulo.pig.IndexWritingTool.java
License:Apache License
@Override
public int run(final String[] args) throws Exception {
    Preconditions.checkArgument(args.length == 7, "java " + IndexWritingTool.class.getCanonicalName()
            + " hdfsSaveLocation sparqlFile cbinstance cbzk cbuser cbpassword rdfTablePrefix.");
    final String inputDir = args[0];
    final String sparqlFile = args[1];
    final String instStr = args[2];
    final String zooStr = args[3];
    final String userStr = args[4];
    final String passStr = args[5];
    final String tablePrefix = args[6];

    final String sparql = FileUtils.readFileToString(new File(sparqlFile));

    final Job job = new Job(getConf(), "Write HDFS Index to Accumulo");
    job.setJarByClass(this.getClass());

    final Configuration jobConf = job.getConfiguration();
    jobConf.setBoolean("mapred.map.tasks.speculative.execution", false);
    setVarOrders(sparql, jobConf);

    TextInputFormat.setInputPaths(job, inputDir);
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Mutation.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Mutation.class);
    job.setNumReduceTasks(0);

    String tableName;
    if (zooStr.equals("mock")) {
        tableName = tablePrefix;
    } else {
        tableName = tablePrefix + "INDEX_" + UUID.randomUUID().toString().replace("-", "").toUpperCase();
    }
    setAccumuloOutput(instStr, zooStr, userStr, passStr, job, tableName);
    jobConf.set(sparql_key, sparql);

    final int complete = job.waitForCompletion(true) ? 0 : -1;
    if (complete == 0) {
        final String[] varOrders = jobConf.getStrings("varOrders");
        final String orders = Joiner.on("\u0000").join(varOrders);
        Instance inst;
        if (zooStr.equals("mock")) {
            inst = new MockInstance(instStr);
        } else {
            inst = new ZooKeeperInstance(instStr, zooStr);
        }
        final Connector conn = inst.getConnector(userStr, passStr.getBytes(StandardCharsets.UTF_8));
        final BatchWriter bw = conn.createBatchWriter(tableName, 10, 5000, 1);

        final Counters counters = job.getCounters();
        final Counter c1 = counters.findCounter(cardCounter, cardCounter);

        final Mutation m = new Mutation("~SPARQL");
        final Value v = new Value(sparql.getBytes(StandardCharsets.UTF_8));
        m.put(new Text("" + c1.getValue()), new Text(orders), v);
        bw.addMutation(m);
        bw.close();

        return complete;
    } else {
        return complete;
    }
}
From source file:org.apache.solr.hadoop.MorphlineBasicMiniMRTest.java
License:Apache License
@Test
public void mrRun() throws Exception {
    FileSystem fs = dfsCluster.getFileSystem();
    Path inDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/input"));
    fs.delete(inDir, true);
    String DATADIR = "/user/testing/testMapperReducer/data";
    Path dataDir = fs.makeQualified(new Path(DATADIR));
    fs.delete(dataDir, true);
    Path outDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/output"));
    fs.delete(outDir, true);

    assertTrue(fs.mkdirs(inDir));
    Path INPATH = new Path(inDir, "input.txt");
    OutputStream os = fs.create(INPATH);
    Writer wr = new OutputStreamWriter(os, "UTF-8");
    wr.write(DATADIR + "/" + inputAvroFile);
    wr.close();

    assertTrue(fs.mkdirs(dataDir));
    fs.copyFromLocalFile(new Path(DOCUMENTS_DIR, inputAvroFile), dataDir);

    JobConf jobConf = getJobConf();
    if (ENABLE_LOCAL_JOB_RUNNER) {
        // enable Hadoop LocalJobRunner; this makes it possible to run in a debugger and set breakpoints
        jobConf.set("mapred.job.tracker", "local");
    }
    jobConf.setMaxMapAttempts(1);
    jobConf.setMaxReduceAttempts(1);
    jobConf.setJar(SEARCH_ARCHIVES_JAR);
    jobConf.setBoolean(ExtractingParams.IGNORE_TIKA_EXCEPTION, false);

    int shards = 2;
    int maxReducers = Integer.MAX_VALUE;
    if (ENABLE_LOCAL_JOB_RUNNER) {
        // local job runner has a couple of limitations: only one reducer is supported
        // and the DistributedCache doesn't work.
        // see http://blog.cloudera.com/blog/2009/07/advice-on-qa-testing-your-mapreduce-jobs/
        maxReducers = 1;
        shards = 1;
    }

    String[] args = new String[] {
            "--morphline-file=" + RESOURCES_DIR + "/test-morphlines/solrCellDocumentTypes.conf",
            "--morphline-id=morphline1",
            "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
            "--output-dir=" + outDir.toString(),
            "--shards=" + shards,
            "--verbose",
            numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(),
            numRuns % 3 == 0 ? "--reducers=" + shards
                    : (numRuns % 3 == 1 ? "--reducers=-1" : "--reducers=" + Math.min(8, maxReducers)) };
    if (numRuns % 3 == 2) {
        args = concat(args, new String[] { "--fanout=2" });
    }
    if (numRuns == 0) {
        // force (slow) MapReduce based randomization to get coverage for that as well
        args = concat(new String[] { "-D",
                MapReduceIndexerTool.MAIN_MEMORY_RANDOMIZATION_THRESHOLD + "=-1" }, args);
    }

    MapReduceIndexerTool tool = createTool();
    int res = ToolRunner.run(jobConf, tool, args);
    assertEquals(0, res);
    Job job = tool.job;
    assertTrue(job.isComplete());
    assertTrue(job.isSuccessful());

    if (numRuns % 3 != 2) {
        // Only run this check if mtree merge is disabled.
        // With mtree merge enabled the BatchWriter counters aren't available anymore because
        // variable "job" now refers to the merge job rather than the indexing job
        assertEquals(
                "Invalid counter " + SolrRecordWriter.class.getName() + "." + SolrCounters.DOCUMENTS_WRITTEN,
                count,
                job.getCounters()
                        .findCounter(SolrCounters.class.getName(), SolrCounters.DOCUMENTS_WRITTEN.toString())
                        .getValue());
    }

    // Check the output is as expected
    outDir = new Path(outDir, MapReduceIndexerTool.RESULTS_DIR);
    Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(outDir));
    System.out.println("outputfiles:" + Arrays.toString(outputFiles));
    TestUtils.validateSolrServerDocumentCount(MINIMR_CONF_DIR, fs, outDir, count, shards);

    // run again in --dry-run mode:
    tool = createTool();
    args = concat(args, new String[] { "--dry-run" });
    res = ToolRunner.run(jobConf, tool, args);
    assertEquals(0, res);

    numRuns++;
}
From source file:org.apache.sqoop.config.ConfigurationHelper.java
License:Apache License
/**
 * @return the number of mapper output records from a job, using its counters.
 */
public static long getNumMapOutputRecords(Job job) throws IOException, InterruptedException {
    return job.getCounters().findCounter(ConfigurationConstants.COUNTER_GROUP_MAPRED_TASK_COUNTERS,
            ConfigurationConstants.COUNTER_MAP_OUTPUT_RECORDS).getValue();
}