Example usage for org.apache.hadoop.mapreduce Job getCounters

List of usage examples for org.apache.hadoop.mapreduce Job getCounters

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job getCounters.

Prototype

public Counters getCounters() throws IOException 

Document

Gets the counters for this job.
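
Before the full examples below, here is a minimal sketch of the typical call pattern: run the job, wait for completion, then read built-in and custom counters from the returned Counters object. The job name, counter group, and counter name used here are illustrative placeholders, not taken from any of the examples that follow.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCounter;

public class GetCountersSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // "example-job" is a placeholder name; mapper/reducer/input/output setup is omitted.
        Job job = Job.getInstance(conf, "example-job");

        if (job.waitForCompletion(true)) {
            // getCounters() may return null if the job has been retired and its history is unavailable.
            Counters counters = job.getCounters();
            if (counters != null) {
                // Built-in framework counter.
                long mapOutputRecords = counters.findCounter(TaskCounter.MAP_OUTPUT_RECORDS).getValue();
                // Custom counter looked up by group and counter name (placeholders).
                long myCounter = counters.findCounter("MyCounterGroup", "MY_COUNTER").getValue();
                System.out.println("Map output records: " + mapOutputRecords);
                System.out.println("MY_COUNTER: " + myCounter);
            }
        }
    }
}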

Usage

From source file:org.apache.phoenix.end2end.IndexScrutinyToolIT.java

License:Apache License

/**
 * Tests an index where the index pk is correct (indexed col values are indexed correctly), but
 * a covered index value is incorrect. Scrutiny should report the invalid row.
 */
@Test
public void testCoveredValueIncorrect() throws Exception {
    // insert one valid row
    upsertRow(dataTableUpsertStmt, 1, "name-1", 94010);
    conn.commit();

    // disable index and insert another data row
    disableIndex();
    upsertRow(dataTableUpsertStmt, 2, "name-2", 95123);
    conn.commit();

    // insert a bad index row for the above data row
    upsertIndexRow("name-2", 2, 9999);
    conn.commit();

    // scrutiny should report the bad row
    List<Job> completedJobs = runScrutiny(schemaName, dataTableName, indexTableName);
    Job job = completedJobs.get(0);
    assertTrue(job.isSuccessful());
    Counters counters = job.getCounters();
    assertEquals(1, getCounterValue(counters, VALID_ROW_COUNT));
    assertEquals(1, getCounterValue(counters, INVALID_ROW_COUNT));
    assertEquals(1, getCounterValue(counters, BAD_COVERED_COL_VAL_COUNT));
}

From source file:org.apache.phoenix.end2end.IndexScrutinyToolIT.java

License:Apache License

/**
 * Tests batching of row comparisons. Inserts 1001 rows, with some random bad rows, and runs
 * scrutiny with a batch size of 10.
 */
@Test
public void testBatching() throws Exception {
    // insert 1001 data and index rows
    int numTestRows = 1001;
    for (int i = 0; i < numTestRows; i++) {
        upsertRow(dataTableUpsertStmt, i, "name-" + i, i + 1000);
    }
    conn.commit();

    disableIndex();

    // randomly delete some rows from the index
    Random random = new Random();
    for (int i = 0; i < 100; i++) {
        int idToDelete = random.nextInt(numTestRows);
        deleteRow(indexTableFullName, "WHERE \":ID\"=" + idToDelete);
    }
    conn.commit();
    int numRows = countRows(indexTableFullName);
    int numDeleted = numTestRows - numRows;

    // run scrutiny with batch size of 10
    List<Job> completedJobs = runScrutiny(schemaName, dataTableName, indexTableName, System.currentTimeMillis(),
            10L);
    Job job = completedJobs.get(0);
    assertTrue(job.isSuccessful());
    Counters counters = job.getCounters();
    assertEquals(numTestRows - numDeleted, getCounterValue(counters, VALID_ROW_COUNT));
    assertEquals(numDeleted, getCounterValue(counters, INVALID_ROW_COUNT));
    assertEquals(numTestRows / 10 + numTestRows % 10, getCounterValue(counters, BATCHES_PROCESSED_COUNT));
}

From source file:org.apache.phoenix.end2end.IndexScrutinyToolIT.java

License:Apache License

/**
 * Tests when there are more data table rows than index table rows. Scrutiny should report the
 * number of incorrect rows.
 */
@Test
public void testMoreDataRows() throws Exception {
    upsertRow(dataTableUpsertStmt, 1, "name-1", 95123);
    conn.commit();
    disableIndex();
    // these rows won't have a corresponding index row
    upsertRow(dataTableUpsertStmt, 2, "name-2", 95124);
    upsertRow(dataTableUpsertStmt, 3, "name-3", 95125);
    conn.commit();

    List<Job> completedJobs = runScrutiny(schemaName, dataTableName, indexTableName);
    Job job = completedJobs.get(0);
    assertTrue(job.isSuccessful());
    Counters counters = job.getCounters();
    assertEquals(1, getCounterValue(counters, VALID_ROW_COUNT));
    assertEquals(2, getCounterValue(counters, INVALID_ROW_COUNT));
}

From source file:org.apache.phoenix.end2end.IndexScrutinyToolIT.java

License:Apache License

/**
 * Tests when there are more index table rows than data table rows. Scrutiny should report the
 * number of incorrect rows when run with the index as the source table.
 */
@Test
public void testMoreIndexRows() throws Exception {
    upsertRow(dataTableUpsertStmt, 1, "name-1", 95123);
    conn.commit();
    disableIndex();
    // these index rows won't have a corresponding data row
    upsertIndexRow("name-2", 2, 95124);
    upsertIndexRow("name-3", 3, 95125);
    conn.commit();

    List<Job> completedJobs = runScrutiny(schemaName, dataTableName, indexTableName, System.currentTimeMillis(),
            10L, SourceTable.INDEX_TABLE_SOURCE);
    Job job = completedJobs.get(0);
    assertTrue(job.isSuccessful());
    Counters counters = job.getCounters();
    assertEquals(1, getCounterValue(counters, VALID_ROW_COUNT));
    assertEquals(2, getCounterValue(counters, INVALID_ROW_COUNT));
}

From source file:org.apache.phoenix.end2end.IndexScrutinyToolIT.java

License:Apache License

/**
 * Tests running with both the index and data tables as the source table. If we have an
 * incorrectly indexed row, it should be reported in each direction.
 */
@Test
public void testBothDataAndIndexAsSource() throws Exception {
    // insert one valid row
    upsertRow(dataTableUpsertStmt, 1, "name-1", 94010);
    conn.commit();

    // disable the index and insert another row which is not indexed
    disableIndex();
    upsertRow(dataTableUpsertStmt, 2, "name-2", 95123);
    conn.commit();

    // insert a bad row into the index
    upsertIndexRow("badName", 2, 9999);
    conn.commit();

    List<Job> completedJobs = runScrutiny(schemaName, dataTableName, indexTableName, System.currentTimeMillis(),
            10L, SourceTable.BOTH);
    assertEquals(2, completedJobs.size());
    for (Job job : completedJobs) {
        assertTrue(job.isSuccessful());
        Counters counters = job.getCounters();
        assertEquals(1, getCounterValue(counters, VALID_ROW_COUNT));
        assertEquals(1, getCounterValue(counters, INVALID_ROW_COUNT));
    }
}

From source file:org.apache.phoenix.mapreduce.index.IndexScrutinyTableOutput.java

License:Apache License

/**
 * Writes the results of the given jobs to the metadata table
 * @param conn connection to use
 * @param cmdLineArgs arguments the {@code IndexScrutinyTool} was run with
 * @param completedJobs completed MR jobs
 * @throws IOException
 * @throws SQLException
 */
public static void writeJobResults(Connection conn, String[] cmdLineArgs, List<Job> completedJobs)
        throws IOException, SQLException {
    PreparedStatement pStmt = conn.prepareStatement(UPSERT_METADATA_SQL);
    for (Job job : completedJobs) {
        Configuration conf = job.getConfiguration();
        String qDataTable = PhoenixConfigurationUtil.getScrutinyDataTableName(conf);
        final PTable pdataTable = PhoenixRuntime.getTable(conn, qDataTable);
        final String qIndexTable = PhoenixConfigurationUtil.getScrutinyIndexTableName(conf);
        final PTable pindexTable = PhoenixRuntime.getTable(conn, qIndexTable);
        SourceTable sourceTable = PhoenixConfigurationUtil.getScrutinySourceTable(conf);
        long scrutinyExecuteTime = PhoenixConfigurationUtil.getScrutinyExecuteTimestamp(conf);
        SourceTargetColumnNames columnNames = SourceTable.DATA_TABLE_SOURCE.equals(sourceTable)
                ? new DataSourceColNames(pdataTable, pindexTable)
                : new IndexSourceColNames(pdataTable, pindexTable);

        Counters counters = job.getCounters();
        int index = 1;
        pStmt.setString(index++, columnNames.getQualifiedSourceTableName());
        pStmt.setString(index++, columnNames.getQualifiedTargetTableName());
        pStmt.setLong(index++, scrutinyExecuteTime);
        pStmt.setString(index++, sourceTable.name());
        pStmt.setString(index++, Arrays.toString(cmdLineArgs));
        pStmt.setLong(index++, counters.findCounter(PhoenixJobCounters.INPUT_RECORDS).getValue());
        pStmt.setLong(index++, counters.findCounter(PhoenixJobCounters.FAILED_RECORDS).getValue());
        pStmt.setLong(index++, counters.findCounter(PhoenixScrutinyJobCounters.VALID_ROW_COUNT).getValue());
        pStmt.setLong(index++, counters.findCounter(PhoenixScrutinyJobCounters.INVALID_ROW_COUNT).getValue());
        pStmt.setLong(index++,
                counters.findCounter(PhoenixScrutinyJobCounters.BAD_COVERED_COL_VAL_COUNT).getValue());
        pStmt.setLong(index++,
                counters.findCounter(PhoenixScrutinyJobCounters.BATCHES_PROCESSED_COUNT).getValue());
        pStmt.setString(index++, Arrays.toString(columnNames.getSourceDynamicCols().toArray()));
        pStmt.setString(index++, Arrays.toString(columnNames.getTargetDynamicCols().toArray()));
        pStmt.setString(index++, getSqlQueryAllInvalidRows(conn, columnNames, scrutinyExecuteTime));
        pStmt.setString(index++, getSqlQueryMissingTargetRows(conn, columnNames, scrutinyExecuteTime));
        pStmt.setString(index++, getSqlQueryBadCoveredColVal(conn, columnNames, scrutinyExecuteTime));
        pStmt.addBatch();
    }
    pStmt.executeBatch();
    conn.commit();
}

From source file:org.apache.rya.accumulo.mr.tools.RdfFileInputTool.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    init();
    Job job = Job.getInstance(conf, "Rdf File Input");
    job.setJarByClass(RdfFileInputTool.class);

    String inputPath = conf.get(MRUtils.INPUT_PATH, args[0]);
    setupFileInput(job, inputPath, RDFFormat.RDFXML);
    setupRyaOutput(job);
    job.setNumReduceTasks(0);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int exitCode = job.waitForCompletion(true) ? 0 : 1;

    if (exitCode == 0) {
        Date end_time = new Date();
        System.out.println("Job ended: " + end_time);
        System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
        long n = job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_OUTPUT_RECORDS")
                .getValue();
        System.out.println(n + " statement(s) inserted to Rya.");
    } else {
        System.out.println("Job Failed!!!");
    }
    return exitCode;
}

From source file:org.apache.rya.accumulo.pig.IndexWritingTool.java

License:Apache License

@Override
public int run(final String[] args) throws Exception {
    Preconditions.checkArgument(args.length == 7, "java " + IndexWritingTool.class.getCanonicalName()
            + " hdfsSaveLocation sparqlFile cbinstance cbzk cbuser cbpassword rdfTablePrefix.");

    final String inputDir = args[0];
    final String sparqlFile = args[1];
    final String instStr = args[2];
    final String zooStr = args[3];
    final String userStr = args[4];
    final String passStr = args[5];
    final String tablePrefix = args[6];

    final String sparql = FileUtils.readFileToString(new File(sparqlFile));

    final Job job = new Job(getConf(), "Write HDFS Index to Accumulo");
    job.setJarByClass(this.getClass());

    final Configuration jobConf = job.getConfiguration();
    jobConf.setBoolean("mapred.map.tasks.speculative.execution", false);
    setVarOrders(sparql, jobConf);

    TextInputFormat.setInputPaths(job, inputDir);
    job.setInputFormatClass(TextInputFormat.class);

    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Mutation.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Mutation.class);

    job.setNumReduceTasks(0);

    String tableName;
    if (zooStr.equals("mock")) {
        tableName = tablePrefix;
    } else {
        tableName = tablePrefix + "INDEX_" + UUID.randomUUID().toString().replace("-", "").toUpperCase();
    }
    setAccumuloOutput(instStr, zooStr, userStr, passStr, job, tableName);

    jobConf.set(sparql_key, sparql);

    final int complete = job.waitForCompletion(true) ? 0 : -1;

    if (complete == 0) {

        final String[] varOrders = jobConf.getStrings("varOrders");
        final String orders = Joiner.on("\u0000").join(varOrders);
        Instance inst;

        if (zooStr.equals("mock")) {
            inst = new MockInstance(instStr);
        } else {
            inst = new ZooKeeperInstance(instStr, zooStr);
        }

        final Connector conn = inst.getConnector(userStr, passStr.getBytes(StandardCharsets.UTF_8));
        final BatchWriter bw = conn.createBatchWriter(tableName, 10, 5000, 1);

        final Counters counters = job.getCounters();
        final Counter c1 = counters.findCounter(cardCounter, cardCounter);

        final Mutation m = new Mutation("~SPARQL");
        final Value v = new Value(sparql.getBytes(StandardCharsets.UTF_8));
        m.put(new Text("" + c1.getValue()), new Text(orders), v);
        bw.addMutation(m);

        bw.close();

        return complete;
    } else {
        return complete;
    }

}

From source file:org.apache.solr.hadoop.MorphlineBasicMiniMRTest.java

License:Apache License

@Test
public void mrRun() throws Exception {
    FileSystem fs = dfsCluster.getFileSystem();
    Path inDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/input"));
    fs.delete(inDir, true);
    String DATADIR = "/user/testing/testMapperReducer/data";
    Path dataDir = fs.makeQualified(new Path(DATADIR));
    fs.delete(dataDir, true);
    Path outDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/output"));
    fs.delete(outDir, true);

    assertTrue(fs.mkdirs(inDir));
    Path INPATH = new Path(inDir, "input.txt");
    OutputStream os = fs.create(INPATH);
    Writer wr = new OutputStreamWriter(os, "UTF-8");
    wr.write(DATADIR + "/" + inputAvroFile);
    wr.close();

    assertTrue(fs.mkdirs(dataDir));
    fs.copyFromLocalFile(new Path(DOCUMENTS_DIR, inputAvroFile), dataDir);

    JobConf jobConf = getJobConf();
    if (ENABLE_LOCAL_JOB_RUNNER) { // enable Hadoop LocalJobRunner; this makes it possible to run in a debugger and set breakpoints
        jobConf.set("mapred.job.tracker", "local");
    }
    jobConf.setMaxMapAttempts(1);
    jobConf.setMaxReduceAttempts(1);
    jobConf.setJar(SEARCH_ARCHIVES_JAR);
    jobConf.setBoolean(ExtractingParams.IGNORE_TIKA_EXCEPTION, false);

    int shards = 2;
    int maxReducers = Integer.MAX_VALUE;
    if (ENABLE_LOCAL_JOB_RUNNER) {
        // local job runner has a couple of limitations: only one reducer is supported and the DistributedCache doesn't work.
        // see http://blog.cloudera.com/blog/2009/07/advice-on-qa-testing-your-mapreduce-jobs/
        maxReducers = 1;
        shards = 1;
    }

    String[] args = new String[] {
            "--morphline-file=" + RESOURCES_DIR + "/test-morphlines/solrCellDocumentTypes.conf",
            "--morphline-id=morphline1", "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
            "--output-dir=" + outDir.toString(), "--shards=" + shards, "--verbose",
            numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(),
            numRuns % 3 == 0 ? "--reducers=" + shards
                    : (numRuns % 3 == 1 ? "--reducers=-1" : "--reducers=" + Math.min(8, maxReducers)) };
    if (numRuns % 3 == 2) {
        args = concat(args, new String[] { "--fanout=2" });
    }
    if (numRuns == 0) {
        // force (slow) MapReduce based randomization to get coverage for that as well
        args = concat(new String[] { "-D", MapReduceIndexerTool.MAIN_MEMORY_RANDOMIZATION_THRESHOLD + "=-1" },
                args);
    }
    MapReduceIndexerTool tool = createTool();
    int res = ToolRunner.run(jobConf, tool, args);
    assertEquals(0, res);
    Job job = tool.job;
    assertTrue(job.isComplete());
    assertTrue(job.isSuccessful());

    if (numRuns % 3 != 2) {
        // Only run this check if mtree merge is disabled.
        // With mtree merge enabled the BatchWriter counters aren't available anymore because 
        // variable "job" now refers to the merge job rather than the indexing job
        assertEquals(
                "Invalid counter " + SolrRecordWriter.class.getName() + "." + SolrCounters.DOCUMENTS_WRITTEN,
                count,
                job.getCounters()
                        .findCounter(SolrCounters.class.getName(), SolrCounters.DOCUMENTS_WRITTEN.toString())
                        .getValue());
    }

    // Check the output is as expected
    outDir = new Path(outDir, MapReduceIndexerTool.RESULTS_DIR);
    Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(outDir));

    System.out.println("outputfiles:" + Arrays.toString(outputFiles));

    TestUtils.validateSolrServerDocumentCount(MINIMR_CONF_DIR, fs, outDir, count, shards);

    // run again with --dryrun mode:  
    tool = createTool();
    args = concat(args, new String[] { "--dry-run" });
    res = ToolRunner.run(jobConf, tool, args);
    assertEquals(0, res);

    numRuns++;
}

From source file:org.apache.sqoop.config.ConfigurationHelper.java

License:Apache License

/**
 * @return the number of mapper output records from a job using its counters.
 */
public static long getNumMapOutputRecords(Job job) throws IOException, InterruptedException {
    return job.getCounters().findCounter(ConfigurationConstants.COUNTER_GROUP_MAPRED_TASK_COUNTERS,
            ConfigurationConstants.COUNTER_MAP_OUTPUT_RECORDS).getValue();
}