List of usage examples for org.apache.hadoop.mapred.JobConf.setMaxMapAttempts
public void setMaxMapAttempts(int n)
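This setter caps how many times Hadoop will attempt each map task before failing the job; it writes the mapred.map.max.attempts property, which defaults to 4 attempts. A minimal sketch of the recurring pattern in the examples below (MyJob and MyMap are hypothetical placeholder classes, and the paths are illustrative):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;

JobConf jobConf = new JobConf(MyJob.class);  // MyJob: hypothetical driver class
jobConf.setJobName("fail-fast example");
jobConf.setMapperClass(MyMap.class);         // MyMap: hypothetical Mapper implementation
jobConf.setInputFormat(TextInputFormat.class);
FileInputFormat.addInputPath(jobConf, new Path("/user/example/input"));
FileOutputFormat.setOutputPath(jobConf, new Path("/user/example/output"));
jobConf.setMaxMapAttempts(1);     // fail the job on the first map-task failure instead of retrying
jobConf.setMaxReduceAttempts(1);  // same policy for reduce tasks
jobConf.setNumReduceTasks(0);     // map-only, as in most examples below
JobClient.runJob(jobConf);        // throws IOException if the job fails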
From source file: org.apache.solr.hadoop.MorphlineBasicMiniMRTest.java
License: Apache License
@Test
public void mrRun() throws Exception {
    FileSystem fs = dfsCluster.getFileSystem();
    Path inDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/input"));
    fs.delete(inDir, true);
    String DATADIR = "/user/testing/testMapperReducer/data";
    Path dataDir = fs.makeQualified(new Path(DATADIR));
    fs.delete(dataDir, true);
    Path outDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/output"));
    fs.delete(outDir, true);

    assertTrue(fs.mkdirs(inDir));
    Path INPATH = new Path(inDir, "input.txt");
    OutputStream os = fs.create(INPATH);
    Writer wr = new OutputStreamWriter(os, "UTF-8");
    wr.write(DATADIR + "/" + inputAvroFile);
    wr.close();

    assertTrue(fs.mkdirs(dataDir));
    fs.copyFromLocalFile(new Path(DOCUMENTS_DIR, inputAvroFile), dataDir);

    JobConf jobConf = getJobConf();
    if (ENABLE_LOCAL_JOB_RUNNER) {
        // enable Hadoop LocalJobRunner; this makes it possible to run in a debugger and set breakpoints
        jobConf.set("mapred.job.tracker", "local");
    }
    jobConf.setMaxMapAttempts(1);
    jobConf.setMaxReduceAttempts(1);
    jobConf.setJar(SEARCH_ARCHIVES_JAR);
    jobConf.setBoolean(ExtractingParams.IGNORE_TIKA_EXCEPTION, false);

    int shards = 2;
    int maxReducers = Integer.MAX_VALUE;
    if (ENABLE_LOCAL_JOB_RUNNER) {
        // the local job runner has a couple of limitations: only one reducer is supported
        // and the DistributedCache doesn't work.
        // see http://blog.cloudera.com/blog/2009/07/advice-on-qa-testing-your-mapreduce-jobs/
        maxReducers = 1;
        shards = 1;
    }

    String[] args = new String[] {
        "--morphline-file=" + RESOURCES_DIR + "/test-morphlines/solrCellDocumentTypes.conf",
        "--morphline-id=morphline1",
        "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
        "--output-dir=" + outDir.toString(),
        "--shards=" + shards,
        "--verbose",
        numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(),
        numRuns % 3 == 0 ? "--reducers=" + shards
            : (numRuns % 3 == 1 ? "--reducers=-1" : "--reducers=" + Math.min(8, maxReducers))
    };
    if (numRuns % 3 == 2) {
        args = concat(args, new String[] { "--fanout=2" });
    }
    if (numRuns == 0) {
        // force (slow) MapReduce based randomization to get coverage for that as well
        args = concat(new String[] { "-D",
            MapReduceIndexerTool.MAIN_MEMORY_RANDOMIZATION_THRESHOLD + "=-1" }, args);
    }

    MapReduceIndexerTool tool = createTool();
    int res = ToolRunner.run(jobConf, tool, args);
    assertEquals(0, res);
    Job job = tool.job;
    assertTrue(job.isComplete());
    assertTrue(job.isSuccessful());

    if (numRuns % 3 != 2) {
        // Only run this check if mtree merge is disabled.
        // With mtree merge enabled the BatchWriter counters aren't available anymore because
        // variable "job" now refers to the merge job rather than the indexing job
        assertEquals(
            "Invalid counter " + SolrRecordWriter.class.getName() + "." + SolrCounters.DOCUMENTS_WRITTEN,
            count,
            job.getCounters()
                .findCounter(SolrCounters.class.getName(), SolrCounters.DOCUMENTS_WRITTEN.toString())
                .getValue());
    }

    // Check the output is as expected
    outDir = new Path(outDir, MapReduceIndexerTool.RESULTS_DIR);
    Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(outDir));
    System.out.println("outputfiles:" + Arrays.toString(outputFiles));
    TestUtils.validateSolrServerDocumentCount(MINIMR_CONF_DIR, fs, outDir, count, shards);

    // run again in --dry-run mode:
    tool = createTool();
    args = concat(args, new String[] { "--dry-run" });
    res = ToolRunner.run(jobConf, tool, args);
    assertEquals(0, res);

    numRuns++;
}
From source file: org.apache.solr.hadoop.MorphlineGoLiveMiniMRTest.java
License: Apache License
@Override
public void doTest() throws Exception {
    waitForRecoveriesToFinish(false);

    FileSystem fs = dfsCluster.getFileSystem();
    Path inDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/input"));
    fs.delete(inDir, true);
    String DATADIR = "/user/testing/testMapperReducer/data";
    Path dataDir = fs.makeQualified(new Path(DATADIR));
    fs.delete(dataDir, true);
    Path outDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/output"));
    fs.delete(outDir, true);

    assertTrue(fs.mkdirs(inDir));
    Path INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile1);

    JobConf jobConf = getJobConf();
    // enable mapred.job.tracker = local to run in debugger and set breakpoints
    // jobConf.set("mapred.job.tracker", "local");
    jobConf.setMaxMapAttempts(1);
    jobConf.setMaxReduceAttempts(1);
    jobConf.setJar(SEARCH_ARCHIVES_JAR);
    jobConf.setBoolean(ExtractingParams.IGNORE_TIKA_EXCEPTION, false);

    MapReduceIndexerTool tool;
    int res;
    QueryResponse results;
    HttpSolrServer server = new HttpSolrServer(cloudJettys.get(0).url);

    String[] args = new String[] {
        "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
        "--output-dir=" + outDir.toString(),
        "--mappers=3",
        ++numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(),
        "--shard-url", cloudJettys.get(0).url,
        "--shard-url", cloudJettys.get(1).url,
        "--shard-url", cloudJettys.get(2).url,
        "--go-live-threads", Integer.toString(random().nextInt(15) + 1),
        "--verbose",
        "--go-live"
    };
    args = prependInitialArgs(args);

    if (true) {
        tool = new MapReduceIndexerTool();
        res = ToolRunner.run(jobConf, tool, args);
        assertEquals(0, res);
        assertTrue(tool.job.isComplete());
        assertTrue(tool.job.isSuccessful());
        results = server.query(new SolrQuery("*:*"));
        assertEquals(20, results.getResults().getNumFound());
    }

    fs.delete(inDir, true);
    fs.delete(outDir, true);
    fs.delete(dataDir, true);
    assertTrue(fs.mkdirs(inDir));
    INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile2);

    args = new String[] {
        "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
        "--output-dir=" + outDir.toString(),
        "--mappers=3",
        "--verbose",
        "--go-live",
        ++numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(),
        "--shard-url", cloudJettys.get(0).url,
        "--shard-url", cloudJettys.get(1).url,
        "--shard-url", cloudJettys.get(2).url,
        "--go-live-threads", Integer.toString(random().nextInt(15) + 1)
    };
    args = prependInitialArgs(args);

    if (true) {
        tool = new MapReduceIndexerTool();
        res = ToolRunner.run(jobConf, tool, args);
        assertEquals(0, res);
        assertTrue(tool.job.isComplete());
        assertTrue(tool.job.isSuccessful());
        results = server.query(new SolrQuery("*:*"));
        assertEquals(22, results.getResults().getNumFound());
    }

    // try using zookeeper
    String collection = "collection1";
    if (random().nextBoolean()) {
        // sometimes, use an alias
        createAlias("updatealias", "collection1");
        collection = "updatealias";
    }
    fs.delete(inDir, true);
    fs.delete(outDir, true);
    fs.delete(dataDir, true);
    INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile3);

    args = new String[] {
        "--output-dir=" + outDir.toString(),
        "--mappers=3",
        "--reducers=6",
        "--verbose",
        "--go-live",
        ++numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(),
        "--zk-host", zkServer.getZkAddress(),
        "--collection", collection
    };
    args = prependInitialArgs(args);

    if (true) {
        tool = new MapReduceIndexerTool();
        res = ToolRunner.run(jobConf, tool, args);
        assertEquals(0, res);
        assertTrue(tool.job.isComplete());
        assertTrue(tool.job.isSuccessful());
        results = server.query(new SolrQuery("*:*"));
        assertEquals(2126, results.getResults().getNumFound());
    }
    server.shutdown();

    // try using zookeeper with replication
    String replicatedCollection = "replicated_collection";
    createCollection(replicatedCollection, 2, 3, 2);
    waitForRecoveriesToFinish(false);
    cloudClient.setDefaultCollection(replicatedCollection);
    fs.delete(inDir, true);
    fs.delete(outDir, true);
    fs.delete(dataDir, true);
    assertTrue(fs.mkdirs(dataDir));
    INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile3);

    args = new String[] {
        "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
        "--output-dir=" + outDir.toString(),
        "--mappers=3",
        "--reducers=6",
        "--verbose",
        "--go-live",
        "--zk-host", zkServer.getZkAddress(),
        "--collection", replicatedCollection,
        dataDir.toString()
    };
    args = prependInitialArgs(args);

    if (true) {
        tool = new MapReduceIndexerTool();
        res = ToolRunner.run(jobConf, tool, args);
        assertEquals(0, res);
        assertTrue(tool.job.isComplete());
        assertTrue(tool.job.isSuccessful());
        results = cloudClient.query(new SolrQuery("*:*"));
        assertEquals(2104, results.getResults().getNumFound());
        checkConsistency(replicatedCollection);
    }

    // try using solr_url with replication
    cloudClient.deleteByQuery("*:*");
    cloudClient.commit();
    fs.delete(inDir, true);
    fs.delete(dataDir, true);
    assertTrue(fs.mkdirs(dataDir));
    INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile3);

    args = new String[] {
        "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
        "--output-dir=" + outDir.toString(),
        "--shards", "2",
        "--mappers=3",
        "--verbose",
        "--go-live",
        "--go-live-threads", Integer.toString(random().nextInt(15) + 1),
        dataDir.toString()
    };
    args = prependInitialArgs(args);

    List<String> argList = new ArrayList<String>();
    getShardUrlArgs(argList, replicatedCollection);
    args = concat(args, argList.toArray(new String[0]));

    if (true) {
        tool = new MapReduceIndexerTool();
        res = ToolRunner.run(jobConf, tool, args);
        assertEquals(0, res);
        assertTrue(tool.job.isComplete());
        assertTrue(tool.job.isSuccessful());
        checkConsistency(replicatedCollection);
        results = cloudClient.query(new SolrQuery("*:*"));
        assertEquals(2104, results.getResults().getNumFound());
    }
}
From source file: org.cloudata.core.PerformanceTest.java
License: Apache License
private void runNIsMoreThanOne(final String cmd) throws IOException {
    checkTable();
    // Run a mapreduce job. Run as many maps as asked-for clients.
    // Before we start up the job, write out an input file with instruction
    // per client regards which row they are to start on.
    Path inputDir = writeInputFile(this.conf);
    this.conf.set(EvaluationMapTask.CMD_KEY, cmd);
    JobConf job = new JobConf(this.conf, this.getClass());
    FileInputFormat.addInputPath(job, inputDir);
    job.setInputFormat(TextInputFormat.class);
    job.setJobName("Cloudata Performance Evaluation");
    job.setMapperClass(EvaluationMapTask.class);
    job.setMaxMapAttempts(1);
    job.setMaxReduceAttempts(1);
    job.setNumMapTasks(this.N * 10); // Ten maps per client.
    job.setNumReduceTasks(1);
    job.setOutputFormat(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(inputDir, "outputs"));
    JobClient.runJob(job);
}
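This performance evaluation likewise allows only one attempt per task, presumably so that transparent retries of slow or failed tasks cannot distort the measured results.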
From source file: org.cloudata.core.tablet.backup.RestoreBinaryJob.java
License: Apache License
/**
 * Restores a table from a binary backup.
 *
 * @param tableName    name of the table to restore
 * @param columnNames  columns of the table
 * @param numOfVersion number of versions to keep
 * @param inputPath    path of the backup data
 */
public void runRestore(String tableName, String[] columnNames, int numOfVersion, String inputPath)
        throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf partitionJob = new JobConf(BackupJob.class);
    FileSystem fs = FileSystem.get(partitionJob);
    if (!fs.exists(new Path(inputPath))) {
        throw new IOException("input path does not exist: " + inputPath);
    }
    if (CTable.existsTable(nconf, tableName)) {
        throw new IOException("table already exists: " + tableName);
    }

    TableSchema tableSchema = new TableSchema(tableName, "", columnNames);
    tableSchema.setNumOfVersion(numOfVersion);
    CTable.createTable(nconf, tableSchema);

    String columns = "";
    for (String eachColumn : columnNames) {
        columns += eachColumn.trim() + ",";
    }
    columns = columns.substring(0, columns.length() - 1);

    String jobName = tableName + " restore";
    String tempDir = jobName + "_" + System.currentTimeMillis();

    partitionJob.setJobName(tableName + " restore");
    partitionJob.setMapperClass(RestoreBinaryPartitionMap.class);
    FileInputFormat.addInputPath(partitionJob, new Path(inputPath));
    partitionJob.setInputFormat(RestoreSequenceFileAsBinaryInputFormat.class);
    partitionJob.set(DefaultTabletInputFormat.OUTPUT_TABLE, tableName);
    FileOutputFormat.setOutputPath(partitionJob, new Path(tempDir));
    // map only
    partitionJob.setNumReduceTasks(0);

    JobClient.runJob(partitionJob);

    // delete temp output dir
    fs.delete(new Path(tempDir), true);

    ////////////////////////////////////////////////////////////////
    JobConf jobConf = new JobConf(BackupJob.class);
    jobConf.setJobName(tableName + " restore");
    jobConf.setMapperClass(RestoreBinaryMap.class);
    FileInputFormat.addInputPath(jobConf, new Path(inputPath));
    jobConf.setInputFormat(RestoreSequenceFileAsBinaryInputFormat.class);
    jobConf.set(DefaultTabletInputFormat.OUTPUT_TABLE, tableName);
    jobConf.set(DefaultTabletInputFormat.INPUT_COLUMN_LIST, columns);
    FileOutputFormat.setOutputPath(jobConf, new Path(tempDir));
    jobConf.setMaxMapAttempts(0);
    // map only
    jobConf.setNumReduceTasks(0);

    JobClient.runJob(jobConf);

    // delete temp output dir
    fs.delete(new Path(tempDir), true);
}
From source file: org.cloudata.core.tablet.backup.RestoreJob.java
License: Apache License
/**
 * Restores a table from a text backup.
 *
 * @param tableName    name of the table to restore
 * @param columnNames  columns of the table
 * @param numOfVersion number of versions to keep
 * @param inputPath    path of the backup data
 */
public void runRestore(String tableName, String[] columnNames, int numOfVersion, String inputPath)
        throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf partitionJob = new JobConf(RestoreJob.class);
    FileSystem fs = FileSystem.get(partitionJob);
    if (!fs.exists(new Path(inputPath))) {
        throw new IOException("input path does not exist: " + inputPath);
    }
    if (CTable.existsTable(nconf, tableName)) {
        throw new IOException("table already exists: " + tableName);
    }

    TableSchema tableSchema = new TableSchema(tableName, "", columnNames);
    tableSchema.setNumOfVersion(numOfVersion);
    CTable.createTable(nconf, tableSchema);

    String columns = "";
    for (String eachColumn : columnNames) {
        columns += eachColumn.trim() + ",";
    }
    columns = columns.substring(0, columns.length() - 1);

    String jobName = tableName + " restore";
    String tempDir = jobName + "_" + System.currentTimeMillis();

    partitionJob.setJobName(jobName + "_partition");
    partitionJob.setMapperClass(RestorePartitionMap.class);
    FileInputFormat.addInputPath(partitionJob, new Path(inputPath));
    partitionJob.setInputFormat(RestoreTextInputFormat.class);
    partitionJob.set(DefaultTabletInputFormat.OUTPUT_TABLE, tableName);
    FileOutputFormat.setOutputPath(partitionJob, new Path(tempDir));
    // map only
    partitionJob.setNumReduceTasks(0);

    JobClient.runJob(partitionJob);
    fs.delete(new Path(tempDir), true);

    ////////////////////////////////////////////////////////////
    JobConf jobConf = new JobConf(BackupJob.class);
    jobConf.setJobName(jobName);
    jobConf.setMapperClass(RestoreMap.class);
    FileInputFormat.addInputPath(jobConf, new Path(inputPath));
    jobConf.setInputFormat(RestoreTextInputFormat.class);
    jobConf.set(DefaultTabletInputFormat.OUTPUT_TABLE, tableName);
    jobConf.set(DefaultTabletInputFormat.INPUT_COLUMN_LIST, columns);
    FileOutputFormat.setOutputPath(jobConf, new Path(tempDir));
    jobConf.setMaxMapAttempts(0);
    // map only
    jobConf.setNumReduceTasks(0);

    JobClient.runJob(jobConf);

    // delete temp output dir
    fs.delete(new Path(tempDir), true);
}
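Both restore jobs, like the Cloudata test jobs below, pass 0 to setMaxMapAttempts on the map-only job that writes into the live table, apparently to rule out any re-execution of a failed map: re-running a map that has already applied mutations to the table could apply the same writes twice.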
From source file: org.cloudata.core.testjob.performance.ManyTableJob.java
License: Apache License
public static void getData(CloudataConf conf, Path keyPath) throws IOException {
    JobConf jobConf = new JobConf(TeraReadJob.class);
    jobConf.set("user.name", conf.getUserId());
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    Path tempOutputPath = new Path("ManyTableJob_Get_" + System.currentTimeMillis());

    jobConf.setJobName("ManyTableJob_Get_" + "(" + new Date() + ")");
    TextOutputFormat.setOutputPath(jobConf, tempOutputPath);

    // <MAP>
    jobConf.setMapperClass(ManyTableGetMap.class);
    jobConf.setInputFormat(TextInputFormat.class);
    TextInputFormat.addInputPath(jobConf, keyPath);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    // </MAP>

    // <REDUCE>
    jobConf.setNumReduceTasks(0);
    // </REDUCE>

    try {
        // Run Job
        JobClient.runJob(jobConf);
    } finally {
        // delete temp output path
        FileSystem fs = FileSystem.get(jobConf);
        FileUtil.delete(fs, tempOutputPath, true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}
From source file: org.cloudata.core.testjob.performance.ManyTableJob.java
License: Apache License
public static Path putData() throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf jobConf = new JobConf(ManyTableJob.class);
    jobConf.set("user.name", nconf.getUserId());
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    jobConf.setJobName("ManyTableJob_Put" + "(" + new Date() + ")");
    jobConf.setLong("mapred.task.timeout", 30 * 60 * 1000);

    Path outputPath = new Path("ManyTableJob_KEY_" + System.currentTimeMillis());
    FileOutputFormat.setOutputPath(jobConf, outputPath);

    // <MAP>
    jobConf.setMapperClass(ManyTablePutMap.class);
    jobConf.setInputFormat(SimpleInputFormat.class);
    jobConf.setNumMapTasks(numOfTables);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    // </MAP>

    // <REDUCE>
    jobConf.setNumReduceTasks(0);
    // </REDUCE>

    try {
        // Run Job
        JobClient.runJob(jobConf);
        return outputPath;
    } finally {
        // delete temp output path
        FileSystem fs = FileSystem.get(jobConf);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}
From source file: org.cloudata.core.testjob.performance.TestMultiThreadCTable.java
License: Apache License
public static Path putData(String outputDir) throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf jobConf = new JobConf(TestMultiThreadCTable.class);
    jobConf.set("user.name", nconf.getUserId());
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    jobConf.setJobName("TestMultiThreadNTable_" + "(" + new Date() + ")");
    jobConf.setLong("mapred.task.timeout", 30 * 60 * 1000);

    Path outputPath = new Path(outputDir);
    FileOutputFormat.setOutputPath(jobConf, outputPath);

    JobClient jobClient = new JobClient();
    int numOfRowPerMap = 100000 / jobClient.getClusterStatus().getMaxMapTasks();
    jobConf.setInt("numOfRowPerMap", numOfRowPerMap);

    // <MAP>
    jobConf.setMapperClass(PutDataMap.class);
    jobConf.setInputFormat(SimpleInputFormat.class);
    jobConf.setNumMapTasks(jobClient.getClusterStatus().getMaxMapTasks());
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    // </MAP>

    // <REDUCE>
    jobConf.setNumReduceTasks(0);
    // </REDUCE>

    try {
        // Run Job
        JobClient.runJob(jobConf);
        return outputPath;
    } finally {
        FileSystem fs = FileSystem.get(jobConf);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}
From source file: org.cloudata.core.testjob.tera.TeraJob.java
License: Apache License
public void runJob(String tableName, int numOfTablets, int dataLength, int totalGb, String keyOutputPath)
        throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf jobConf = new JobConf(TeraJob.class);
    jobConf.set("user.name", nconf.getUserId());
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    if (!CTable.existsTable(nconf, tableName)) {
        TableSchema tableInfo = new TableSchema(tableName, "Test");
        tableInfo.addColumn(new ColumnInfo("Col1"));
        tableInfo.addColumn(new ColumnInfo("Col2", TableSchema.CACHE_TYPE));
        tableInfo.addColumn(new ColumnInfo("Col3"));
        CTable.createTable(nconf, tableInfo);
    }

    jobConf.setJobName("TeraOnlineJob" + "(" + new Date() + ")");

    long rowsPerTask = ((((long) totalGb) * 1024L * 1024L * 1024L) / ((long) dataLength)) / (long) numOfTablets;

    jobConf.setInt("teraJob.dataLength", dataLength);
    jobConf.setLong("teraJob.rowsPerTask", rowsPerTask);
    jobConf.setLong("mapred.task.timeout", 30 * 60 * 1000);

    FileOutputFormat.setOutputPath(jobConf, new Path(keyOutputPath));

    // <MAP>
    jobConf.setMapperClass(TeraOnlineMap.class);
    jobConf.setInputFormat(SimpleInputFormat.class);
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, tableName);
    jobConf.setNumMapTasks(numOfTablets);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    // </MAP>

    // <REDUCE>
    jobConf.setNumReduceTasks(0);
    // </REDUCE>

    try {
        // Run Job
        JobClient.runJob(jobConf);
    } finally {
        // delete temp output path
        FileSystem fs = FileSystem.get(jobConf);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}
From source file: org.cloudata.core.testjob.tera.TeraReadJob.java
License: Apache License
public static void main(String[] args) throws IOException {
    if (args.length < 2) {
        System.out.println("Usage: java TeraReadJob <table name> <keyOutputPath>");
        System.exit(0);
    }

    String tableName = args[0];
    String keyOutputPath = args[1];

    CloudataConf nconf = new CloudataConf();

    JobConf jobConf = new JobConf(TeraReadJob.class);
    jobConf.set("user.name", nconf.getUserId());
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    if (!CTable.existsTable(nconf, tableName)) {
        System.out.println("Error: No table " + tableName);
        System.exit(0);
    }

    Path tempOutputPath = new Path("TeraReadJob" + System.currentTimeMillis());

    jobConf.setJobName("TeraReadJob" + "(" + new Date() + ")");
    jobConf.set("TeraReadJob.tableName", tableName);
    TextOutputFormat.setOutputPath(jobConf, tempOutputPath);

    // <MAP>
    jobConf.setMapperClass(TeraReadMap.class);
    jobConf.setInputFormat(TextInputFormat.class);
    TextInputFormat.addInputPath(jobConf, new Path(keyOutputPath));
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    // </MAP>

    // <REDUCE>
    jobConf.setNumReduceTasks(0);
    // </REDUCE>

    try {
        // Run Job
        JobClient.runJob(jobConf);
    } finally {
        // delete temp output path
        FileSystem fs = FileSystem.get(jobConf);
        FileUtil.delete(fs, tempOutputPath, true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}