List of usage examples for org.apache.hadoop.mapred.JobConf.setNumReduceTasks
public void setNumReduceTasks(int n)
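setNumReduceTasks(int n) sets the requested number of reduce tasks for the job. Passing 0, as most of the examples below do, makes the job map-only: each mapper's output is written directly by the output format and the shuffle/sort phase is skipped entirely. Here is a minimal sketch of that pattern using only the stock org.apache.hadoop.mapred API; the "input" and "output" paths are placeholders, not taken from any example below.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;

public class MapOnlyJob {
    public static void main(String[] args) throws IOException {
        JobConf jobConf = new JobConf(MapOnlyJob.class);
        jobConf.setJobName("MapOnlyJob");

        jobConf.setMapperClass(IdentityMapper.class);
        jobConf.setInputFormat(TextInputFormat.class);
        FileInputFormat.addInputPath(jobConf, new Path("input"));     // placeholder path
        FileOutputFormat.setOutputPath(jobConf, new Path("output"));  // placeholder path

        // 0 reduce tasks: map output goes straight to the output format,
        // and no shuffle or sort runs.
        jobConf.setNumReduceTasks(0);

        JobClient.runJob(jobConf);
    }
}

To run reducers instead, pass a positive count and set a reducer class, as the FirstMapReduce example near the end of this list does with one reduce task per tablet.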
From source file: org.cloudata.core.tablet.backup.RestoreBinaryJob.java
License: Apache License
/**
 * Restores a table from a binary backup.
 *
 * @param tableName    name of the table to restore
 * @param columnNames  columns of the restored table
 * @param numOfVersion number of versions to keep
 * @param inputPath    path of the backup data
 */
public void runRestore(String tableName, String[] columnNames, int numOfVersion, String inputPath)
        throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf partitionJob = new JobConf(BackupJob.class);

    FileSystem fs = FileSystem.get(partitionJob);
    if (!fs.exists(new Path(inputPath))) {
        throw new IOException("input path does not exist: " + inputPath);
    }

    if (CTable.existsTable(nconf, tableName)) {
        throw new IOException("table already exists: " + tableName);
    }

    TableSchema tableSchema = new TableSchema(tableName, "", columnNames);
    tableSchema.setNumOfVersion(numOfVersion);
    CTable.createTable(nconf, tableSchema);

    String columns = "";
    for (String eachColumn : columnNames) {
        columns += eachColumn.trim() + ",";
    }
    columns = columns.substring(0, columns.length() - 1);

    String jobName = tableName + " restore";
    String tempDir = jobName + "_" + System.currentTimeMillis();

    partitionJob.setJobName(tableName + " restore");
    partitionJob.setMapperClass(RestoreBinaryPartitionMap.class);
    FileInputFormat.addInputPath(partitionJob, new Path(inputPath));
    partitionJob.setInputFormat(RestoreSequenceFileAsBinaryInputFormat.class);
    partitionJob.set(DefaultTabletInputFormat.OUTPUT_TABLE, tableName);
    FileOutputFormat.setOutputPath(partitionJob, new Path(tempDir));

    // map only
    partitionJob.setNumReduceTasks(0);

    JobClient.runJob(partitionJob);

    // delete temp output dir
    fs.delete(new Path(tempDir), true);

    ////////////////////////////////////////////////////////////////

    JobConf jobConf = new JobConf(BackupJob.class);
    jobConf.setJobName(tableName + " restore");
    jobConf.setMapperClass(RestoreBinaryMap.class);
    FileInputFormat.addInputPath(jobConf, new Path(inputPath));
    jobConf.setInputFormat(RestoreSequenceFileAsBinaryInputFormat.class);
    jobConf.set(DefaultTabletInputFormat.OUTPUT_TABLE, tableName);
    jobConf.set(DefaultTabletInputFormat.INPUT_COLUMN_LIST, columns);
    FileOutputFormat.setOutputPath(jobConf, new Path(tempDir));
    jobConf.setMaxMapAttempts(0);

    // map only
    jobConf.setNumReduceTasks(0);

    JobClient.runJob(jobConf);

    // delete temp output dir
    fs.delete(new Path(tempDir), true);
}
From source file: org.cloudata.core.tablet.backup.RestoreJob.java
License: Apache License
/**
 * Restores a table from a text backup.
 *
 * @param tableName    name of the table to restore
 * @param columnNames  columns of the restored table
 * @param numOfVersion number of versions to keep
 * @param inputPath    path of the backup data
 */
public void runRestore(String tableName, String[] columnNames, int numOfVersion, String inputPath)
        throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf partitionJob = new JobConf(RestoreJob.class);

    FileSystem fs = FileSystem.get(partitionJob);
    if (!fs.exists(new Path(inputPath))) {
        throw new IOException("input path does not exist: " + inputPath);
    }

    if (CTable.existsTable(nconf, tableName)) {
        throw new IOException("table already exists: " + tableName);
    }

    TableSchema tableSchema = new TableSchema(tableName, "", columnNames);
    tableSchema.setNumOfVersion(numOfVersion);
    CTable.createTable(nconf, tableSchema);

    String columns = "";
    for (String eachColumn : columnNames) {
        columns += eachColumn.trim() + ",";
    }
    columns = columns.substring(0, columns.length() - 1);

    String jobName = tableName + " restore";
    String tempDir = jobName + "_" + System.currentTimeMillis();

    partitionJob.setJobName(jobName + "_partition");
    partitionJob.setMapperClass(RestorePartitionMap.class);
    FileInputFormat.addInputPath(partitionJob, new Path(inputPath));
    partitionJob.setInputFormat(RestoreTextInputFormat.class);
    partitionJob.set(DefaultTabletInputFormat.OUTPUT_TABLE, tableName);
    FileOutputFormat.setOutputPath(partitionJob, new Path(tempDir));

    // map only
    partitionJob.setNumReduceTasks(0);

    JobClient.runJob(partitionJob);

    fs.delete(new Path(tempDir), true);

    ////////////////////////////////////////////////////////////

    JobConf jobConf = new JobConf(BackupJob.class);
    jobConf.setJobName(jobName);
    jobConf.setMapperClass(RestoreMap.class);
    FileInputFormat.addInputPath(jobConf, new Path(inputPath));
    jobConf.setInputFormat(RestoreTextInputFormat.class);
    jobConf.set(DefaultTabletInputFormat.OUTPUT_TABLE, tableName);
    jobConf.set(DefaultTabletInputFormat.INPUT_COLUMN_LIST, columns);
    FileOutputFormat.setOutputPath(jobConf, new Path(tempDir));
    jobConf.setMaxMapAttempts(0);

    // map only
    jobConf.setNumReduceTasks(0);

    JobClient.runJob(jobConf);

    // delete temp output dir
    fs.delete(new Path(tempDir), true);
}
From source file: org.cloudata.core.testjob.performance.ManyTableJob.java
License: Apache License
public static void getData(CloudataConf conf, Path keyPath) throws IOException {
    JobConf jobConf = new JobConf(TeraReadJob.class);
    jobConf.set("user.name", conf.getUserId());

    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    Path tempOutputPath = new Path("ManyTableJob_Get_" + System.currentTimeMillis());

    jobConf.setJobName("ManyTableJob_Get_" + "(" + new Date() + ")");
    TextOutputFormat.setOutputPath(jobConf, tempOutputPath);

    //<MAP>
    jobConf.setMapperClass(ManyTableGetMap.class);
    jobConf.setInputFormat(TextInputFormat.class);
    TextInputFormat.addInputPath(jobConf, keyPath);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    try {
        //Run Job
        JobClient.runJob(jobConf);
    } finally {
        //delete temp output path
        FileSystem fs = FileSystem.get(jobConf);
        FileUtil.delete(fs, tempOutputPath, true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}
From source file: org.cloudata.core.testjob.performance.ManyTableJob.java
License: Apache License
public static Path putData() throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf jobConf = new JobConf(ManyTableJob.class);
    jobConf.set("user.name", nconf.getUserId());

    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    jobConf.setJobName("ManyTableJob_Put" + "(" + new Date() + ")");
    jobConf.setLong("mapred.task.timeout", 30 * 60 * 1000);

    Path outputPath = new Path("ManyTableJob_KEY_" + System.currentTimeMillis());
    FileOutputFormat.setOutputPath(jobConf, outputPath);

    //<MAP>
    jobConf.setMapperClass(ManyTablePutMap.class);
    jobConf.setInputFormat(SimpleInputFormat.class);
    jobConf.setNumMapTasks(numOfTables);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    try {
        //Run Job
        JobClient.runJob(jobConf);
        return outputPath;
    } finally {
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}
From source file: org.cloudata.core.testjob.performance.TestMultiThreadCTable.java
License: Apache License
public static Path putData(String outputDir) throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf jobConf = new JobConf(TestMultiThreadCTable.class);
    jobConf.set("user.name", nconf.getUserId());

    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    jobConf.setJobName("TestMultiThreadNTable_" + "(" + new Date() + ")");
    jobConf.setLong("mapred.task.timeout", 30 * 60 * 1000);

    Path outputPath = new Path(outputDir);
    FileOutputFormat.setOutputPath(jobConf, outputPath);

    JobClient jobClient = new JobClient();
    int numOfRowPerMap = 100000 / jobClient.getClusterStatus().getMaxMapTasks();
    jobConf.setInt("numOfRowPerMap", numOfRowPerMap);

    //<MAP>
    jobConf.setMapperClass(PutDataMap.class);
    jobConf.setInputFormat(SimpleInputFormat.class);
    jobConf.setNumMapTasks(jobClient.getClusterStatus().getMaxMapTasks());
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    try {
        //Run Job
        JobClient.runJob(jobConf);
        return outputPath;
    } finally {
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}
From source file: org.cloudata.core.testjob.tera.TeraJob.java
License: Apache License
public void runJob(String tableName, int numOfTablets, int dataLength, int totalGb, String keyOutputPath)
        throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf jobConf = new JobConf(TeraJob.class);
    jobConf.set("user.name", nconf.getUserId());

    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    if (!CTable.existsTable(nconf, tableName)) {
        TableSchema tableInfo = new TableSchema(tableName, "Test");
        tableInfo.addColumn(new ColumnInfo("Col1"));
        tableInfo.addColumn(new ColumnInfo("Col2", TableSchema.CACHE_TYPE));
        tableInfo.addColumn(new ColumnInfo("Col3"));
        CTable.createTable(nconf, tableInfo);
    }

    jobConf.setJobName("TeraOnlineJob" + "(" + new Date() + ")");

    long rowsPerTask = ((((long) totalGb) * 1024L * 1024L * 1024L) / ((long) dataLength)) / (long) numOfTablets;

    jobConf.setInt("teraJob.dataLength", dataLength);
    jobConf.setLong("teraJob.rowsPerTask", rowsPerTask);
    jobConf.setLong("mapred.task.timeout", 30 * 60 * 1000);

    FileOutputFormat.setOutputPath(jobConf, new Path(keyOutputPath));

    //<MAP>
    jobConf.setMapperClass(TeraOnlineMap.class);
    jobConf.setInputFormat(SimpleInputFormat.class);
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, tableName);
    jobConf.setNumMapTasks(numOfTablets);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    try {
        //Run Job
        JobClient.runJob(jobConf);
    } finally {
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}
From source file: org.cloudata.core.testjob.tera.TeraReadJob.java
License: Apache License
public static void main(String[] args) throws IOException {
    if (args.length < 2) {
        System.out.println("Usage: java TeraReadJob <table name> <keyOutputPath>");
        System.exit(0);
    }

    String tableName = args[0];
    String keyOutputPath = args[1];

    CloudataConf nconf = new CloudataConf();

    JobConf jobConf = new JobConf(TeraReadJob.class);
    jobConf.set("user.name", nconf.getUserId());

    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    if (!CTable.existsTable(nconf, tableName)) {
        System.out.println("Error: No table " + tableName);
        System.exit(0);
    }

    Path tempOutputPath = new Path("TeraReadJob" + System.currentTimeMillis());

    jobConf.setJobName("TeraReadJob" + "(" + new Date() + ")");
    jobConf.set("TeraReadJob.tableName", tableName);
    TextOutputFormat.setOutputPath(jobConf, tempOutputPath);

    //<MAP>
    jobConf.setMapperClass(TeraReadMap.class);
    jobConf.setInputFormat(TextInputFormat.class);
    TextInputFormat.addInputPath(jobConf, new Path(keyOutputPath));
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    try {
        //Run Job
        JobClient.runJob(jobConf);
    } finally {
        //delete temp output path
        FileSystem fs = FileSystem.get(jobConf);
        FileUtil.delete(fs, tempOutputPath, true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}
From source file: org.cloudata.core.testjob.tera.TeraScanJob.java
License: Apache License
public void runJob(String tableName) throws IOException {
    JobConf jobConf = new JobConf(TeraScanJob.class);

    CloudataConf nconf = new CloudataConf();

    if (!CTable.existsTable(nconf, tableName)) {
        System.out.println("No table: " + tableName);
        System.exit(0);
    }

    Path tempOutputPath = new Path("TeraScanJob" + System.currentTimeMillis());

    jobConf.setJobName("TeraScanJob" + "(" + new Date() + ")");

    //<MAP>
    jobConf.setMapperClass(TeraScanMap.class);
    jobConf.setInputFormat(TeraScanJobTabletInputFormat.class);
    jobConf.set(AbstractTabletInputFormat.INPUT_TABLE, tableName);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    //    jobConf.setReducerClass(DocFreqReduce.class);
    //    jobConf.setOutputKeyClass(Text.class);
    //    jobConf.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    //Run Job
    JobClient.runJob(jobConf);

    //delete temp output path
    FileSystem fs = FileSystem.get(jobConf);
    FileUtil.delete(fs, tempOutputPath, true);
}
From source file: org.cloudata.examples.first.FirstMapReduce.java
License: Apache License
public static void main(String[] args) throws Exception {
    // Create the output table if it does not already exist.
    CloudataConf conf = new CloudataConf();

    String outputTableName = "InvertedTable";
    TableSchema outputTableSchema = new TableSchema();
    outputTableSchema.addColumn("InvertedCloumn");
    if (!CTable.existsTable(conf, outputTableName)) {
        CTable.createTable(conf, outputTableSchema);
    }

    JobConf jobConf = new JobConf(FirstMapReduce.class);
    jobConf.setJobName("FirstMapReduce");

    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    //<Mapper>
    // Set the Mapper class; the InputFormat is a TabletInputFormat.
    jobConf.setMapperClass(FirstMapReduceMapper.class);
    jobConf.setInputFormat(FirstMapReduceInputFormat.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    //</Mapper>

    //<Reducer>
    String outputPath = "temp/FirstMapReduce";
    FileOutputFormat.setOutputPath(jobConf, new Path(outputPath));

    // Set the Reducer class.
    jobConf.setReducerClass(FirstMapReduceReducer.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);

    // Run one reduce task per tablet of the input table.
    CTable ctable = CTable.openTable(conf, "SampleTable1");
    TabletInfo[] tabletInfos = ctable.listTabletInfos();
    jobConf.setNumReduceTasks(tabletInfos.length);

    // Do not retry failed reduce tasks.
    jobConf.setMaxReduceAttempts(0);
    //</Reducer>

    try {
        // Run the job.
        JobClient.runJob(jobConf);
    } finally {
        // Delete the temp output dir.
        FileSystem fs = FileSystem.get(jobConf);
        fs.delete(new Path(outputPath), true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}
From source file: org.cloudata.examples.first.HdfsToCloudataMapReduce.java
License: Apache License
public void run(String[] args) throws IOException {
    if (args.length < 2) {
        System.out.println("Usage: java HdfsToCloudataMapReduce <input path> <table name>");
        System.exit(0);
    }

    Path inputPath = new Path(args[0]);
    String tableName = args[1];

    CloudataConf nconf = new CloudataConf();
    if (!CTable.existsTable(nconf, tableName)) {
        TableSchema tableSchema = new TableSchema(tableName);
        tableSchema.addColumn("col1");
        CTable.createTable(nconf, tableSchema);
    }

    JobConf jobConf = new JobConf(HdfsToCloudataMapReduce.class);
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    // <MAP>
    FileInputFormat.addInputPath(jobConf, inputPath);
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.setMapperClass(HdfsToCloudataMappper.class);
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, tableName);
    // </MAP>

    // <REDUCE>
    // Map only
    FileOutputFormat.setOutputPath(jobConf, new Path("HdfsToCloudataMapReduce_" + System.currentTimeMillis()));
    jobConf.setNumReduceTasks(0);
    // </REDUCE>

    try {
        JobClient.runJob(jobConf);
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        FileSystem fs = FileSystem.get(jobConf);
        fs.delete(FileOutputFormat.getOutputPath(jobConf), true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}