List of usage examples for org.apache.hadoop.mapred.JobConf.setOutputFormat
public void setOutputFormat(Class<? extends OutputFormat> theClass)
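All of the examples below call setOutputFormat while assembling a JobConf for the old (org.apache.hadoop.mapred) API. For orientation, here is a minimal, self-contained sketch of the call in context; the driver class name and the reliance on the default identity mapper/reducer are illustrative assumptions, not taken from any of the sources below.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

// Hypothetical driver class; any class packaged in the job jar works here.
public class SetOutputFormatExample {
  public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(SetOutputFormatExample.class);
    conf.setJobName("setOutputFormat-example");

    // With no mapper/reducer set, Hadoop falls back to the identity classes,
    // so the job passes (byte offset, line) pairs straight through.
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);

    conf.setInputFormat(TextInputFormat.class);
    // setOutputFormat decides how output records are written;
    // TextOutputFormat (the default) emits tab-separated key/value lines.
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
  }
}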
From source file:ivory.server.RunDistributedRetrievalServers.java
License:Apache License
/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
  if (args.length < 2) {
    printUsage();
    return -1;
  }

  String configFile = args[0];
  FileSystem fs = FileSystem.get(getConf());
  Document d = DocumentBuilderFactory.newInstance().newDocumentBuilder()
      .parse(fs.open(new Path(configFile)));

  sLogger.info("Reading configuration to determine number of servers to launch:");
  List<String> sids = new ArrayList<String>();
  NodeList servers = d.getElementsByTagName("server");
  for (int i = 0; i < servers.getLength(); i++) {
    Node node = servers.item(i);

    // get server id
    String sid = XMLTools.getAttributeValue(node, "id", null);
    if (sid == null) {
      throw new Exception("Must specify a query id attribute for every server!");
    }
    sLogger.info(" - sid: " + sid);
    sids.add(sid);
  }

  int port = 7000;
  int numServers = sids.size();

  String configPath = args[1];
  if (fs.exists(new Path(configPath))) {
    fs.delete(new Path(configPath), true);
  }

  String fname = appendPath(configPath, "config-" + numServers + ".txt");
  sLogger.info("Writing configuration to: " + fname);
  StringBuffer sb = new StringBuffer();
  for (int n = 0; n < numServers; n++) {
    port++;
    sb.append(sids.get(n) + " " + port + "\n");
  }
  FSDataOutputStream out = fs.create(new Path(fname), true);
  out.writeBytes(sb.toString());
  out.close();

  JobConf conf = new JobConf(RetrievalServer.class);
  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(0);
  conf.setInputFormat(NLineInputFormat.class);
  conf.setOutputFormat(NullOutputFormat.class);
  conf.setMapperClass(ServerMapper.class);
  FileInputFormat.setInputPaths(conf, new Path(fname));

  conf.set("Ivory.ConfigFile", configFile);
  conf.set("Ivory.ConfigPath", configPath);
  conf.setJobName("RetrievalServers");

  // conf.set("mapred.child.java.opts", "-Xmx2048m");
  conf.set("mapred.child.java.opts", "-Xmx2048m");
  // conf.set("mapred.job.queue.name", "search");

  JobClient client = new JobClient(conf);
  client.submitJob(conf);

  sLogger.info("Waiting for servers to start up...");

  // poll HDFS for hostnames and ports
  boolean allStarted = true;
  do {
    allStarted = true;
    for (int n = 0; n < numServers; n++) {
      String f = appendPath(configPath, sids.get(n) + ".host");
      if (!fs.exists(new Path(f))) {
        allStarted = false;
      }
    }
    Thread.sleep(10000);
    sLogger.info(" ...");
  } while (!allStarted);

  // poll HDFS for ready signal that the index is ready
  boolean allReady = true;
  do {
    allReady = true;
    for (int n = 0; n < numServers; n++) {
      String f = appendPath(configPath, sids.get(n) + ".ready");
      if (!fs.exists(new Path(f))) {
        allReady = false;
      }
    }
    Thread.sleep(10000);
    sLogger.info(" ...");
  } while (!allReady);

  sLogger.info("All servers ready!");
  sLogger.info("Host information:");
  for (int n = 0; n < numServers; n++) {
    String f = appendPath(configPath, sids.get(n) + ".host");
    sLogger.info(" sid=" + sids.get(n) + ", " + FSProperty.readString(fs, f));
  }

  return 0;
}
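Note the division of labor in this driver: NLineInputFormat hands each map task one line of the generated config file (a server id and port), and since the mappers exist only to start long-running retrieval servers, setOutputFormat(NullOutputFormat.class) makes the job write no output at all. MapReduce is used here purely as a cluster-wide process launcher, with startup and readiness signaled out-of-band through per-server .host and .ready files polled on HDFS.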
From source file:ivory.server.RunRetrievalBroker.java
License:Apache License
/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    printUsage();
    return -1;
  }

  String configPath = args[0];
  FileSystem fs = FileSystem.get(getConf());

  String ids = "";
  sLogger.info("Starting retrieval broker...");
  sLogger.info("server config path: " + configPath);

  FileStatus[] stats = fs.listStatus(new Path(configPath));
  if (stats == null) {
    sLogger.info("Error: " + configPath + " not found!");
    return -1;
  }

  String scoreMergeModel = args[1];
  if (!scoreMergeModel.equals("sort") && !scoreMergeModel.equals("normalize")) {
    throw new RuntimeException("Unsupported score merging model: " + args[1]);
  }

  for (int i = 0; i < stats.length; i++) {
    String s = stats[i].getPath().toString();
    if (!s.endsWith(".host"))
      continue;

    String sid = s.substring(s.lastIndexOf("/") + 1, s.lastIndexOf(".host"));
    sLogger.info("sid=" + sid + ", host=" + s);

    if (ids.length() != 0)
      ids += ";";
    ids += sid;
  }

  JobConf conf = new JobConf(RunRetrievalBroker.class);
  conf.setJobName("RetrievalBroker");
  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(0);
  conf.setInputFormat(NullInputFormat.class);
  conf.setOutputFormat(NullOutputFormat.class);
  conf.setMapperClass(ServerMapper.class);
  conf.set("serverIDs", ids);
  conf.set("ServerAddressPath", configPath);
  conf.set("ScoreMergeModel", scoreMergeModel);
  conf.set("mapred.child.java.opts", "-Xmx2048m");

  fs.delete(new Path(appendPath(configPath, "broker.ready")), true);

  JobClient client = new JobClient(conf);
  client.submitJob(conf);

  sLogger.info("broker started!");
  while (true) {
    String f = appendPath(configPath, "broker.ready");
    if (fs.exists(new Path(f))) {
      break;
    }
    Thread.sleep(5000);
  }

  String s = FSProperty.readString(FileSystem.get(conf), appendPath(configPath, "broker.ready"));
  sLogger.info("broker ready at " + s);

  return 0;
}
From source file:ivory.smrf.retrieval.distributed.RunDistributedRetrievalServers.java
License:Apache License
/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
  if (args.length < 2) {
    printUsage();
    return -1;
  }

  String configFile = args[0];
  FileSystem fs = FileSystem.get(getConf());
  Document d = DocumentBuilderFactory.newInstance().newDocumentBuilder()
      .parse(fs.open(new Path(configFile)));

  sLogger.info("Reading configuration to determine number of servers to launch:");
  List<String> sids = new ArrayList<String>();
  NodeList servers = d.getElementsByTagName("server");
  for (int i = 0; i < servers.getLength(); i++) {
    Node node = servers.item(i);

    // get server id
    String sid = XMLTools.getAttributeValue(node, "id", null);
    if (sid == null) {
      throw new Exception("Must specify a query id attribute for every server!");
    }
    sLogger.info(" - sid: " + sid);
    sids.add(sid);
  }

  int port = 7000;
  int numServers = sids.size();

  String configPath = args[1];
  if (fs.exists(new Path(configPath))) {
    fs.delete(new Path(configPath), true);
  }

  String fname = appendPath(configPath, "config-" + numServers + ".txt");
  sLogger.info("Writing configuration to: " + fname);
  StringBuffer sb = new StringBuffer();
  for (int n = 0; n < numServers; n++) {
    port++;
    sb.append(sids.get(n) + " " + port + "\n");
  }
  FSDataOutputStream out = fs.create(new Path(fname), true);
  out.writeBytes(sb.toString());
  out.close();

  JobConf conf = new JobConf(getConf(), RetrievalServer.class);
  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(0);
  conf.setInputFormat(NLineInputFormat.class);
  conf.setOutputFormat(NullOutputFormat.class);
  conf.setMapperClass(ServerMapper.class);
  FileInputFormat.setInputPaths(conf, new Path(fname));

  conf.set("Ivory.ConfigFile", configFile);
  conf.set("Ivory.ConfigPath", configPath);
  conf.setJobName("RetrievalServers");

  // conf.set("mapred.child.java.opts", "-Xmx2048m");
  conf.set("mapred.child.java.opts", "-Xmx2048m");
  // conf.set("mapred.job.queue.name", "search");

  JobClient client = new JobClient(conf);
  client.submitJob(conf);

  sLogger.info("Waiting for servers to start up...");

  // poll HDFS for hostnames and ports
  boolean allStarted = true;
  do {
    allStarted = true;
    for (int n = 0; n < numServers; n++) {
      String f = appendPath(configPath, sids.get(n) + ".host");
      if (!fs.exists(new Path(f))) {
        allStarted = false;
      }
    }
    Thread.sleep(10000);
    sLogger.info(" ...");
  } while (!allStarted);

  // poll HDFS for ready signal that the index is ready
  boolean allReady = true;
  do {
    allReady = true;
    for (int n = 0; n < numServers; n++) {
      String f = appendPath(configPath, sids.get(n) + ".ready");
      if (!fs.exists(new Path(f))) {
        allReady = false;
      }
    }
    Thread.sleep(10000);
    sLogger.info(" ...");
  } while (!allReady);

  sLogger.info("All servers ready!");
  sLogger.info("Host information:");
  for (int n = 0; n < numServers; n++) {
    String f = appendPath(configPath, sids.get(n) + ".host");
    sLogger.info(" sid=" + sids.get(n) + ", " + FSProperty.readString(fs, f));
  }

  return 0;
}
From source file:ivory.smrf.retrieval.distributed.RunQueryBroker.java
License:Apache License
/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
  if (args.length != 5) {
    printUsage();
    return -1;
  }

  String configPath = args[0];
  FileSystem fs = FileSystem.get(getConf());

  sLogger.info("server config path: " + configPath);
  FileStatus[] stats = fs.listStatus(new Path(configPath));
  if (stats == null) {
    sLogger.info("Error: " + configPath + " not found!");
    return -1;
  }

  String runtag = args[1];
  String queriesFilePath = args[2];
  String resultsFilePath = args[3];
  int numHits = Integer.parseInt(args[4]);

  JobConf conf = new JobConf(getConf(), RunQueryBroker.class);
  conf.setJobName("RunQueryBroker");
  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(0);
  conf.setInputFormat(NullInputFormat.class);
  conf.setOutputFormat(NullOutputFormat.class);
  conf.setMapperClass(Server.class);
  conf.set("QueriesFilePath", queriesFilePath);
  conf.set("ConfigPath", configPath);
  conf.set("ResultsFilePath", resultsFilePath);
  conf.set("Runtag", runtag);
  conf.setInt("NumHits", numHits);
  conf.set("mapred.child.java.opts", "-Xmx2048m");

  JobClient client = new JobClient(conf);
  client.submitJob(conf);

  sLogger.info("runner started!");

  return 0;
}
From source file:ivory.smrf.retrieval.distributed.RunRetrievalBroker.java
License:Apache License
/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    printUsage();
    return -1;
  }

  String configPath = args[0];
  FileSystem fs = FileSystem.get(getConf());

  String ids = "";
  sLogger.info("Starting retrieval broker...");
  sLogger.info("server config path: " + configPath);

  FileStatus[] stats = fs.listStatus(new Path(configPath));
  if (stats == null) {
    sLogger.info("Error: " + configPath + " not found!");
    return -1;
  }

  String scoreMergeModel = args[1];
  if (!scoreMergeModel.equals("sort") && !scoreMergeModel.equals("normalize")) {
    throw new RuntimeException("Unsupported score merging model: " + args[1]);
  }

  for (int i = 0; i < stats.length; i++) {
    String s = stats[i].getPath().toString();
    if (!s.endsWith(".host"))
      continue;

    String sid = s.substring(s.lastIndexOf("/") + 1, s.lastIndexOf(".host"));
    sLogger.info("sid=" + sid + ", host=" + s);

    if (ids.length() != 0)
      ids += ";";
    ids += sid;
  }

  JobConf conf = new JobConf(getConf(), RunRetrievalBroker.class);
  conf.setJobName("RetrievalBroker");
  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(0);
  conf.setInputFormat(NullInputFormat.class);
  conf.setOutputFormat(NullOutputFormat.class);
  conf.setMapperClass(ServerMapper.class);
  conf.set("serverIDs", ids);
  conf.set("ServerAddressPath", configPath);
  conf.set("ScoreMergeModel", scoreMergeModel);
  conf.set("mapred.child.java.opts", "-Xmx2048m");

  fs.delete(new Path(appendPath(configPath, "broker.ready")), true);

  JobClient client = new JobClient(conf);
  client.submitJob(conf);

  sLogger.info("broker started!");
  while (true) {
    String f = appendPath(configPath, "broker.ready");
    if (fs.exists(new Path(f))) {
      break;
    }
    Thread.sleep(5000);
  }

  String s = FSProperty.readString(FileSystem.get(conf), appendPath(configPath, "broker.ready"));
  sLogger.info("broker ready at " + s);

  return 0;
}
From source file:ivory.smrf.retrieval.RunQueryBroker.java
License:Apache License
/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
  if (args.length != 5) {
    printUsage();
    return -1;
  }

  String configPath = args[0];
  FileSystem fs = FileSystem.get(getConf());

  sLogger.info("server config path: " + configPath);
  FileStatus[] stats = fs.listStatus(new Path(configPath));
  if (stats == null) {
    sLogger.info("Error: " + configPath + " not found!");
    return -1;
  }

  String runtag = args[1];
  String queriesFilePath = args[2];
  String resultsFilePath = args[3];
  int numHits = Integer.parseInt(args[4]);

  JobConf conf = new JobConf(RunQueryBroker.class);
  conf.setJobName("RunQueryBroker");
  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(0);
  conf.setInputFormat(NullInputFormat.class);
  conf.setOutputFormat(NullOutputFormat.class);
  conf.setMapperClass(Server.class);
  conf.set("QueriesFilePath", queriesFilePath);
  conf.set("ConfigPath", configPath);
  conf.set("ResultsFilePath", resultsFilePath);
  conf.set("Runtag", runtag);
  conf.setInt("NumHits", numHits);
  conf.set("mapred.child.java.opts", "-Xmx2048m");

  JobClient client = new JobClient(conf);
  client.submitJob(conf);

  sLogger.info("runner started!");

  return 0;
}
From source file:ivory.smrf.retrieval.RunQueryHDFS.java
License:Apache License
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    System.out.println("usage: [queries-file] [models-file]");
    ToolRunner.printGenericCommandUsage(System.out);
    return -1;
  }

  String argsStr = Joiner.on(";").join(args);

  JobConf conf = new JobConf(getConf(), RunQueryHDFS.class);
  conf.setJobName("RunQueryHDFS");
  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(0);
  conf.setInputFormat(NullInputFormat.class);
  conf.setOutputFormat(NullOutputFormat.class);
  conf.setMapperClass(QueryRunner.class);
  conf.set("args", argsStr);
  conf.set("mapred.child.java.opts", "-Xmx16g");

  LOG.info("argsStr: " + argsStr);

  JobClient client = new JobClient(conf);
  client.submitJob(conf);

  LOG.info("runner started!");

  return 0;
}
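This is the same launcher pattern stripped to its minimum: one map task, zero reducers, and Ivory's NullInputFormat and NullOutputFormat (helper classes from the Ivory codebase, not part of Hadoop itself) so nothing is read from or written to HDFS. The real program arguments reach the single QueryRunner task through conf.set("args", argsStr).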
From source file:job.uncombine.compressed.BigBuildInvertedIndex.java
License:Apache License
/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
  // long GB = 1024 * 1024 * 1024;
  // long totalDataSize = 1 * GB;
  int reduceNumArray[] = { 9, 18 };
  int splitSizeMBArray[] = { 64, 128, 256 };
  int xmxArray[] = { 1000, 2000, 3000, 4000 };
  int xmsArray[] = { 0, 1 };
  int ismbArray[] = { 200, 400, 600, 800 };

  for (int splitIndex = 0; splitIndex < splitSizeMBArray.length; splitIndex++) {
    for (int reduceNumIndex = 0; reduceNumIndex < reduceNumArray.length; reduceNumIndex++) {
      for (int xmxIndex = 0; xmxIndex < xmxArray.length; xmxIndex++) {
        for (int xmsIndex = 0; xmsIndex < xmsArray.length; xmsIndex++) {
          for (int ismbIndex = 0; ismbIndex < ismbArray.length; ismbIndex++) {
            int reduceNum = reduceNumArray[reduceNumIndex];
            int splitMB = splitSizeMBArray[splitIndex];
            int xmx = xmxArray[xmxIndex];
            int xms = xmsArray[xmsIndex] * xmx;
            int ismb = ismbArray[ismbIndex];

            JobConf conf = new JobConf(getConf(), BigBuildInvertedIndex.class);
            conf.setLong("mapred.min.split.size", SplitTable.getMapred_min_split_size(splitMB));
            conf.setLong("mapred.max.split.size", SplitTable.getMapred_max_split_size(splitMB));
            // conf.setInt("my.sample.split.num", (int) (totalDataSize / (splitMB * 1024 * 1024)));
            conf.setInt("mapred.reduce.tasks", reduceNum);
            conf.setInt("io.sort.mb", ismb);

            if (xms == 0)
              conf.set("mapred.child.java.opts", "-Xmx" + xmx + "m");
            else
              conf.set("mapred.child.java.opts", "-Xmx" + xmx + "m -Xms" + xms + "m");

            conf.setInt("child.monitor.metrics.seconds", 2);
            conf.setInt("child.monitor.jvm.seconds", 2);
            conf.setInt("child.monitor.jstat.seconds", 2);

            conf.setJobName("BigBuildInvertedIndex " + splitMB + "MB "
                + conf.get("mapred.child.java.opts") + " ismb=" + ismb + " RN=" + reduceNum);

            String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
            if (otherArgs.length != 2) {
              System.err.println("Usage: BigBuildInvertedIndex <in> <out>");
              System.exit(2);
            }

            conf.setMapOutputKeyClass(Text.class);
            conf.setMapOutputValueClass(PairOfInts.class);
            conf.setOutputKeyClass(Text.class);
            conf.setOutputValueClass(PairOfWritables.class);
            SequenceFileOutputFormat.setOutputCompressionType(conf, CompressionType.BLOCK);
            conf.setOutputFormat(MapFileOutputFormat.class);

            conf.setMapperClass(MyMapper.class);
            // conf.setCombinerClass(IdentityReducer.class);
            conf.setReducerClass(MyReducer.class);

            FileInputFormat.setInputPaths(conf, new Path(otherArgs[0]));
            FileOutputFormat.setOutputPath(conf, new Path(otherArgs[1]));
            FileSystem.get(conf).delete(new Path(otherArgs[1]), true);

            try {
              JobClient.runJob(conf);
            } catch (IOException e) {
              e.printStackTrace();
            }

            Thread.sleep(15000);
          }
        }
      }
    }
  }

  return 0;
}
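Unlike the launcher jobs above, this one produces real output: setOutputFormat(MapFileOutputFormat.class) makes each reducer write a sorted, indexed MapFile (MapFileOutputFormat extends SequenceFileOutputFormat, which is why the block-compression setting applies), so individual terms of the inverted index can later be looked up without scanning the whole output. The nested loops simply re-run the identical job across a grid of split-size, reducer-count, heap, and io.sort.mb settings for benchmarking.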
From source file:jobimtext.thesaurus.distributional.hadoop.mapreduce.AggrPerFt.java
License:Apache License
/**
 * Set the job configuration, classes and run the job.
 */
@SuppressWarnings("deprecation")
public static void main(String[] args) throws Exception {
  JobConf conf = HadoopUtil.generateJobConf(args);
  // JobConf conf = new JobConf(AggrPerFt.class);
  // conf.setJobName("AggrPerFt");

  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);

  conf.setMapperClass(Map.class);
  conf.setCombinerClass(Reduce.class);
  conf.setReducerClass(Reduce.class);

  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);

  FileInputFormat.setInputPaths(conf, new Path(args[0]));
  FileOutputFormat.setOutputPath(conf, new Path(args[1]));

  /*
   * use compression
   */
  // conf.set("mapred.output.compress", "true");
  // conf.set("mapred.map.output.compress", "true");
  // conf.set("mapred.map.output.compression.codec",
  //     "org.apache.hadoop.io.compress.SnappyCodec");
  // conf.set("mapred.output.compression.codec",
  //     "org.apache.hadoop.io.compress.SnappyCodec");

  /* set the maximum number of tasks per node */
  int maptasks = 120;
  conf.set("mapred.tasktracker.map.tasks.maximum", "" + maptasks);
  conf.set("mapred.map.tasks", "" + maptasks);
  conf.set("mapred.tasktracker.map", "" + maptasks);

  int reducetasks = 120;
  conf.set("mapred.tasktracker.reduce.tasks.maximum", "" + reducetasks);
  conf.set("mapred.reduce.tasks", "" + reducetasks);
  conf.set("mapred.tasktracker.reduce", "" + reducetasks);

  /*
   * heap size for the job
   */
  conf.set("mapred.child.java.opts", "-Xmx1500m");

  /*
   * how much virtual memory the entire process tree of each map/reduce
   * task will use
   */
  conf.set("mapred.job.map.memory.mb", "2048");
  conf.set("mapred.job.reduce.memory.mb", "2048");

  JobClient.runJob(conf);
}
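Here the job emits ordinary records: TextOutputFormat, which is also JobConf's default output format, writes each (Text, Text) pair as a tab-separated line under args[1], so the explicit setOutputFormat call mainly documents that choice.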
From source file:jobimtext.thesaurus.distributional.hadoop.mapreduce.AggrPerFtUniquePositions.java
License:Apache License
/**
 * Set the job configuration, classes and run the job.
 */
@SuppressWarnings("deprecation")
public static void main(String[] args) throws Exception {
  JobConf conf = HadoopUtil.generateJobConf(args);
  // JobConf conf = new JobConf(AggrPerFtUniquePositions.class);
  conf.setJobName("AggrPerFtUniquePositions " + args[0] + " " + args[1]);

  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);

  conf.setMapperClass(Map.class);
  conf.setCombinerClass(Reduce.class);
  conf.setReducerClass(Reduce.class);

  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);

  FileInputFormat.setInputPaths(conf, new Path(args[0]));
  FileOutputFormat.setOutputPath(conf, new Path(args[1]));

  /*
   * use compression
   */
  conf.set("mapred.output.compress", "true");
  conf.set("mapred.map.output.compress", "true");
  conf.set("mapred.map.output.compression.codec",
      "org.apache.hadoop.io.compress.SnappyCodec");
  conf.set("mapred.output.compression.codec",
      "org.apache.hadoop.io.compress.SnappyCodec");

  /* set the maximum number of tasks per node */
  int maptasks = 120;
  conf.set("mapred.tasktracker.map.tasks.maximum", "" + maptasks);
  conf.set("mapred.map.tasks", "" + maptasks);
  conf.set("mapred.tasktracker.map", "" + maptasks);

  int reducetasks = 60;
  conf.set("mapred.tasktracker.reduce.tasks.maximum", "" + reducetasks);
  conf.set("mapred.reduce.tasks", "" + reducetasks);
  conf.set("mapred.tasktracker.reduce", "" + reducetasks);

  /*
   * heap size for the job
   */
  conf.set("mapred.child.java.opts", "-Xmx1500m");

  /*
   * how much virtual memory the entire process tree of each map/reduce
   * task will use
   */
  conf.set("mapred.job.map.memory.mb", "2048");
  conf.set("mapred.job.reduce.memory.mb", "2048");

  JobClient.runJob(conf);
}