List of usage examples for org.apache.hadoop.mapred JobConf setInputFormat
public void setInputFormat(Class<? extends InputFormat> theClass)
From source file:ivory.ptc.driver.XMLFormatQueries.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length != 2) { printUsage();//from w w w . j a v a 2 s . c om return -1; } JobConf conf = new JobConf(getConf(), XMLFormatQueries.class); // Command line arguments String inPath = args[0]; String outPath = args[1]; Path inputPath = new Path(inPath); Path outputPath = new Path(outPath); int mapTasks = 1; int reduceTasks = 1; conf.setJobName("FormatPseudoQueries"); conf.setNumMapTasks(mapTasks); conf.setNumReduceTasks(reduceTasks); conf.set("mapred.child.java.opts", "-Xmx2048m"); FileSystem.get(conf).delete(outputPath); FileInputFormat.setInputPaths(conf, inputPath); FileOutputFormat.setOutputPath(conf, outputPath); conf.setInputFormat(SequenceFileInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapOutputKeyClass(PseudoQuery.class); conf.setMapOutputValueClass(PseudoJudgments.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(IdentityMapper.class); conf.setReducerClass(MyReducer.class); JobClient.runJob(conf); return 0; }
From source file:ivory.ptc.SortedPseudoTestCollection.java
License:Apache License
/**
 * Configures and runs the "SortedPTC" job with one map task and one reduce task, reading
 * SequenceFile input and writing SequenceFile output keyed by {@code PseudoQuery} with
 * {@code PseudoJudgments} values.
 *
 * <p>The input and output locations are read from the configuration properties
 * {@code Ivory.InputPath} and {@code Ivory.OutputPath}. The output path is deleted right
 * before the job is launched.
 *
 * @return 0 after {@code JobClient.runJob} returns
 * @throws Exception propagated from file system access or from running the job
 */
public int runTool() throws Exception {
  JobConf conf = new JobConf(getConf(), SortedPseudoTestCollection.class);
  FileSystem fs = FileSystem.get(conf);

  String inPath = conf.get("Ivory.InputPath");
  String outPath = conf.get("Ivory.OutputPath");
  Path inputPath = new Path(inPath);
  Path outputPath = new Path(outPath);

  LOG.info("SortedPseudoTestCollection");
  LOG.info(" - Input path: " + inPath);
  LOG.info(" - Output path: " + outPath);
  LOG.info(" - JudgmentExtractor: " + conf.get("Ivory.JudgmentExtractor"));
  LOG.info(" - JudgmentExtractorParameters: " + conf.get("Ivory.JudgmentExtractorParameters"));
  LOG.info(" - SamplingCriterion: " + conf.get("Ivory.SamplingCriterion"));
  LOG.info(" - SamplingCriterionParameters: " + conf.get("Ivory.SamplingCriterionParameters"));
  LOG.info(" - QueryScorer: " + conf.get("Ivory.QueryScorer"));

  conf.setJobName("SortedPTC");
  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(1);
  conf.set("mapred.child.java.opts", "-Xmx4096m");

  FileInputFormat.setInputPaths(conf, inputPath);
  FileOutputFormat.setOutputPath(conf, outputPath);

  conf.setInputFormat(SequenceFileInputFormat.class);
  conf.setOutputFormat(SequenceFileOutputFormat.class);

  conf.setMapOutputKeyClass(PseudoQuery.class);
  conf.setMapOutputValueClass(PseudoJudgments.class);
  conf.setOutputKeyClass(PseudoQuery.class);
  conf.setOutputValueClass(PseudoJudgments.class);

  conf.setMapperClass(MyMapper.class);
  conf.setReducerClass(MyReducer.class);

  // Remove any previous output. The single-argument delete(Path) is deprecated; it is
  // documented as equivalent to a recursive delete, so request that explicitly.
  fs.delete(outputPath, true);

  JobClient.runJob(conf);
  return 0;
}
From source file:ivory.server.RunDistributedRetrievalServers.java
License:Apache License
/** * Runs this tool./*from w ww .jav a2s . co m*/ */ public int run(String[] args) throws Exception { if (args.length < 2) { printUsage(); return -1; } String configFile = args[0]; FileSystem fs = FileSystem.get(getConf()); Document d = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(fs.open(new Path(configFile))); sLogger.info("Reading configuration to determine number of servers to launch:"); List<String> sids = new ArrayList<String>(); NodeList servers = d.getElementsByTagName("server"); for (int i = 0; i < servers.getLength(); i++) { Node node = servers.item(i); // get server id String sid = XMLTools.getAttributeValue(node, "id", null); if (sid == null) { throw new Exception("Must specify a query id attribute for every server!"); } sLogger.info(" - sid: " + sid); sids.add(sid); } int port = 7000; int numServers = sids.size(); String configPath = args[1]; if (fs.exists(new Path(configPath))) { fs.delete(new Path(configPath), true); } String fname = appendPath(configPath, "config-" + numServers + ".txt"); sLogger.info("Writing configuration to: " + fname); StringBuffer sb = new StringBuffer(); for (int n = 0; n < numServers; n++) { port++; sb.append(sids.get(n) + " " + port + "\n"); } FSDataOutputStream out = fs.create(new Path(fname), true); out.writeBytes(sb.toString()); out.close(); JobConf conf = new JobConf(RetrievalServer.class); conf.setNumMapTasks(1); conf.setNumReduceTasks(0); conf.setInputFormat(NLineInputFormat.class); conf.setOutputFormat(NullOutputFormat.class); conf.setMapperClass(ServerMapper.class); FileInputFormat.setInputPaths(conf, new Path(fname)); conf.set("Ivory.ConfigFile", configFile); conf.set("Ivory.ConfigPath", configPath); conf.setJobName("RetrievalServers"); //conf.set("mapred.child.java.opts", "-Xmx2048m"); conf.set("mapred.child.java.opts", "-Xmx2048m"); // conf.set("mapred.job.queue.name", "search"); JobClient client = new JobClient(conf); client.submitJob(conf); sLogger.info("Waiting for servers to start up..."); 
// poll HDFS for hostnames and ports boolean allStarted = true; do { allStarted = true; for (int n = 0; n < numServers; n++) { String f = appendPath(configPath, sids.get(n) + ".host"); if (!fs.exists(new Path(f))) { allStarted = false; } } Thread.sleep(10000); sLogger.info(" ..."); } while (!allStarted); // poll HDFS for ready signal that the index is ready boolean allReady = true; do { allReady = true; for (int n = 0; n < numServers; n++) { String f = appendPath(configPath, sids.get(n) + ".ready"); if (!fs.exists(new Path(f))) { allReady = false; } } Thread.sleep(10000); sLogger.info(" ..."); } while (!allReady); sLogger.info("All servers ready!"); sLogger.info("Host information:"); for (int n = 0; n < numServers; n++) { String f = appendPath(configPath, sids.get(n) + ".host"); sLogger.info(" sid=" + sids.get(n) + ", " + FSProperty.readString(fs, f)); } return 0; }
From source file:ivory.server.RunRetrievalBroker.java
License:Apache License
/**
 * Runs this tool.
 *
 * <p>Collects the server ids from the {@code *.host} files found under {@code args[0]},
 * submits a map-only "RetrievalBroker" job configured with those ids, and then blocks until
 * a {@code broker.ready} file appears under {@code args[0]}.
 *
 * <p>NOTE(review): the wait has no timeout; if the broker never writes {@code broker.ready}
 * this method never returns.
 *
 * @param args exactly two entries: the server config path and the score merge model
 *        ({@code "sort"} or {@code "normalize"})
 * @return -1 when the argument count is wrong or listing the config path yields
 *         {@code null}; 0 once the broker reports ready
 * @throws RuntimeException if the score merge model is not one of the supported values
 * @throws Exception on any file system or job submission failure
 */
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    printUsage();
    return -1;
  }

  String configPath = args[0];
  FileSystem fs = FileSystem.get(getConf());

  sLogger.info("Starting retrieval broker...");
  sLogger.info("server config path: " + configPath);
  FileStatus[] stats = fs.listStatus(new Path(configPath));

  if (stats == null) {
    sLogger.info("Error: " + configPath + " not found!");
    return -1;
  }

  String scoreMergeModel = args[1];
  if (!scoreMergeModel.equals("sort") && !scoreMergeModel.equals("normalize")) {
    throw new RuntimeException("Unsupported score merging model: " + args[1]);
  }

  // Build the ';'-separated list of server ids from the "<sid>.host" file names.
  StringBuilder ids = new StringBuilder();
  for (int i = 0; i < stats.length; i++) {
    String s = stats[i].getPath().toString();
    if (!s.endsWith(".host")) {
      continue;
    }

    String sid = s.substring(s.lastIndexOf("/") + 1, s.lastIndexOf(".host"));
    sLogger.info("sid=" + sid + ", host=" + s);

    if (ids.length() != 0) {
      ids.append(';');
    }
    ids.append(sid);
  }

  // Build the job on top of the tool's own configuration so that settings supplied to the
  // tool are not lost when the job is submitted.
  JobConf conf = new JobConf(getConf(), RunRetrievalBroker.class);
  conf.setJobName("RetrievalBroker");

  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(0);

  conf.setInputFormat(NullInputFormat.class);
  conf.setOutputFormat(NullOutputFormat.class);
  conf.setMapperClass(ServerMapper.class);

  conf.set("serverIDs", ids.toString());
  conf.set("ServerAddressPath", configPath);
  conf.set("ScoreMergeModel", scoreMergeModel);
  conf.set("mapred.child.java.opts", "-Xmx2048m");

  String readyFile = appendPath(configPath, "broker.ready");

  // Drop a stale marker from an earlier run so the wait below only sees the new broker.
  fs.delete(new Path(readyFile), true);

  JobClient client = new JobClient(conf);
  client.submitJob(conf);

  sLogger.info("broker started!");

  while (!fs.exists(new Path(readyFile))) {
    Thread.sleep(5000);
  }

  String s = FSProperty.readString(FileSystem.get(conf), readyFile);
  sLogger.info("broker ready at " + s);

  return 0;
}
From source file:ivory.smrf.retrieval.distributed.RunDistributedRetrievalServers.java
License:Apache License
/** * Runs this tool./*from w ww .j a v a 2 s . co m*/ */ public int run(String[] args) throws Exception { if (args.length < 2) { printUsage(); return -1; } String configFile = args[0]; FileSystem fs = FileSystem.get(getConf()); Document d = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(fs.open(new Path(configFile))); sLogger.info("Reading configuration to determine number of servers to launch:"); List<String> sids = new ArrayList<String>(); NodeList servers = d.getElementsByTagName("server"); for (int i = 0; i < servers.getLength(); i++) { Node node = servers.item(i); // get server id String sid = XMLTools.getAttributeValue(node, "id", null); if (sid == null) { throw new Exception("Must specify a query id attribute for every server!"); } sLogger.info(" - sid: " + sid); sids.add(sid); } int port = 7000; int numServers = sids.size(); String configPath = args[1]; if (fs.exists(new Path(configPath))) { fs.delete(new Path(configPath), true); } String fname = appendPath(configPath, "config-" + numServers + ".txt"); sLogger.info("Writing configuration to: " + fname); StringBuffer sb = new StringBuffer(); for (int n = 0; n < numServers; n++) { port++; sb.append(sids.get(n) + " " + port + "\n"); } FSDataOutputStream out = fs.create(new Path(fname), true); out.writeBytes(sb.toString()); out.close(); JobConf conf = new JobConf(getConf(), RetrievalServer.class); conf.setNumMapTasks(1); conf.setNumReduceTasks(0); conf.setInputFormat(NLineInputFormat.class); conf.setOutputFormat(NullOutputFormat.class); conf.setMapperClass(ServerMapper.class); FileInputFormat.setInputPaths(conf, new Path(fname)); conf.set("Ivory.ConfigFile", configFile); conf.set("Ivory.ConfigPath", configPath); conf.setJobName("RetrievalServers"); //conf.set("mapred.child.java.opts", "-Xmx2048m"); conf.set("mapred.child.java.opts", "-Xmx2048m"); // conf.set("mapred.job.queue.name", "search"); JobClient client = new JobClient(conf); client.submitJob(conf); sLogger.info("Waiting for servers to 
start up..."); // poll HDFS for hostnames and ports boolean allStarted = true; do { allStarted = true; for (int n = 0; n < numServers; n++) { String f = appendPath(configPath, sids.get(n) + ".host"); if (!fs.exists(new Path(f))) { allStarted = false; } } Thread.sleep(10000); sLogger.info(" ..."); } while (!allStarted); // poll HDFS for ready signal that the index is ready boolean allReady = true; do { allReady = true; for (int n = 0; n < numServers; n++) { String f = appendPath(configPath, sids.get(n) + ".ready"); if (!fs.exists(new Path(f))) { allReady = false; } } Thread.sleep(10000); sLogger.info(" ..."); } while (!allReady); sLogger.info("All servers ready!"); sLogger.info("Host information:"); for (int n = 0; n < numServers; n++) { String f = appendPath(configPath, sids.get(n) + ".host"); sLogger.info(" sid=" + sids.get(n) + ", " + FSProperty.readString(fs, f)); } return 0; }
From source file:ivory.smrf.retrieval.distributed.RunQueryBroker.java
License:Apache License
/**
 * Runs this tool.
 *
 * <p>Expects five arguments, in order: server config path, run tag, queries file path,
 * results file path and number of hits. After confirming that the config path can be listed,
 * it submits a map-only "RunQueryBroker" job carrying those values as configuration
 * properties and returns without waiting for the job to finish.
 *
 * @param args the five command-line arguments described above
 * @return -1 when the argument count is wrong or listing the config path yields
 *         {@code null}; 0 after the job has been submitted
 * @throws Exception on a non-numeric hit count or any file system or submission failure
 */
public int run(String[] args) throws Exception {
  if (args.length != 5) {
    printUsage();
    return -1;
  }

  final String serverConfigPath = args[0];
  final FileSystem fileSystem = FileSystem.get(getConf());

  sLogger.info("server config path: " + serverConfigPath);

  if (fileSystem.listStatus(new Path(serverConfigPath)) == null) {
    sLogger.info("Error: " + serverConfigPath + " not found!");
    return -1;
  }

  final String tag = args[1];
  final String queriesFile = args[2];
  final String resultsFile = args[3];
  final int hits = Integer.parseInt(args[4]);

  final JobConf job = new JobConf(getConf(), RunQueryBroker.class);
  job.setJobName("RunQueryBroker");

  // A single mapper and no reducers, with no real input or output.
  job.setNumMapTasks(1);
  job.setNumReduceTasks(0);
  job.setInputFormat(NullInputFormat.class);
  job.setOutputFormat(NullOutputFormat.class);
  job.setMapperClass(Server.class);

  // Hand the command-line values to the mapper through the job configuration.
  job.set("QueriesFilePath", queriesFile);
  job.set("ConfigPath", serverConfigPath);
  job.set("ResultsFilePath", resultsFile);
  job.set("Runtag", tag);
  job.setInt("NumHits", hits);

  job.set("mapred.child.java.opts", "-Xmx2048m");

  new JobClient(job).submitJob(job);

  sLogger.info("runner started!");
  return 0;
}
From source file:ivory.smrf.retrieval.distributed.RunRetrievalBroker.java
License:Apache License
/**
 * Runs this tool.
 *
 * <p>Gathers the server ids from the {@code *.host} files under {@code args[0]}, submits a
 * map-only "RetrievalBroker" job configured with them, then polls every five seconds until a
 * {@code broker.ready} file exists under {@code args[0]} and logs its content.
 *
 * @param args exactly two entries: the server config path and the score merge model
 *        ({@code "sort"} or {@code "normalize"})
 * @return -1 when the argument count is wrong or listing the config path yields
 *         {@code null}; 0 once the broker reports ready
 * @throws RuntimeException if the score merge model is not one of the supported values
 * @throws Exception on any file system or job submission failure
 */
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    printUsage();
    return -1;
  }

  final String configPath = args[0];
  final FileSystem fs = FileSystem.get(getConf());

  sLogger.info("Starting retrieval broker...");
  sLogger.info("server config path: " + configPath);

  final FileStatus[] entries = fs.listStatus(new Path(configPath));
  if (entries == null) {
    sLogger.info("Error: " + configPath + " not found!");
    return -1;
  }

  final String mergeModel = args[1];
  final boolean supported = mergeModel.equals("sort") || mergeModel.equals("normalize");
  if (!supported) {
    throw new RuntimeException("Unsupported score merging model: " + args[1]);
  }

  // Join the ids taken from every "<sid>.host" entry with ';'.
  final StringBuilder serverIds = new StringBuilder();
  for (FileStatus entry : entries) {
    final String location = entry.getPath().toString();
    if (location.endsWith(".host")) {
      final int begin = location.lastIndexOf("/") + 1;
      final int end = location.lastIndexOf(".host");
      final String sid = location.substring(begin, end);
      sLogger.info("sid=" + sid + ", host=" + location);

      if (serverIds.length() != 0) {
        serverIds.append(";");
      }
      serverIds.append(sid);
    }
  }

  final JobConf job = new JobConf(getConf(), RunRetrievalBroker.class);
  job.setJobName("RetrievalBroker");
  job.setNumMapTasks(1);
  job.setNumReduceTasks(0);
  job.setInputFormat(NullInputFormat.class);
  job.setOutputFormat(NullOutputFormat.class);
  job.setMapperClass(ServerMapper.class);
  job.set("serverIDs", serverIds.toString());
  job.set("ServerAddressPath", configPath);
  job.set("ScoreMergeModel", mergeModel);
  job.set("mapred.child.java.opts", "-Xmx2048m");

  // Remove a marker left by a previous run before launching the new broker.
  fs.delete(new Path(appendPath(configPath, "broker.ready")), true);

  new JobClient(job).submitJob(job);
  sLogger.info("broker started!");

  // Wait for the broker to announce itself.
  while (!fs.exists(new Path(appendPath(configPath, "broker.ready")))) {
    Thread.sleep(5000);
  }

  final String address =
      FSProperty.readString(FileSystem.get(job), appendPath(configPath, "broker.ready"));
  sLogger.info("broker ready at " + address);

  return 0;
}
From source file:ivory.smrf.retrieval.RunQueryBroker.java
License:Apache License
/**
 * Runs this tool.
 *
 * <p>Expects five arguments, in order: server config path, run tag, queries file path,
 * results file path and number of hits. After confirming that the config path can be listed,
 * it submits a map-only "RunQueryBroker" job carrying those values as configuration
 * properties and returns without waiting for the job to finish.
 *
 * @param args the five command-line arguments described above
 * @return -1 when the argument count is wrong or listing the config path yields
 *         {@code null}; 0 after the job has been submitted
 * @throws Exception on a non-numeric hit count or any file system or submission failure
 */
public int run(String[] args) throws Exception {
  if (args.length != 5) {
    printUsage();
    return -1;
  }

  String configPath = args[0];

  FileSystem fs = FileSystem.get(getConf());

  sLogger.info("server config path: " + configPath);
  FileStatus[] stats = fs.listStatus(new Path(configPath));

  if (stats == null) {
    sLogger.info("Error: " + configPath + " not found!");
    return -1;
  }

  String runtag = args[1];
  String queriesFilePath = args[2];
  String resultsFilePath = args[3];
  int numHits = Integer.parseInt(args[4]);

  // Build the job on top of the tool's own configuration so that settings supplied to the
  // tool are not lost when the job is submitted.
  JobConf conf = new JobConf(getConf(), RunQueryBroker.class);
  conf.setJobName("RunQueryBroker");

  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(0);

  conf.setInputFormat(NullInputFormat.class);
  conf.setOutputFormat(NullOutputFormat.class);
  conf.setMapperClass(Server.class);

  conf.set("QueriesFilePath", queriesFilePath);
  conf.set("ConfigPath", configPath);
  conf.set("ResultsFilePath", resultsFilePath);
  conf.set("Runtag", runtag);
  conf.setInt("NumHits", numHits);

  conf.set("mapred.child.java.opts", "-Xmx2048m");

  JobClient client = new JobClient(conf);
  client.submitJob(conf);

  sLogger.info("runner started!");

  return 0;
}
From source file:ivory.smrf.retrieval.RunQueryHDFS.java
License:Apache License
/**
 * Submits a map-only "RunQueryHDFS" job whose mapper is {@code QueryRunner}, passing the two
 * command-line arguments to it as a single ';'-joined configuration property named
 * {@code args}. Returns as soon as the job has been submitted.
 *
 * @param args exactly two entries: the queries file and the models file
 * @return -1 (after printing usage) when the argument count is not two; 0 after submission
 * @throws Exception on any job submission failure
 */
public int run(String[] args) throws Exception {
  final boolean validArgCount = args.length == 2;
  if (!validArgCount) {
    System.out.println("usage: [queries-file] [models-file]");
    ToolRunner.printGenericCommandUsage(System.out);
    return -1;
  }

  final String joinedArgs = Joiner.on(";").join(args);

  final JobConf job = new JobConf(getConf(), RunQueryHDFS.class);
  job.setJobName("RunQueryHDFS");

  // One mapper, no reducers, and no real input or output.
  job.setNumMapTasks(1);
  job.setNumReduceTasks(0);
  job.setInputFormat(NullInputFormat.class);
  job.setOutputFormat(NullOutputFormat.class);
  job.setMapperClass(QueryRunner.class);

  job.set("args", joinedArgs);
  job.set("mapred.child.java.opts", "-Xmx16g");

  LOG.info("argsStr: " + joinedArgs);

  new JobClient(job).submitJob(job);

  LOG.info("runner started!");
  return 0;
}
From source file:jobimtext.thesaurus.distributional.hadoop.mapreduce.AggrPerFt.java
License:Apache License
/** * Set the job configuration, classes and run the job. *//* w w w .j a v a 2s . c o m*/ @SuppressWarnings("deprecation") public static void main(String[] args) throws Exception { JobConf conf = HadoopUtil.generateJobConf(args); // JobConf conf = new JobConf(AggrPerFt.class); // conf.setJobName("AggrPerFt"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(Map.class); conf.setCombinerClass(Reduce.class); conf.setReducerClass(Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); /* * use compression */ // conf.set("mapred.output.compress", "true"); // conf.set("mapred.map.output.compress", "true"); // conf.set("mapred.map.output.compression.codec", // "org.apache.hadoop.io.compress.SnappyCodec"); // conf.set("mapred.output.compression.codec", // "org.apache.hadoop.io.compress.SnappyCodec"); /* set the maximum number of task per node */ int maptasks = 120; conf.set("mapred.tasktracker.map.tasks.maximum", "" + maptasks); conf.set("mapred.map.tasks", "" + maptasks); conf.set("mapred.tasktracker.map", "" + maptasks); int reducetasks = 120; conf.set("mapred.tasktracker.reduce.tasks.maximum", "" + reducetasks); conf.set("mapred.reduce.tasks", "" + reducetasks); conf.set("mapred.tasktracker.reduce", "" + reducetasks); /* * heap size for the job */ conf.set("mapred.child.java.opts", "-Xmx1500m"); /* * how much virtual memory the entire process tree of each map/reduce * task will use */ conf.set("mapred.job.map.memory.mb", "2048"); conf.set("mapred.job.reduce.memory.mb", "2048"); JobClient.runJob(conf); }