List of usage examples for org.apache.hadoop.mapred JobConf set
public void set(String name, String value)
Set the value of the name property.
From source file:ivory.ptc.AnchorTextInvertedIndex.java
License:Apache License
/**
 * Configures and runs the "BuildAnchorTextInvertedIndex" job.
 *
 * <p>Input/output paths, task counts and the weighting scheme parameters are read from the
 * {@code Ivory.*} properties of the job configuration. Every entry obtained by splitting
 * {@code Ivory.WeightingSchemeParameters} on {@code PARAMETER_SEPARATER} is registered in the
 * distributed cache as a URI. The output path is deleted before the job is submitted.
 *
 * @return 0 once {@code JobClient.runJob} has returned
 * @throws Exception if configuring or running the job fails
 */
@Override
public int runTool() throws Exception {
    JobConf conf = new JobConf(getConf(), AnchorTextInvertedIndex.class);
    FileSystem fs = FileSystem.get(conf);

    String inPath = conf.get("Ivory.InputPath");
    String outPath = conf.get("Ivory.OutputPath");
    Path inputPath = new Path(inPath);
    Path outputPath = new Path(outPath);
    int mapTasks = conf.getInt("Ivory.NumMapTasks", 1);
    int reduceTasks = conf.getInt("Ivory.NumReduceTasks", 100);
    String weightingSchemeParameters = conf.get("Ivory.WeightingSchemeParameters");

    LOG.info("BuildAnchorTextInvertedIndex");
    LOG.info(" - input path: " + inPath);
    LOG.info(" - output path: " + outPath);
    LOG.info(" - number of reducers: " + reduceTasks);
    LOG.info(" - weighting scheme: " + conf.get("Ivory.WeightingScheme"));
    LOG.info(" - weighting scheme parameters: " + weightingSchemeParameters);

    // NOTE(review): conf.get(...) yields null for an unset property, in which case the
    // split below throws a NullPointerException -- confirm callers always set it.
    String[] params = weightingSchemeParameters.split(PARAMETER_SEPARATER);
    for (String param : params) {
        DistributedCache.addCacheFile(new URI(param), conf);
    }

    conf.setJobName("BuildAnchorTextInvertedIndex");
    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(reduceTasks);
    conf.set("mapred.child.java.opts", "-Xmx4096m");
    conf.setInt("mapred.task.timeout", 60000000);

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(AnchorTextTarget.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(ArrayListWritable.class);
    conf.setMapperClass(MyMapper.class);
    conf.setReducerClass(MyReducer.class);

    // Clear any previous output; the explicit recursive flag replaces the deprecated
    // single-argument delete(Path).
    fs.delete(outputPath, true);
    JobClient.runJob(conf);
    return 0;
}
From source file:ivory.ptc.driver.XMLFormatJudgments.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length != 3) { printUsage();//from w w w . j a va 2 s .c om return -1; } JobConf conf = new JobConf(getConf(), XMLFormatJudgments.class); // Command line arguments String inPath = args[0]; String outPath = args[1]; String docnoMapping = args[2]; Path inputPath = new Path(inPath); Path outputPath = new Path(outPath); int mapTasks = 1; int reduceTasks = 1; conf.setJobName("FormatPseudoJudgments"); conf.setNumMapTasks(mapTasks); conf.setNumReduceTasks(reduceTasks); conf.set("mapred.child.java.opts", "-Xmx2048m"); DistributedCache.addCacheFile(new URI(docnoMapping), conf); FileSystem.get(conf).delete(outputPath); FileInputFormat.setInputPaths(conf, inputPath); FileOutputFormat.setOutputPath(conf, outputPath); conf.setInputFormat(SequenceFileInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapOutputKeyClass(PseudoQuery.class); conf.setMapOutputValueClass(PseudoJudgments.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(IdentityMapper.class); conf.setReducerClass(MyReducer.class); JobClient.runJob(conf); return 0; }
From source file:ivory.ptc.driver.XMLFormatQueries.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length != 2) { printUsage();/* w w w .j ava 2 s. co m*/ return -1; } JobConf conf = new JobConf(getConf(), XMLFormatQueries.class); // Command line arguments String inPath = args[0]; String outPath = args[1]; Path inputPath = new Path(inPath); Path outputPath = new Path(outPath); int mapTasks = 1; int reduceTasks = 1; conf.setJobName("FormatPseudoQueries"); conf.setNumMapTasks(mapTasks); conf.setNumReduceTasks(reduceTasks); conf.set("mapred.child.java.opts", "-Xmx2048m"); FileSystem.get(conf).delete(outputPath); FileInputFormat.setInputPaths(conf, inputPath); FileOutputFormat.setOutputPath(conf, outputPath); conf.setInputFormat(SequenceFileInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapOutputKeyClass(PseudoQuery.class); conf.setMapOutputValueClass(PseudoJudgments.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(IdentityMapper.class); conf.setReducerClass(MyReducer.class); JobClient.runJob(conf); return 0; }
From source file:ivory.ptc.SortedPseudoTestCollection.java
License:Apache License
public int runTool() throws Exception { JobConf conf = new JobConf(getConf(), SortedPseudoTestCollection.class); FileSystem fs = FileSystem.get(conf); String inPath = conf.get("Ivory.InputPath"); String outPath = conf.get("Ivory.OutputPath"); Path inputPath = new Path(inPath); Path outputPath = new Path(outPath); int mapTasks = 1; int reduceTasks = 1; LOG.info("SortedPseudoTestCollection"); LOG.info(" - Input path: " + conf.get("Ivory.InputPath")); LOG.info(" - Output path: " + conf.get("Ivory.OutputPath")); LOG.info(" - JudgmentExtractor: " + conf.get("Ivory.JudgmentExtractor")); LOG.info(" - JudgmentExtractorParameters: " + conf.get("Ivory.JudgmentExtractorParameters")); LOG.info(" - SamplingCriterion: " + conf.get("Ivory.SamplingCriterion")); LOG.info(" - SamplingCriterionParameters: " + conf.get("Ivory.SamplingCriterionParameters")); LOG.info(" - QueryScorer: " + conf.get("Ivory.QueryScorer")); conf.setJobName("SortedPTC"); conf.setNumMapTasks(mapTasks);// ww w .j a v a 2 s . c om conf.setNumReduceTasks(reduceTasks); conf.set("mapred.child.java.opts", "-Xmx4096m"); FileInputFormat.setInputPaths(conf, inputPath); FileOutputFormat.setOutputPath(conf, outputPath); conf.setInputFormat(SequenceFileInputFormat.class); conf.setOutputFormat(SequenceFileOutputFormat.class); conf.setMapOutputKeyClass(PseudoQuery.class); conf.setMapOutputValueClass(PseudoJudgments.class); conf.setOutputKeyClass(PseudoQuery.class); conf.setOutputValueClass(PseudoJudgments.class); conf.setMapperClass(MyMapper.class); conf.setReducerClass(MyReducer.class); fs.delete(outputPath); JobClient.runJob(conf); return 0; }
From source file:ivory.server.RunDistributedRetrievalServers.java
License:Apache License
/** * Runs this tool.//from ww w .j a v a 2 s. com */ public int run(String[] args) throws Exception { if (args.length < 2) { printUsage(); return -1; } String configFile = args[0]; FileSystem fs = FileSystem.get(getConf()); Document d = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(fs.open(new Path(configFile))); sLogger.info("Reading configuration to determine number of servers to launch:"); List<String> sids = new ArrayList<String>(); NodeList servers = d.getElementsByTagName("server"); for (int i = 0; i < servers.getLength(); i++) { Node node = servers.item(i); // get server id String sid = XMLTools.getAttributeValue(node, "id", null); if (sid == null) { throw new Exception("Must specify a query id attribute for every server!"); } sLogger.info(" - sid: " + sid); sids.add(sid); } int port = 7000; int numServers = sids.size(); String configPath = args[1]; if (fs.exists(new Path(configPath))) { fs.delete(new Path(configPath), true); } String fname = appendPath(configPath, "config-" + numServers + ".txt"); sLogger.info("Writing configuration to: " + fname); StringBuffer sb = new StringBuffer(); for (int n = 0; n < numServers; n++) { port++; sb.append(sids.get(n) + " " + port + "\n"); } FSDataOutputStream out = fs.create(new Path(fname), true); out.writeBytes(sb.toString()); out.close(); JobConf conf = new JobConf(RetrievalServer.class); conf.setNumMapTasks(1); conf.setNumReduceTasks(0); conf.setInputFormat(NLineInputFormat.class); conf.setOutputFormat(NullOutputFormat.class); conf.setMapperClass(ServerMapper.class); FileInputFormat.setInputPaths(conf, new Path(fname)); conf.set("Ivory.ConfigFile", configFile); conf.set("Ivory.ConfigPath", configPath); conf.setJobName("RetrievalServers"); //conf.set("mapred.child.java.opts", "-Xmx2048m"); conf.set("mapred.child.java.opts", "-Xmx2048m"); // conf.set("mapred.job.queue.name", "search"); JobClient client = new JobClient(conf); client.submitJob(conf); sLogger.info("Waiting for servers to start up..."); 
// poll HDFS for hostnames and ports boolean allStarted = true; do { allStarted = true; for (int n = 0; n < numServers; n++) { String f = appendPath(configPath, sids.get(n) + ".host"); if (!fs.exists(new Path(f))) { allStarted = false; } } Thread.sleep(10000); sLogger.info(" ..."); } while (!allStarted); // poll HDFS for ready signal that the index is ready boolean allReady = true; do { allReady = true; for (int n = 0; n < numServers; n++) { String f = appendPath(configPath, sids.get(n) + ".ready"); if (!fs.exists(new Path(f))) { allReady = false; } } Thread.sleep(10000); sLogger.info(" ..."); } while (!allReady); sLogger.info("All servers ready!"); sLogger.info("Host information:"); for (int n = 0; n < numServers; n++) { String f = appendPath(configPath, sids.get(n) + ".host"); sLogger.info(" sid=" + sids.get(n) + ", " + FSProperty.readString(fs, f)); } return 0; }
From source file:ivory.server.RunRetrievalBroker.java
License:Apache License
/**
 * Runs this tool.
 *
 * <p>Collects the server ids from the {@code *.host} files found under the config path,
 * submits the "RetrievalBroker" map-only job, and waits until the {@code broker.ready}
 * file appears, polling every five seconds.
 *
 * @param args exactly two arguments: the server config path and the score merging model
 *     ({@code "sort"} or {@code "normalize"})
 * @return -1 if the argument count is wrong or the config path listing is {@code null},
 *     otherwise 0
 * @throws RuntimeException if the score merging model is not supported
 * @throws Exception on any I/O or job submission failure
 */
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
        return -1;
    }

    String configPath = args[0];
    FileSystem fs = FileSystem.get(getConf());

    sLogger.info("Starting retrieval broker...");
    sLogger.info("server config path: " + configPath);
    FileStatus[] stats = fs.listStatus(new Path(configPath));

    if (stats == null) {
        sLogger.info("Error: " + configPath + " not found!");
        return -1;
    }

    String scoreMergeModel = args[1];
    if (!scoreMergeModel.equals("sort") && !scoreMergeModel.equals("normalize")) {
        throw new RuntimeException("Unsupported score merging model: " + args[1]);
    }

    // Semicolon-separated list of server ids, taken from the "<sid>.host" file names.
    StringBuilder ids = new StringBuilder();
    for (int i = 0; i < stats.length; i++) {
        String s = stats[i].getPath().toString();
        if (!s.endsWith(".host")) {
            continue;
        }

        String sid = s.substring(s.lastIndexOf("/") + 1, s.lastIndexOf(".host"));
        sLogger.info("sid=" + sid + ", host=" + s);

        if (ids.length() != 0) {
            ids.append(";");
        }
        ids.append(sid);
    }

    // Build the job from the tool's configuration so that it matches the one used for
    // the FileSystem above and keeps any options supplied to this tool.
    JobConf conf = new JobConf(getConf(), RunRetrievalBroker.class);
    conf.setJobName("RetrievalBroker");

    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(0);

    conf.setInputFormat(NullInputFormat.class);
    conf.setOutputFormat(NullOutputFormat.class);
    conf.setMapperClass(ServerMapper.class);

    conf.set("serverIDs", ids.toString());
    conf.set("ServerAddressPath", configPath);
    conf.set("ScoreMergeModel", scoreMergeModel);
    conf.set("mapred.child.java.opts", "-Xmx2048m");

    // Remove a stale ready marker so the wait below only sees the new broker's file.
    fs.delete(new Path(appendPath(configPath, "broker.ready")), true);

    JobClient client = new JobClient(conf);
    client.submitJob(conf);

    sLogger.info("broker started!");

    while (true) {
        String f = appendPath(configPath, "broker.ready");
        if (fs.exists(new Path(f))) {
            break;
        }

        Thread.sleep(5000);
    }

    String s = FSProperty.readString(FileSystem.get(conf), appendPath(configPath, "broker.ready"));
    sLogger.info("broker ready at " + s);

    return 0;
}
From source file:ivory.smrf.retrieval.distributed.RunDistributedRetrievalServers.java
License:Apache License
/** * Runs this tool.//from www .ja v a 2 s . c o m */ public int run(String[] args) throws Exception { if (args.length < 2) { printUsage(); return -1; } String configFile = args[0]; FileSystem fs = FileSystem.get(getConf()); Document d = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(fs.open(new Path(configFile))); sLogger.info("Reading configuration to determine number of servers to launch:"); List<String> sids = new ArrayList<String>(); NodeList servers = d.getElementsByTagName("server"); for (int i = 0; i < servers.getLength(); i++) { Node node = servers.item(i); // get server id String sid = XMLTools.getAttributeValue(node, "id", null); if (sid == null) { throw new Exception("Must specify a query id attribute for every server!"); } sLogger.info(" - sid: " + sid); sids.add(sid); } int port = 7000; int numServers = sids.size(); String configPath = args[1]; if (fs.exists(new Path(configPath))) { fs.delete(new Path(configPath), true); } String fname = appendPath(configPath, "config-" + numServers + ".txt"); sLogger.info("Writing configuration to: " + fname); StringBuffer sb = new StringBuffer(); for (int n = 0; n < numServers; n++) { port++; sb.append(sids.get(n) + " " + port + "\n"); } FSDataOutputStream out = fs.create(new Path(fname), true); out.writeBytes(sb.toString()); out.close(); JobConf conf = new JobConf(getConf(), RetrievalServer.class); conf.setNumMapTasks(1); conf.setNumReduceTasks(0); conf.setInputFormat(NLineInputFormat.class); conf.setOutputFormat(NullOutputFormat.class); conf.setMapperClass(ServerMapper.class); FileInputFormat.setInputPaths(conf, new Path(fname)); conf.set("Ivory.ConfigFile", configFile); conf.set("Ivory.ConfigPath", configPath); conf.setJobName("RetrievalServers"); //conf.set("mapred.child.java.opts", "-Xmx2048m"); conf.set("mapred.child.java.opts", "-Xmx2048m"); // conf.set("mapred.job.queue.name", "search"); JobClient client = new JobClient(conf); client.submitJob(conf); sLogger.info("Waiting for servers to 
start up..."); // poll HDFS for hostnames and ports boolean allStarted = true; do { allStarted = true; for (int n = 0; n < numServers; n++) { String f = appendPath(configPath, sids.get(n) + ".host"); if (!fs.exists(new Path(f))) { allStarted = false; } } Thread.sleep(10000); sLogger.info(" ..."); } while (!allStarted); // poll HDFS for ready signal that the index is ready boolean allReady = true; do { allReady = true; for (int n = 0; n < numServers; n++) { String f = appendPath(configPath, sids.get(n) + ".ready"); if (!fs.exists(new Path(f))) { allReady = false; } } Thread.sleep(10000); sLogger.info(" ..."); } while (!allReady); sLogger.info("All servers ready!"); sLogger.info("Host information:"); for (int n = 0; n < numServers; n++) { String f = appendPath(configPath, sids.get(n) + ".host"); sLogger.info(" sid=" + sids.get(n) + ", " + FSProperty.readString(fs, f)); } return 0; }
From source file:ivory.smrf.retrieval.distributed.RunQueryBroker.java
License:Apache License
/**
 * Runs this tool.
 *
 * <p>Submits the "RunQueryBroker" map-only job without waiting for it to finish. The
 * command-line values are handed to the job through its configuration.
 *
 * @param args exactly five arguments: config path, run tag, queries file path, results
 *     file path and number of hits (parsed as an integer)
 * @return -1 if the argument count is wrong or the config path listing is {@code null},
 *     otherwise 0
 * @throws Exception on any I/O, number parsing or job submission failure
 */
public int run(String[] args) throws Exception {
    final boolean wrongArgCount = args.length != 5;
    if (wrongArgCount) {
        printUsage();
        return -1;
    }

    final String configPath = args[0];
    final FileSystem fileSystem = FileSystem.get(getConf());

    sLogger.info("server config path: " + configPath);

    // The listing is only used to verify that the config path can be listed.
    final FileStatus[] listing = fileSystem.listStatus(new Path(configPath));
    if (listing == null) {
        sLogger.info("Error: " + configPath + " not found!");
        return -1;
    }

    final int hitCount = Integer.parseInt(args[4]);

    final JobConf job = new JobConf(getConf(), RunQueryBroker.class);
    job.setJobName("RunQueryBroker");

    // Single mapper, no reducers, no real input or output.
    job.setNumMapTasks(1);
    job.setNumReduceTasks(0);
    job.setInputFormat(NullInputFormat.class);
    job.setOutputFormat(NullOutputFormat.class);
    job.setMapperClass(Server.class);

    // Parameters read back by the job.
    job.set("QueriesFilePath", args[2]);
    job.set("ConfigPath", configPath);
    job.set("ResultsFilePath", args[3]);
    job.set("Runtag", args[1]);
    job.setInt("NumHits", hitCount);
    job.set("mapred.child.java.opts", "-Xmx2048m");

    new JobClient(job).submitJob(job);

    sLogger.info("runner started!");
    return 0;
}
From source file:ivory.smrf.retrieval.distributed.RunRetrievalBroker.java
License:Apache License
/** * Runs this tool./* w w w .ja v a 2s. c om*/ */ public int run(String[] args) throws Exception { if (args.length != 2) { printUsage(); return -1; } String configPath = args[0]; FileSystem fs = FileSystem.get(getConf()); String ids = ""; sLogger.info("Starting retrieval broker..."); sLogger.info("server config path: " + configPath); FileStatus[] stats = fs.listStatus(new Path(configPath)); if (stats == null) { sLogger.info("Error: " + configPath + " not found!"); return -1; } String scoreMergeModel = args[1]; if (!scoreMergeModel.equals("sort") && !scoreMergeModel.equals("normalize")) { throw new RuntimeException("Unsupported score merging model: " + args[1]); } for (int i = 0; i < stats.length; i++) { String s = stats[i].getPath().toString(); if (!s.endsWith(".host")) continue; String sid = s.substring(s.lastIndexOf("/") + 1, s.lastIndexOf(".host")); sLogger.info("sid=" + sid + ", host=" + s); if (ids.length() != 0) ids += ";"; ids += sid; } JobConf conf = new JobConf(getConf(), RunRetrievalBroker.class); conf.setJobName("RetrievalBroker"); conf.setNumMapTasks(1); conf.setNumReduceTasks(0); conf.setInputFormat(NullInputFormat.class); conf.setOutputFormat(NullOutputFormat.class); conf.setMapperClass(ServerMapper.class); conf.set("serverIDs", ids); conf.set("ServerAddressPath", configPath); conf.set("ScoreMergeModel", scoreMergeModel); conf.set("mapred.child.java.opts", "-Xmx2048m"); fs.delete(new Path(appendPath(configPath, "broker.ready")), true); JobClient client = new JobClient(conf); client.submitJob(conf); sLogger.info("broker started!"); while (true) { String f = appendPath(configPath, "broker.ready"); if (fs.exists(new Path(f))) { break; } Thread.sleep(5000); } String s = FSProperty.readString(FileSystem.get(conf), appendPath(configPath, "broker.ready")); sLogger.info("broker ready at " + s); return 0; }
From source file:ivory.smrf.retrieval.RunQueryBroker.java
License:Apache License
/** * Runs this tool./*from w w w .j a v a2 s. c om*/ */ public int run(String[] args) throws Exception { if (args.length != 5) { printUsage(); return -1; } String configPath = args[0]; FileSystem fs = FileSystem.get(getConf()); sLogger.info("server config path: " + configPath); FileStatus[] stats = fs.listStatus(new Path(configPath)); if (stats == null) { sLogger.info("Error: " + configPath + " not found!"); return -1; } String runtag = args[1]; String queriesFilePath = args[2]; String resultsFilePath = args[3]; int numHits = Integer.parseInt(args[4]); JobConf conf = new JobConf(RunQueryBroker.class); conf.setJobName("RunQueryBroker"); conf.setNumMapTasks(1); conf.setNumReduceTasks(0); conf.setInputFormat(NullInputFormat.class); conf.setOutputFormat(NullOutputFormat.class); conf.setMapperClass(Server.class); conf.set("QueriesFilePath", queriesFilePath); conf.set("ConfigPath", configPath); conf.set("ResultsFilePath", resultsFilePath); conf.set("Runtag", runtag); conf.setInt("NumHits", numHits); conf.set("mapred.child.java.opts", "-Xmx2048m"); JobClient client = new JobClient(conf); client.submitJob(conf); sLogger.info("runner started!"); return 0; }