List of usage examples for org.apache.hadoop.fs.FileSystem.getLocal
public static LocalFileSystem getLocal(Configuration conf) throws IOException
From source file:it.crs4.pydoop.mapreduce.pipes.CommandLineParser.java
License:Apache License
public int run(String[] args) throws Exception { CommandLineParser cli = new CommandLineParser(); if (args.length == 0) { cli.printUsage();// w w w .j av a 2s . c om return 1; } try { Job job = new Job(new Configuration()); job.setJobName(getClass().getName()); Configuration conf = job.getConfiguration(); CommandLine results = cli.parse(conf, args); if (results.hasOption("input")) { Path path = new Path(results.getOptionValue("input")); FileInputFormat.setInputPaths(job, path); } if (results.hasOption("output")) { Path path = new Path(results.getOptionValue("output")); FileOutputFormat.setOutputPath(job, path); } if (results.hasOption("jar")) { job.setJar(results.getOptionValue("jar")); } if (results.hasOption("inputformat")) { explicitInputFormat = true; setIsJavaRecordReader(conf, true); job.setInputFormatClass(getClass(results, "inputformat", conf, InputFormat.class)); } if (results.hasOption("javareader")) { setIsJavaRecordReader(conf, true); } if (results.hasOption("map")) { setIsJavaMapper(conf, true); job.setMapperClass(getClass(results, "map", conf, Mapper.class)); } if (results.hasOption("partitioner")) { job.setPartitionerClass(getClass(results, "partitioner", conf, Partitioner.class)); } if (results.hasOption("reduce")) { setIsJavaReducer(conf, true); job.setReducerClass(getClass(results, "reduce", conf, Reducer.class)); } if (results.hasOption("reduces")) { job.setNumReduceTasks(Integer.parseInt(results.getOptionValue("reduces"))); } if (results.hasOption("writer")) { explicitOutputFormat = true; setIsJavaRecordWriter(conf, true); job.setOutputFormatClass(getClass(results, "writer", conf, OutputFormat.class)); } if (results.hasOption("lazyOutput")) { if (Boolean.parseBoolean(results.getOptionValue("lazyOutput"))) { LazyOutputFormat.setOutputFormatClass(job, job.getOutputFormatClass()); } } if (results.hasOption("avroInput")) { avroInput = AvroIO.valueOf(results.getOptionValue("avroInput").toUpperCase()); } if (results.hasOption("avroOutput")) { 
avroOutput = AvroIO.valueOf(results.getOptionValue("avroOutput").toUpperCase()); } if (results.hasOption("program")) { setExecutable(conf, results.getOptionValue("program")); } // if they gave us a jar file, include it into the class path String jarFile = job.getJar(); if (jarFile != null) { final URL[] urls = new URL[] { FileSystem.getLocal(conf).pathToFile(new Path(jarFile)).toURL() }; // FindBugs complains that creating a URLClassLoader should be // in a doPrivileged() block. ClassLoader loader = AccessController.doPrivileged(new PrivilegedAction<ClassLoader>() { public ClassLoader run() { return new URLClassLoader(urls); } }); conf.setClassLoader(loader); } setupPipesJob(job); return job.waitForCompletion(true) ? 0 : 1; } catch (ParseException pe) { LOG.info("Error : " + pe); cli.printUsage(); return 1; } }
From source file:it.crs4.pydoop.mapreduce.pipes.TaskLog.java
License:Apache License
/** * Obtain the owner of the log dir. This is * determined by checking the job's log directory. */// www . jav a2s. co m static String obtainLogDirOwner(TaskAttemptID taskid) throws IOException { Configuration conf = new Configuration(); FileSystem raw = FileSystem.getLocal(conf).getRaw(); Path jobLogDir = new Path(getJobDir(taskid.getJobID()).getAbsolutePath()); FileStatus jobStat = raw.getFileStatus(jobLogDir); return jobStat.getOwner(); }
From source file:it.crs4.pydoop.mapreduce.pipes.TaskLog.java
License:Apache License
private static synchronized void writeToIndexFile(String logLocation, boolean isCleanup) throws IOException { // To ensure atomicity of updates to index file, write to temporary index // file first and then rename. File tmpIndexFile = getTmpIndexFile(currentTaskid, isCleanup); BufferedOutputStream bos = new BufferedOutputStream(SecureIOUtils.createForWrite(tmpIndexFile, 0644)); DataOutputStream dos = new DataOutputStream(bos); //the format of the index file is //LOG_DIR: <the dir where the task logs are really stored> //STDOUT: <start-offset in the stdout file> <length> //STDERR: <start-offset in the stderr file> <length> //SYSLOG: <start-offset in the syslog file> <length> try {//from w w w .j ava2 s . c o m dos.writeBytes(LogFileDetail.LOCATION + logLocation + "\n" + LogName.STDOUT.toString() + ":"); dos.writeBytes(Long.toString(prevOutLength) + " "); dos.writeBytes(Long.toString(new File(logLocation, LogName.STDOUT.toString()).length() - prevOutLength) + "\n" + LogName.STDERR + ":"); dos.writeBytes(Long.toString(prevErrLength) + " "); dos.writeBytes(Long.toString(new File(logLocation, LogName.STDERR.toString()).length() - prevErrLength) + "\n" + LogName.SYSLOG.toString() + ":"); dos.writeBytes(Long.toString(prevLogLength) + " "); dos.writeBytes(Long.toString(new File(logLocation, LogName.SYSLOG.toString()).length() - prevLogLength) + "\n"); dos.close(); dos = null; } finally { IOUtils.cleanup(LOG, dos); } File indexFile = getIndexFile(currentTaskid, isCleanup); Path indexFilePath = new Path(indexFile.getAbsolutePath()); Path tmpIndexFilePath = new Path(tmpIndexFile.getAbsolutePath()); if (localFS == null) {// set localFS once localFS = FileSystem.getLocal(new Configuration()); } localFS.rename(tmpIndexFilePath, indexFilePath); }
From source file:it.crs4.pydoop.pipes.Application.java
License:Apache License
/**
 * Writes the given password bytes to a file on the local file system,
 * creating the file with permission {@code 400}.
 *
 * @param localPasswordFile path of the file to create
 * @param password          bytes to write
 * @param conf              configuration used to obtain the local file system
 * @throws IOException if the file cannot be created, written or closed
 */
private void writePasswordToLocalFile(String localPasswordFile, byte[] password, JobConf conf)
        throws IOException {
    FileSystem localFs = FileSystem.getLocal(conf);
    Path localPath = new Path(localPasswordFile);
    FSDataOutputStream out = FileSystem.create(localFs, localPath, new FsPermission("400"));
    try {
        out.write(password);
    } finally {
        // Close even when the write fails so the stream is not leaked.
        out.close();
    }
}
From source file:it.crs4.pydoop.pipes.Submitter.java
License:Apache License
@Override public int run(String[] args) throws Exception { CommandLineParser cli = new CommandLineParser(); if (args.length == 0) { cli.printUsage();//from w w w .ja v a2s. com return 1; } cli.addOption("input", false, "input path to the maps", "path"); cli.addOption("output", false, "output path from the reduces", "path"); cli.addOption("jar", false, "job jar file", "path"); cli.addOption("inputformat", false, "java classname of InputFormat", "class"); //cli.addArgument("javareader", false, "is the RecordReader in Java"); cli.addOption("map", false, "java classname of Mapper", "class"); cli.addOption("partitioner", false, "java classname of Partitioner", "class"); cli.addOption("reduce", false, "java classname of Reducer", "class"); cli.addOption("writer", false, "java classname of OutputFormat", "class"); cli.addOption("program", false, "URI to application executable", "class"); cli.addOption("reduces", false, "number of reduces", "num"); cli.addOption("jobconf", false, "\"n1=v1,n2=v2,..\" (Deprecated) Optional. Add or override a JobConf property.", "key=val"); cli.addOption("lazyOutput", false, "Optional. 
Create output lazily", "boolean"); Parser parser = cli.createParser(); try { GenericOptionsParser genericParser = new GenericOptionsParser(getConf(), args); CommandLine results = parser.parse(cli.options, genericParser.getRemainingArgs()); JobConf job = new JobConf(getConf()); if (results.hasOption("input")) { FileInputFormat.setInputPaths(job, results.getOptionValue("input")); } if (results.hasOption("output")) { FileOutputFormat.setOutputPath(job, new Path(results.getOptionValue("output"))); } if (results.hasOption("jar")) { job.setJar(results.getOptionValue("jar")); } if (results.hasOption("inputformat")) { setIsJavaRecordReader(job, true); job.setInputFormat(getClass(results, "inputformat", job, InputFormat.class)); } if (results.hasOption("javareader")) { setIsJavaRecordReader(job, true); } if (results.hasOption("map")) { setIsJavaMapper(job, true); job.setMapperClass(getClass(results, "map", job, Mapper.class)); } if (results.hasOption("partitioner")) { job.setPartitionerClass(getClass(results, "partitioner", job, Partitioner.class)); } if (results.hasOption("reduce")) { setIsJavaReducer(job, true); job.setReducerClass(getClass(results, "reduce", job, Reducer.class)); } if (results.hasOption("reduces")) { job.setNumReduceTasks(Integer.parseInt(results.getOptionValue("reduces"))); } if (results.hasOption("writer")) { setIsJavaRecordWriter(job, true); job.setOutputFormat(getClass(results, "writer", job, OutputFormat.class)); } if (results.hasOption("lazyOutput")) { if (Boolean.parseBoolean(results.getOptionValue("lazyOutput"))) { LazyOutputFormat.setOutputFormatClass(job, job.getOutputFormat().getClass()); } } if (results.hasOption("program")) { setExecutable(job, results.getOptionValue("program")); } if (results.hasOption("jobconf")) { LOG.warn("-jobconf option is deprecated, please use -D instead."); String options = results.getOptionValue("jobconf"); StringTokenizer tokenizer = new StringTokenizer(options, ","); while (tokenizer.hasMoreTokens()) { String 
keyVal = tokenizer.nextToken().trim(); String[] keyValSplit = keyVal.split("="); job.set(keyValSplit[0], keyValSplit[1]); } } // if they gave us a jar file, include it into the class path String jarFile = job.getJar(); if (jarFile != null) { final URL[] urls = new URL[] { FileSystem.getLocal(job).pathToFile(new Path(jarFile)).toURL() }; //FindBugs complains that creating a URLClassLoader should be //in a doPrivileged() block. ClassLoader loader = AccessController.doPrivileged(new PrivilegedAction<ClassLoader>() { public ClassLoader run() { return new URLClassLoader(urls); } }); job.setClassLoader(loader); } runJob(job); return 0; } catch (ParseException pe) { LOG.info("Error : " + pe); cli.printUsage(); return 1; } }
From source file:it.crs4.seal.demux.DemuxReducer.java
License:Open Source License
public void setup(String localSampleSheetPath, Configuration conf) throws IOException { // load the sample sheet Path path = new Path(localSampleSheetPath).makeQualified(FileSystem.getLocal(conf)); SampleSheet sampleSheet;/*from w w w . ja v a 2s. co m*/ try { sampleSheet = DemuxUtils.loadSampleSheet(path, conf); } catch (SampleSheet.FormatException e) { throw new RuntimeException("Error loading sample sheet. Message: " + e.getMessage()); } barcodeLookup = new BarcodeLookup(sampleSheet, conf.getInt(Demux.CONF_MAX_MISMATCHES, Demux.DEFAULT_MAX_MISMATCHES)); expectIndexRead = !conf.getBoolean(Demux.CONF_NO_INDEX_READS, false); separatesReads = conf.getBoolean(Demux.CONF_SEPARATE_READS, false); }
From source file:it.crs4.seal.read_sort.ReadSort.java
License:Open Source License
public static Path getAnnotationPath(Configuration conf) throws IOException { String annotationName = conf.get(ReadSort.REF_ANN_PROP_NAME); if (annotationName == null) throw new RuntimeException("missing property " + REF_ANN_PROP_NAME); LOG.info("reading reference annotation from " + annotationName); Path annPath = new Path(annotationName); FileSystem srcFs;/*w ww .j av a 2s . c o m*/ if (conf.get("mapred.cache.archives") != null) { // we're using the distributed cache for the reference, // so it's on the local file system srcFs = FileSystem.getLocal(conf); } else srcFs = annPath.getFileSystem(conf); return annPath.makeQualified(srcFs); }
From source file:it.polito.dbdmg.searum.ARM.java
License:Apache License
/** * Generates the header table from the serialized string representation * /*from ww w . j av a2 s .c om*/ * @return Deserialized header table */ public static List<Pair<String, Long>> readFList(Configuration conf) throws IOException { List<Pair<String, Long>> list = new ArrayList<Pair<String, Long>>(); Path[] files = DistributedCache.getLocalCacheFiles(conf); if (files == null) { throw new IOException("Cannot read Frequency list from Distributed Cache"); } if (files.length != 1) { throw new IOException("Cannot read Frequency list from Distributed Cache (" + files.length + ')'); } FileSystem fs = FileSystem.getLocal(conf); Path fListLocalPath = fs.makeQualified(files[0]); // Fallback if we are running locally. if (!fs.exists(fListLocalPath)) { URI[] filesURIs = DistributedCache.getCacheFiles(conf); if (filesURIs == null) { throw new IOException("Cannot read header table from Distributed Cache"); } if (filesURIs.length != 1) { throw new IOException("Cannot read header table from Distributed Cache (" + files.length + ')'); } fListLocalPath = new Path(filesURIs[0].getPath()); } for (Pair<Text, LongWritable> record : new SequenceFileIterable<Text, LongWritable>(fListLocalPath, true, conf)) { list.add(new Pair<String, Long>(record.getFirst().toString(), record.getSecond().get())); } return list; }
From source file:ivory.cascade.retrieval.RunQueryLocalCascade.java
License:Apache License
/**
 * Creates a query runner that works against the local file system.
 *
 * @param args arguments handed unchanged to the {@code CascadeBatchQueryRunner}
 */
public RunQueryLocalCascade(String[] args)
        throws SAXException, IOException, ParserConfigurationException, Exception, NotBoundException {
    LOG.info("Initializing QueryRunner...");

    // The batch runner reads its inputs through the local file system.
    FileSystem localFs = FileSystem.getLocal(new Configuration());
    runner = new CascadeBatchQueryRunner(args, localFs);
}
From source file:ivory.core.data.dictionary.DefaultFrequencySortedDictionaryTest.java
License:Apache License
@Test public void test1() throws IOException { FileSystem fs = FileSystem.getLocal(new Configuration()); Path termsFilePath = new Path("etc/trec-index-terms.dat"); Path termIDsFilePath = new Path("etc/trec-index-termids.dat"); Path idToTermFilePath = new Path("etc/trec-index-termid-mapping.dat"); DefaultFrequencySortedDictionary dictionary = new DefaultFrequencySortedDictionary(termsFilePath, termIDsFilePath, idToTermFilePath, fs); assertEquals(312232, dictionary.size()); assertEquals("page", dictionary.getTerm(1)); assertEquals("time", dictionary.getTerm(2)); assertEquals("will", dictionary.getTerm(3)); assertEquals("year", dictionary.getTerm(4)); assertEquals("nikaan", dictionary.getTerm(100000)); assertEquals(1, dictionary.getId("page")); assertEquals(2, dictionary.getId("time")); assertEquals(3, dictionary.getId("will")); assertEquals(4, dictionary.getId("year")); assertEquals(100000, dictionary.getId("nikaan")); assertEquals(null, dictionary.getTerm(312233)); Iterator<String> iter = dictionary.iterator(); assertTrue(iter.hasNext());//www . j a v a 2 s. c o m assertEquals("page", iter.next()); assertTrue(iter.hasNext()); assertEquals("time", iter.next()); assertTrue(iter.hasNext()); assertEquals("will", iter.next()); assertTrue(iter.hasNext()); assertEquals("year", iter.next()); assertTrue(iter.hasNext()); int cnt = 0; for (@SuppressWarnings("unused") String s : dictionary) { cnt++; } assertEquals(dictionary.size(), cnt); cnt = 0; iter = dictionary.iterator(); while (iter.hasNext()) { cnt++; iter.next(); } assertEquals(dictionary.size(), cnt); }