List of usage examples for org.apache.hadoop.fs FileSystem get
public static FileSystem get(Configuration conf) throws IOException
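Before the project examples, a minimal self-contained sketch of the call itself (the class name, the example path, and the printed message are illustrative, not taken from any project below). FileSystem.get(Configuration) resolves the default file system from fs.defaultFS (fs.default.name in older releases) and returns a cached instance shared by callers with the same scheme, authority, and user.

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class FileSystemGetExample {
        public static void main(String[] args) throws IOException {
            // Picks up core-site.xml from the classpath; defaults to file:/// if unset.
            Configuration conf = new Configuration();
            FileSystem fs = FileSystem.get(conf);

            // Hypothetical path, for illustration only.
            Path p = new Path("/tmp/example.txt");
            System.out.println(p + " exists: " + fs.exists(p));

            // The returned instance is cached and shared, so it is normally
            // left open for the life of the process rather than closed here.
        }
    }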
From source file:bigsatgps.BigDataHandler.java
License:Open Source License
/**
 * Reads the first record of a sequence file and writes its image value
 * back out as a TIFF next to the input.
 *
 * @param inpath path of the input sequence file
 * @throws IOException
 */
public void SequenceToImage(String inpath) throws IOException {
    FileSystem fs = FileSystem.get(confHadoop);
    Path inputFilePath = new Path(inpath);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, inputFilePath, confHadoop);
    Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), confHadoop);
    BytesWritable value = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), confHadoop);
    reader.next(key, value);
    reader.close();
    byte[] imagebytearray = value.copyBytes();
    BufferedImage bufferedseq = ImageIO.read(new ByteArrayInputStream(imagebytearray));
    String outpath = inpath.substring(0, inpath.indexOf(".")) + "copy.tif";
    ImageIO.write(bufferedseq, "tiff", new File(outpath));
    System.out.println("image was successfully retrieved and written to " + outpath);
}
From source file:bigsidemodel.AutoCoder.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    /*if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }*/

    //String inputPath = cmdline.getOptionValue(INPUT);
    //String outputPath = cmdline.getOptionValue(OUTPUT);
    String inputPath = "qiwang321/best5-mingled-key-56x56/part*";
    String outputPath = "shangfu/bigoutput";
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    LOG.info("Tool: " + AutoCoder.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath + "0");
    LOG.info(" - number of reducers: " + reduceTasks);

    Configuration conf = getConf();
    conf.setInt("num_reduce_task", reduceTasks);
    conf.set("sidepath", outputPath + "_side/");

    Job job0 = Job.getInstance(conf);
    job0.setJobName(AutoCoder.class.getSimpleName());
    job0.setJarByClass(AutoCoder.class);
    // set the path of the information of k clusters in this iteration
    job0.setNumReduceTasks(reduceTasks);
    FileInputFormat.setInputPaths(job0, new Path(inputPath));
    FileOutputFormat.setOutputPath(job0, new Path(outputPath + "0"));
    job0.setInputFormatClass(KeyValueTextInputFormat.class);
    job0.setOutputFormatClass(SequenceFileOutputFormat.class);
    job0.setMapOutputKeyClass(PairOfInts.class);
    job0.setMapOutputValueClass(DataNode.class);
    job0.setOutputKeyClass(PairOfInts.class);
    job0.setOutputValueClass(DataNode.class);
    job0.setMapperClass(MyMapper0.class);
    job0.setReducerClass(MyReducer0.class);
    job0.setPartitionerClass(MyPartitioner.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath + "0");
    FileSystem.get(getConf()).delete(outputDir, true);

    long codeStart = System.currentTimeMillis();
    double jobTimeSum = 0;
    long startTime = System.currentTimeMillis();
    job0.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    jobTimeSum += (System.currentTimeMillis() - startTime) / 1000.0;

    //======= Job 1
    LOG.info("Tool: " + AutoCoder.class.getSimpleName());
    LOG.info(" - input path: " + outputPath + "0");
    LOG.info(" - output path: " + outputPath + "1");
    LOG.info(" - number of reducers: " + 1);
    int nModel = reduceTasks;
    reduceTasks = 1;

    Job job1 = Job.getInstance(conf);
    job1.setJobName(AutoCoder.class.getSimpleName());
    job1.setJarByClass(AutoCoder.class);
    // set the path of the information of k clusters in this iteration
    job1.setNumReduceTasks(reduceTasks);
    FileInputFormat.setInputPaths(job1, new Path(outputPath + "0"));
    FileOutputFormat.setOutputPath(job1, new Path(outputPath + "1"));
    job1.setInputFormatClass(SequenceFileInputFormat.class);
    job1.setOutputFormatClass(SequenceFileOutputFormat.class);
    job1.setMapOutputKeyClass(PairOfInts.class);
    job1.setMapOutputValueClass(DataNode.class);
    job1.setOutputKeyClass(NullWritable.class);
    job1.setOutputValueClass(NullWritable.class);
    job1.setMapperClass(MyMapper1.class);
    job1.setReducerClass(MyReducer1.class);
    job1.setPartitionerClass(MyPartitioner.class);

    // Delete the output directory if it exists already.
    outputDir = new Path(outputPath + "1");
    FileSystem.get(getConf()).delete(outputDir, true);

    startTime = System.currentTimeMillis();
    job1.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    jobTimeSum += (System.currentTimeMillis() - startTime) / 1000.0;

    LOG.info("Final Time: " + ((System.currentTimeMillis() - codeStart) / 1000.0) + " seconds, " + jobTimeSum
            + " seconds.");
    return 0;
}
From source file:bixo.hadoop.HadoopConfigured.java
License:Apache License
public FileSystem getFileSystem() throws IOException {
    return FileSystem.get(getConf());
}
From source file:bme.iclef.hadoop.file2seq.LocalSetup.java
License:Apache License
/** Sets up Configuration and LocalFileSystem instances for
 *  Hadoop. Throws Exception if they fail. Does not load any
 *  Hadoop XML configuration files, just sets the minimum
 *  configuration necessary to use the local file system. */
public LocalSetup() throws Exception {
    config = new Configuration();

    /* Normally set in hadoop-default.xml; without it you get
     * "java.io.IOException: No FileSystem for scheme: file" */
    config.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");

    fileSystem = FileSystem.get(config);
    if (fileSystem.getConf() == null) {
        /* This happens if the FileSystem is not properly
         * initialized, and causes a NullPointerException later. */
        throw new Exception("LocalFileSystem configuration is null");
    }
}
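An aside, not from the original project: on current Hadoop releases the local file system can also be obtained directly with FileSystem.getLocal(Configuration), which avoids setting fs.file.impl by hand. A minimal sketch (the class name LocalFsExample is hypothetical):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.LocalFileSystem;

    public class LocalFsExample {
        public static void main(String[] args) throws Exception {
            // getLocal always resolves to the local file system,
            // regardless of fs.defaultFS, so no override is needed.
            Configuration conf = new Configuration();
            LocalFileSystem localFs = FileSystem.getLocal(conf);
            System.out.println("Working dir: " + localFs.getWorkingDirectory());
        }
    }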
From source file:boa.datagen.MapFileGen.java
License:Apache License
public static void main(String[] args) throws Exception {
    if (SEQ_FILE_PATH.isEmpty()) {
        System.out.println("Missing path to sequence file. Please specify it in the properties file.");
        return;
    }

    String base = "hdfs://boa-njt/";
    Configuration conf = new Configuration();
    conf.set("fs.default.name", base);
    FileSystem fs = FileSystem.get(conf);

    Path path = new Path(SEQ_FILE_PATH);
    String name = path.getName();
    if (fs.isFile(path)) {
        if (path.getName().equals(MapFile.DATA_FILE_NAME)) {
            MapFile.fix(fs, path.getParent(), Text.class, BytesWritable.class, false, conf);
        } else {
            Path dataFile = new Path(path.getParent(), MapFile.DATA_FILE_NAME);
            fs.rename(path, dataFile);
            Path dir = new Path(path.getParent(), name);
            fs.mkdirs(dir);
            fs.rename(dataFile, new Path(dir, dataFile.getName()));
            MapFile.fix(fs, dir, Text.class, BytesWritable.class, false, conf);
        }
    } else {
        FileStatus[] files = fs.listStatus(path);
        for (FileStatus file : files) {
            path = file.getPath();
            if (fs.isFile(path)) {
                Path dataFile = new Path(path.getParent(), MapFile.DATA_FILE_NAME);
                fs.rename(path, dataFile);
                MapFile.fix(fs, dataFile.getParent(), Text.class, BytesWritable.class, false, conf);
                break;
            }
        }
    }
    fs.close();
}
From source file:boa.datagen.SeqProjectCombiner.java
License:Apache License
public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    conf.set("fs.default.name", "hdfs://boa-njt/");
    FileSystem fileSystem = FileSystem.get(conf);
    String base = conf.get("fs.default.name", "");

    HashMap<String, String> sources = new HashMap<String, String>();
    HashSet<String> marks = new HashSet<String>();
    FileStatus[] files = fileSystem.listStatus(new Path(base + "tmprepcache/2015-07"));
    for (int i = 0; i < files.length; i++) {
        FileStatus file = files[i];
        String name = file.getPath().getName();
        if (name.startsWith("projects-") && name.endsWith(".seq")) {
            System.out.println("Reading file " + i + " in " + files.length + ": " + name);
            SequenceFile.Reader r = new SequenceFile.Reader(fileSystem, file.getPath(), conf);
            final Text key = new Text();
            final BytesWritable value = new BytesWritable();
            try {
                while (r.next(key, value)) {
                    String s = key.toString();
                    if (marks.contains(s))
                        continue;
                    Project p = Project
                            .parseFrom(CodedInputStream.newInstance(value.getBytes(), 0, value.getLength()));
                    if (p.getCodeRepositoriesCount() > 0 && p.getCodeRepositories(0).getRevisionsCount() > 0)
                        marks.add(s);
                    sources.put(s, name);
                }
            } catch (Exception e) {
                System.err.println(name);
                e.printStackTrace();
            }
            r.close();
        }
    }

    SequenceFile.Writer w = SequenceFile.createWriter(fileSystem, conf,
            new Path(base + "repcache/2015-07/projects.seq"), Text.class, BytesWritable.class);
    for (int i = 0; i < files.length; i++) {
        FileStatus file = files[i];
        String name = file.getPath().getName();
        if (name.startsWith("projects-") && name.endsWith(".seq")) {
            System.out.println("Reading file " + i + " in " + files.length + ": " + name);
            SequenceFile.Reader r = new SequenceFile.Reader(fileSystem, file.getPath(), conf);
            final Text key = new Text();
            final BytesWritable value = new BytesWritable();
            try {
                while (r.next(key, value)) {
                    String s = key.toString();
                    if (sources.get(s).equals(name))
                        w.append(key, value);
                }
            } catch (Exception e) {
                System.err.println(name);
                e.printStackTrace();
            }
            r.close();
        }
    }
    w.close();
    fileSystem.close();
}
From source file:boa.datagen.SeqRepoImporter.java
License:Apache License
public static void main(String[] args) throws IOException, InterruptedException {
    conf = new Configuration();
    conf.set("fs.default.name", "hdfs://boa-njt/");
    fileSystem = FileSystem.get(conf);
    base = conf.get("fs.default.name", "");

    getProcessedProjects();
    getRepoInfo();

    for (int i = 0; i < poolSize; i++)
        new Thread(new ImportTask(i)).start();
}
From source file:boa.datagen.SeqSort.java
License:Apache License
public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    String base = "hdfs://boa-njt/";
    conf.set("fs.default.name", base);
    FileSystem fs = FileSystem.get(conf);

    String inPath = "/tmprepcache/2015-07/";
    StringBuilder sb = new StringBuilder();
    FileStatus[] files = fs.listStatus(new Path(base + inPath));
    for (int i = 0; i < files.length; i++) {
        FileStatus file = files[i];
        String name = file.getPath().getName();
        if (name.startsWith("ast-") && name.endsWith(".seq")) {
            try {
                //ToolRunner.run(new Configuration(), new SeqSort(inPath + name, "/tmprepcache/2015-07-sorted/" + name), null);
                sb.append(name + "\n");
            } catch (Throwable t) {
                t.printStackTrace();
            }
        }
    }
    FileIO.writeFileContents(new File("files2sort.txt"), sb.toString());
}
From source file:boa.datagen.SeqSortMerge.java
License:Apache License
public static void main(String[] args) throws IOException {
    conf.set("fs.default.name", base);
    FileSystem fs = FileSystem.get(conf);
    String inPath = "/tmprepcache/2015-07-sorted/";
    while (true) {
        FileStatus[] files = fs.listStatus(new Path(inPath));
        if (files.length < 2)
            break;
        Path path = new Path(inPath + System.currentTimeMillis());
        fs.mkdirs(path);
        SequenceFile.Writer w = SequenceFile.createWriter(fs, conf,
                new Path(inPath + path.getName() + "/part-00000"), Text.class, BytesWritable.class);

        FileStatus[] candidates = getCandidates(files);
        System.out.println("Merging " + candidates.length + " from " + files.length);
        SequenceFile.Reader[] readers = new SequenceFile.Reader[candidates.length];
        for (int i = 0; i < candidates.length; i++)
            readers[i] = new SequenceFile.Reader(fs,
                    new Path(inPath + candidates[i].getPath().getName() + "/part-00000"), conf);
        Text[] keys = new Text[candidates.length];
        BytesWritable[] values = new BytesWritable[candidates.length];
        read(readers, keys, values);

        while (true) {
            int index = min(keys);
            if (keys[index].toString().isEmpty())
                break;
            w.append(keys[index], values[index]);
            read(readers[index], keys[index], values[index]);
        }

        for (int i = 0; i < readers.length; i++)
            readers[i].close();
        w.close();
        for (int i = 0; i < readers.length; i++)
            fs.delete(new Path(inPath + candidates[i].getPath().getName()), true);
    }
}
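The helpers read(...) and min(...) are not shown in this snippet. A plausible reading, given how the merge loop treats an empty key as a drained reader, is sketched below; this is an assumption about their shape, not the project's actual code:

    // Assumption: plausible shapes for the helpers used above.
    static void read(SequenceFile.Reader[] readers, Text[] keys, BytesWritable[] values) throws IOException {
        for (int i = 0; i < readers.length; i++) {
            keys[i] = new Text();
            values[i] = new BytesWritable();
            read(readers[i], keys[i], values[i]);
        }
    }

    static void read(SequenceFile.Reader reader, Text key, BytesWritable value) throws IOException {
        // On EOF, leave the key empty so the merge loop treats this reader as drained.
        if (!reader.next(key, value))
            key.set("");
    }

    static int min(Text[] keys) {
        // Index of the smallest non-empty key; an empty key only wins if all are empty.
        int index = 0;
        for (int i = 1; i < keys.length; i++) {
            if (keys[index].toString().isEmpty()
                    || (!keys[i].toString().isEmpty() && keys[i].compareTo(keys[index]) < 0))
                index = i;
        }
        return index;
    }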
From source file:boa.functions.BoaAstIntrinsics.java
License:Apache License
private static void openMap() {
    final Configuration conf = new Configuration();
    try {
        final FileSystem fs = FileSystem.get(conf);
        final Path p = new Path("hdfs://boa-njt/",
                new Path(
                        context.getConfiguration().get("boa.ast.dir",
                                context.getConfiguration().get("boa.input.dir", "repcache/live")),
                        new Path("ast")));
        map = new MapFile.Reader(fs, p.toString(), conf);
    } catch (final Exception e) {
        e.printStackTrace();
    }
}