List of usage examples for org.apache.hadoop.fs FileSystem get
public static FileSystem get(Configuration conf) throws IOException
From source file:WriteFDFPerformance.java
License:Open Source License
/**
 * Writes three identical CSV rows ({@code 127,1000}, each terminated by CRLF) to the file
 * {@code txt/txt} on the file system obtained from a default {@link Configuration}.
 *
 * @throws IOException if the file cannot be created or a write fails
 */
static void writetxt() throws IOException {
    // try-with-resources closes the stream even when one of the writes throws.
    try (FSDataOutputStream fos = FileSystem.get(new Configuration()).create(new Path("txt/txt"))) {
        final String row = String.valueOf(127) + "," + String.valueOf(1000) + "\r\n";
        for (int i = 0; i < 3; i++) {
            fos.writeBytes(row);
        }
    }
}
From source file:LookupPostings.java
License:Apache License
/** * Runs this tool./*from ww w .j a v a 2 s . co m*/ */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INDEX)); options.addOption( OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(COLLECTION)); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(INDEX) || !cmdline.hasOption(COLLECTION)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(LookupPostings.class.getName(), options); ToolRunner.printGenericCommandUsage(System.out); System.exit(-1); } String indexPath = cmdline.getOptionValue(INDEX); String collectionPath = cmdline.getOptionValue(COLLECTION); if (collectionPath.endsWith(".gz")) { System.out.println("gzipped collection is not seekable: use compressed version!"); System.exit(-1); } Configuration config = new Configuration(); FileSystem fs = FileSystem.get(config); MapFile.Reader reader = new MapFile.Reader(new Path(indexPath + "/part-r-00000"), config); FSDataInputStream collection = fs.open(new Path(collectionPath)); BufferedReader d = new BufferedReader(new InputStreamReader(collection)); Text key = new Text(); PairOfWritables<IntWritable, ArrayListWritable<PairOfInts>> value = new PairOfWritables<IntWritable, ArrayListWritable<PairOfInts>>(); System.out.println("Looking up postings for the term \"starcross'd\""); key.set("starcross'd"); reader.get(key, value); ArrayListWritable<PairOfInts> postings = value.getRightElement(); for (PairOfInts pair : postings) { System.out.println(pair); collection.seek(pair.getLeftElement()); 
System.out.println(d.readLine()); } key.set("gold"); reader.get(key, value); System.out.println("Complete postings list for 'gold': " + value); Int2IntFrequencyDistribution goldHist = new Int2IntFrequencyDistributionEntry(); postings = value.getRightElement(); for (PairOfInts pair : postings) { goldHist.increment(pair.getRightElement()); } System.out.println("histogram of tf values for gold"); for (PairOfInts pair : goldHist) { System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement()); } key.set("silver"); reader.get(key, value); System.out.println("Complete postings list for 'silver': " + value); Int2IntFrequencyDistribution silverHist = new Int2IntFrequencyDistributionEntry(); postings = value.getRightElement(); for (PairOfInts pair : postings) { silverHist.increment(pair.getRightElement()); } System.out.println("histogram of tf values for silver"); for (PairOfInts pair : silverHist) { System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement()); } key.set("bronze"); Writable w = reader.get(key, value); if (w == null) { System.out.println("the term bronze does not appear in the collection"); } collection.close(); reader.close(); return 0; }
From source file:Vectors.java
License:Apache License
public static Vector readSequenceFile(Path path, Configuration conf) throws IOException { FileSystem fs = FileSystem.get(conf); for (FileStatus fileStatus : fs.listStatus(path)) { if (fileStatus.getPath().getName().contains("part-")) { SequenceFile.Reader reader = null; try { reader = new SequenceFile.Reader(fs, fileStatus.getPath(), conf); Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf); VectorWritable value = (VectorWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);//from w w w .j a v a 2 s . c om reader.next(key, value); return value.get(); } finally { IOUtils.closeStream(reader); } } } return null; }
From source file:BwaInterpreter.java
License:Open Source License
private void setTotalInputLength() { try {/* w w w . j a v a 2 s . co m*/ FileSystem fs = FileSystem.get(this.conf); // To get the input files sizes ContentSummary cSummaryFile1 = fs.getContentSummary(new Path(options.getInputPath())); long lengthFile1 = cSummaryFile1.getLength(); long lengthFile2 = 0; if (!options.getInputPath2().isEmpty()) { ContentSummary cSummaryFile2 = fs.getContentSummary(new Path(options.getInputPath())); lengthFile2 = cSummaryFile2.getLength(); } // Total size. Depends on paired or single reads this.totalInputLength = lengthFile1 + lengthFile2; fs.close(); } catch (IOException e) { LOG.error(e.toString()); e.printStackTrace(); } }
From source file:BwaInterpreter.java
License:Open Source License
private void createOutputFolder() { try {//from w ww. j a v a 2s . c o m FileSystem fs = FileSystem.get(this.conf); // Path variable Path outputDir = new Path(options.getOutputPath()); // Directory creation if (!fs.exists(outputDir)) { fs.mkdirs(outputDir); } else { fs.delete(outputDir, true); fs.mkdirs(outputDir); } fs.close(); } catch (IOException e) { LOG.error(e.toString()); e.printStackTrace(); } }
From source file:BwaInterpreter.java
License:Open Source License
private void combineOutputSamFiles(String outputHdfsDir, List<String> returnedValues) { try {/*from www. ja va 2 s.c o m*/ Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); Path finalHdfsOutputFile = new Path(outputHdfsDir + "/FullOutput.sam"); FSDataOutputStream outputFinalStream = fs.create(finalHdfsOutputFile, true); // We iterate over the resulting files in HDFS and agregate them into only one file. for (int i = 0; i < returnedValues.size(); i++) { LOG.info("JMAbuin:: SparkBWA :: Returned file ::" + returnedValues.get(i)); BufferedReader br = new BufferedReader( new InputStreamReader(fs.open(new Path(returnedValues.get(i))))); String line; line = br.readLine(); while (line != null) { if (i == 0 || !line.startsWith("@")) { //outputFinalStream.writeBytes(line+"\n"); outputFinalStream.write((line + "\n").getBytes()); } line = br.readLine(); } br.close(); fs.delete(new Path(returnedValues.get(i)), true); } outputFinalStream.close(); fs.close(); } catch (IOException e) { e.printStackTrace(); LOG.error(e.toString()); } }
From source file:BwaInterpreter.java
License:Open Source License
/** * Runs BWA with the specified options//from w w w. j a v a 2 s. com * @brief This function runs BWA with the input data selected and with the options also selected by the user. */ public void RunBwa() { LOG.info("JMAbuin:: Starting BWA"); Bwa bwa = new Bwa(this.options); List<String> returnedValues; if (bwa.isPairedReads()) { JavaRDD<Tuple2<String, String>> readsRDD = handlePairedReadsSorting(); returnedValues = MapPairedBwa(bwa, readsRDD); } else { JavaRDD<String> readsRDD = handleSingleReadsSorting(); returnedValues = MapSingleBwa(bwa, readsRDD); } LOG.info("BwaRDD :: Total of returned lines from RDDs :: " + returnedValues.size()); // In the case of use a reducer the final output has to be stored in just one file if (bwa.isUseReducer()) { combineOutputSamFiles(bwa.getOutputHdfsDir(), returnedValues); } else { for (String outputFile : returnedValues) { LOG.info("JMAbuin:: SparkBWA:: Returned file ::" + outputFile); } } //After the execution, if the inputTmp exists, it should be deleted try { if ((this.inputTmpFileName != null) && (!this.inputTmpFileName.isEmpty())) { FileSystem fs = FileSystem.get(this.conf); fs.delete(new Path(this.inputTmpFileName), true); fs.close(); } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); LOG.error(e.toString()); } }
From source file:BwaInterpreter.java
License:Open Source License
/** * Used to perform the sort operation in HDFS * @brief This function provides a method to perform the sort phase in HDFS * @author Jos M. Abun/*w w w. ja va 2 s. co m*/ * @param fileName1 The first file that contains input FASTQ reads. Stored in HDFS * @param fileName2 The second file that contains input FASTQ reads. Stored in HDFS * @return A JavaRDD that contains the paired reads sorted */ public JavaRDD<Tuple2<String, String>> SortInHDFS2(String fileName1, String fileName2) { Configuration conf = this.conf; LOG.info("JMAbuin:: Starting writing reads to HDFS"); try { FileSystem fs = FileSystem.get(conf); Path outputFilePath = new Path(this.inputTmpFileName); //To write the paired reads FSDataOutputStream outputFinalStream = fs.create(outputFilePath, true); //To read paired reads from both files BufferedReader brFastqFile1 = new BufferedReader(new InputStreamReader(fs.open(new Path(fileName1)))); BufferedReader brFastqFile2 = new BufferedReader(new InputStreamReader(fs.open(new Path(fileName2)))); String lineFastq1; String lineFastq2; lineFastq1 = brFastqFile1.readLine(); lineFastq2 = brFastqFile2.readLine(); //Loop to read two files. 
The two of them must have the same line numbers while (lineFastq1 != null) { //The lines are written interspersed outputFinalStream.write((lineFastq1 + "\n" + lineFastq2 + "\n").getBytes()); //Next lines are readed lineFastq1 = brFastqFile1.readLine(); lineFastq2 = brFastqFile2.readLine(); } //Close the input and output files brFastqFile1.close(); brFastqFile2.close(); outputFinalStream.close(); //Now it is time to read the previous created file and create the RDD ContentSummary cSummary = fs.getContentSummary(outputFilePath); long length = cSummary.getLength(); this.totalInputLength = length; fs.close(); //In case of the user does want partitioning if (this.options.getPartitionNumber() != 0) { //These options are set to indicate the split size and get the correct vnumber of partitions this.conf.set("mapreduce.input.fileinputformat.split.maxsize", String.valueOf((length) / this.options.getPartitionNumber())); this.conf.set("mapreduce.input.fileinputformat.split.minsize", String.valueOf((length) / this.options.getPartitionNumber())); LOG.info("JMAbuin partitioning from HDFS:: " + String.valueOf((length) / this.options.getPartitionNumber())); //Using the FastqInputFormatDouble class we get values from the HDFS file. After that, these values are stored in a RDD return this.ctx.newAPIHadoopFile(this.inputTmpFileName, FastqInputFormatDouble.class, Long.class, String.class, this.conf).mapPartitions(new BigFastq2RDDPartitionsDouble(), true); } else { //Using the FastqInputFormatDouble class we get values from the HDFS file. After that, these values are stored in a RDD return this.ctx.newAPIHadoopFile(this.inputTmpFileName, FastqInputFormatDouble.class, Long.class, String.class, this.conf).map(new BigFastq2RDDDouble()); } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); LOG.error(e.toString()); return null; } }
From source file:TestStringRelevance.java
License:Apache License
public TestStringRelevance() throws IOException { fs = FileSystem.get(new Configuration()); Relevance.TEST_MODE = true; }
From source file:WikipediaDocnoMappingBuilder.java
License:Apache License
@SuppressWarnings("static-access") @Override//from www . ja va 2 s .co m public int run(String[] args) throws Exception { Options options = new Options(); options.addOption( OptionBuilder.withArgName("path").hasArg().withDescription("XML dump file").create(INPUT_OPTION)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output file") .create(OUTPUT_FILE_OPTION)); options.addOption(OptionBuilder.withArgName("en|sv|de|cs|es|zh|ar|tr").hasArg() .withDescription("two-letter language code").create(LANGUAGE_OPTION)); options.addOption(KEEP_ALL_OPTION, false, "keep all pages"); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_FILE_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String language = null; if (cmdline.hasOption(LANGUAGE_OPTION)) { language = cmdline.getOptionValue(LANGUAGE_OPTION); if (language.length() != 2) { System.err.println("Error: \"" + language + "\" unknown language!"); return -1; } } String inputPath = cmdline.getOptionValue(INPUT_OPTION); String outputFile = cmdline.getOptionValue(OUTPUT_FILE_OPTION); boolean keepAll = cmdline.hasOption(KEEP_ALL_OPTION); String tmpPath = "tmp-" + WikipediaDocnoMappingBuilder.class.getSimpleName() + "-" + RANDOM.nextInt(10000); LOG.info("Tool name: " + this.getClass().getName()); LOG.info(" - input: " + inputPath); LOG.info(" - output file: " + outputFile); LOG.info(" - keep all pages: " + keepAll); LOG.info(" - language: " + language); // Job job = Job.getInstance(getConf()); JobConf conf = new JobConf(WikipediaDocnoMappingBuilder.class); conf.setJarByClass(WikipediaDocnoMappingBuilder.class); 
conf.setJobName(String.format("BuildWikipediaDocnoMapping[%s: %s, %s: %s, %s: %s]", INPUT_OPTION, inputPath, OUTPUT_FILE_OPTION, outputFile, LANGUAGE_OPTION, language)); conf.setBoolean(KEEP_ALL_OPTION, keepAll); // .getConfiguration().setBoolean(KEEP_ALL_OPTION, keepAll); if (language != null) { conf.set("wiki.language", language); } conf.setNumReduceTasks(1); FileInputFormat.addInputPath(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(tmpPath)); FileOutputFormat.setCompressOutput(conf, false); conf.setOutputKeyClass(IntWritable.class); conf.setOutputValueClass(IntWritable.class); conf.setInputFormat(WikipediaPageInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapperClass(MyMapper.class); conf.setReducerClass(MyReducer.class); // Delete the output directory if it exists already. FileSystem.get(getConf()).delete(new Path(tmpPath), true); // job.waitForCompletion(true); RunningJob job = JobClient.runJob(conf); job.waitForCompletion(); // JobClient jobClient = new JobClient(conf); long cnt = keepAll ? job.getCounters().findCounter(PageTypes.TOTAL).getValue() : job.getCounters().findCounter(PageTypes.ARTICLE).getValue(); WikipediaDocnoMapping.writeDocnoMappingData(FileSystem.get(getConf()), tmpPath + "/part-00000", (int) cnt, outputFile); FileSystem.get(getConf()).delete(new Path(tmpPath), true); return 0; }