Example usage for org.apache.hadoop.fs FileSystem get

List of usage examples for org.apache.hadoop.fs FileSystem get

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem get.

Prototype

public static FileSystem get(Configuration conf) throws IOException 

Document

Returns the configured FileSystem implementation.
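
A minimal sketch of how this method is typically called (the class name and the path /tmp/example.txt are illustrative, not taken from the examples below): FileSystem.get(Configuration) resolves fs.defaultFS from the configuration and returns the matching FileSystem implementation, e.g. an HDFS client or the local file system.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemGetExample {
    public static void main(String[] args) throws IOException {
        // Picks up core-site.xml/hdfs-site.xml from the classpath; fs.defaultFS
        // determines which FileSystem implementation is returned.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Illustrative path only.
        Path p = new Path("/tmp/example.txt");
        System.out.println(p + " exists: " + fs.exists(p));
    }
}

Note that FileSystem.get(Configuration) normally returns a cached, shared instance, so closing it also affects other callers in the same JVM.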

Usage

From source file:WriteFDFPerformance.java

License:Open Source License

static void writetxt() throws IOException {
    FSDataOutputStream fos = FileSystem.get(new Configuration()).create(new Path("txt/txt"));
    fos.writeBytes(String.valueOf(127) + "," + String.valueOf(1000) + "\r\n");
    fos.writeBytes(String.valueOf(127) + "," + String.valueOf(1000) + "\r\n");
    fos.writeBytes(String.valueOf(127) + "," + String.valueOf(1000) + "\r\n");
    fos.close();
}

From source file:LookupPostings.java

License:Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INDEX));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(COLLECTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (!cmdline.hasOption(INDEX) || !cmdline.hasOption(COLLECTION)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(LookupPostings.class.getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.exit(-1);
    }

    String indexPath = cmdline.getOptionValue(INDEX);
    String collectionPath = cmdline.getOptionValue(COLLECTION);

    if (collectionPath.endsWith(".gz")) {
        System.out.println("gzipped collection is not seekable: use compressed version!");
        System.exit(-1);
    }

    Configuration config = new Configuration();
    FileSystem fs = FileSystem.get(config);
    MapFile.Reader reader = new MapFile.Reader(new Path(indexPath + "/part-r-00000"), config);

    FSDataInputStream collection = fs.open(new Path(collectionPath));
    BufferedReader d = new BufferedReader(new InputStreamReader(collection));

    Text key = new Text();
    PairOfWritables<IntWritable, ArrayListWritable<PairOfInts>> value = new PairOfWritables<IntWritable, ArrayListWritable<PairOfInts>>();

    System.out.println("Looking up postings for the term \"starcross'd\"");
    key.set("starcross'd");

    reader.get(key, value);

    ArrayListWritable<PairOfInts> postings = value.getRightElement();
    for (PairOfInts pair : postings) {
        System.out.println(pair);
        collection.seek(pair.getLeftElement());
        System.out.println(d.readLine());
    }

    key.set("gold");
    reader.get(key, value);
    System.out.println("Complete postings list for 'gold': " + value);

    Int2IntFrequencyDistribution goldHist = new Int2IntFrequencyDistributionEntry();
    postings = value.getRightElement();
    for (PairOfInts pair : postings) {
        goldHist.increment(pair.getRightElement());
    }

    System.out.println("histogram of tf values for gold");
    for (PairOfInts pair : goldHist) {
        System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement());
    }

    key.set("silver");
    reader.get(key, value);
    System.out.println("Complete postings list for 'silver': " + value);

    Int2IntFrequencyDistribution silverHist = new Int2IntFrequencyDistributionEntry();
    postings = value.getRightElement();
    for (PairOfInts pair : postings) {
        silverHist.increment(pair.getRightElement());
    }

    System.out.println("histogram of tf values for silver");
    for (PairOfInts pair : silverHist) {
        System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement());
    }

    key.set("bronze");
    Writable w = reader.get(key, value);

    if (w == null) {
        System.out.println("the term bronze does not appear in the collection");
    }

    collection.close();
    reader.close();

    return 0;
}

From source file:Vectors.java

License:Apache License

public static Vector readSequenceFile(Path path, Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    for (FileStatus fileStatus : fs.listStatus(path)) {
        if (fileStatus.getPath().getName().contains("part-")) {
            SequenceFile.Reader reader = null;
            try {
                reader = new SequenceFile.Reader(fs, fileStatus.getPath(), conf);
                Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
                VectorWritable value = (VectorWritable) ReflectionUtils.newInstance(reader.getValueClass(),
                        conf);
                reader.next(key, value);
                return value.get();
            } finally {
                IOUtils.closeStream(reader);
            }
        }
    }
    return null;
}

From source file:BwaInterpreter.java

License:Open Source License

private void setTotalInputLength() {
    try {
        FileSystem fs = FileSystem.get(this.conf);

        // To get the input file sizes
        ContentSummary cSummaryFile1 = fs.getContentSummary(new Path(options.getInputPath()));

        long lengthFile1 = cSummaryFile1.getLength();
        long lengthFile2 = 0;

        if (!options.getInputPath2().isEmpty()) {
            ContentSummary cSummaryFile2 = fs.getContentSummary(new Path(options.getInputPath2()));
            lengthFile2 = cSummaryFile2.getLength();
        }

        // Total size. Depends on paired or single reads
        this.totalInputLength = lengthFile1 + lengthFile2;
        fs.close();
    } catch (IOException e) {
        LOG.error(e.toString());
        e.printStackTrace();
    }
}

From source file:BwaInterpreter.java

License:Open Source License

private void createOutputFolder() {
    try {
        FileSystem fs = FileSystem.get(this.conf);

        // Path variable
        Path outputDir = new Path(options.getOutputPath());

        // Directory creation
        if (!fs.exists(outputDir)) {
            fs.mkdirs(outputDir);
        } else {
            fs.delete(outputDir, true);
            fs.mkdirs(outputDir);
        }

        fs.close();
    } catch (IOException e) {
        LOG.error(e.toString());
        e.printStackTrace();
    }
}

From source file:BwaInterpreter.java

License:Open Source License

private void combineOutputSamFiles(String outputHdfsDir, List<String> returnedValues) {
    try {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        Path finalHdfsOutputFile = new Path(outputHdfsDir + "/FullOutput.sam");
        FSDataOutputStream outputFinalStream = fs.create(finalHdfsOutputFile, true);

        // We iterate over the resulting files in HDFS and aggregate them into a single file.
        for (int i = 0; i < returnedValues.size(); i++) {
            LOG.info("JMAbuin:: SparkBWA :: Returned file ::" + returnedValues.get(i));
            BufferedReader br = new BufferedReader(
                    new InputStreamReader(fs.open(new Path(returnedValues.get(i)))));

            String line;
            line = br.readLine();

            while (line != null) {
                if (i == 0 || !line.startsWith("@")) {
                    //outputFinalStream.writeBytes(line+"\n");
                    outputFinalStream.write((line + "\n").getBytes());
                }

                line = br.readLine();
            }
            br.close();

            fs.delete(new Path(returnedValues.get(i)), true);
        }

        outputFinalStream.close();
        fs.close();
    } catch (IOException e) {
        e.printStackTrace();
        LOG.error(e.toString());
    }
}

From source file:BwaInterpreter.java

License:Open Source License

/**
 * Runs BWA with the specified options
 * @brief This function runs BWA with the input data and the options selected by the user.
 */
public void RunBwa() {
    LOG.info("JMAbuin:: Starting BWA");
    Bwa bwa = new Bwa(this.options);

    List<String> returnedValues;
    if (bwa.isPairedReads()) {
        JavaRDD<Tuple2<String, String>> readsRDD = handlePairedReadsSorting();
        returnedValues = MapPairedBwa(bwa, readsRDD);
    } else {
        JavaRDD<String> readsRDD = handleSingleReadsSorting();
        returnedValues = MapSingleBwa(bwa, readsRDD);
    }

    LOG.info("BwaRDD :: Total of returned lines from RDDs :: " + returnedValues.size());

    // If a reducer is used, the final output has to be stored in just one file
    if (bwa.isUseReducer()) {
        combineOutputSamFiles(bwa.getOutputHdfsDir(), returnedValues);
    } else {
        for (String outputFile : returnedValues) {
            LOG.info("JMAbuin:: SparkBWA:: Returned file ::" + outputFile);
        }
    }

    //After the execution, if the inputTmp exists, it should be deleted
    try {
        if ((this.inputTmpFileName != null) && (!this.inputTmpFileName.isEmpty())) {
            FileSystem fs = FileSystem.get(this.conf);

            fs.delete(new Path(this.inputTmpFileName), true);

            fs.close();
        }

    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
        LOG.error(e.toString());

    }
}

From source file:BwaInterpreter.java

License:Open Source License

/**
 * Used to perform the sort operation in HDFS
 * @brief This function provides a method to perform the sort phase in HDFS
 * @author José M. Abuín
 * @param fileName1 The first file that contains input FASTQ reads. Stored in HDFS
 * @param fileName2 The second file that contains input FASTQ reads. Stored in HDFS
 * @return A JavaRDD that contains the paired reads sorted
 */
public JavaRDD<Tuple2<String, String>> SortInHDFS2(String fileName1, String fileName2) {

    Configuration conf = this.conf;

    LOG.info("JMAbuin:: Starting writing reads to HDFS");

    try {
        FileSystem fs = FileSystem.get(conf);

        Path outputFilePath = new Path(this.inputTmpFileName);

        //To write the paired reads
        FSDataOutputStream outputFinalStream = fs.create(outputFilePath, true);

        //To read paired reads from both files
        BufferedReader brFastqFile1 = new BufferedReader(new InputStreamReader(fs.open(new Path(fileName1))));
        BufferedReader brFastqFile2 = new BufferedReader(new InputStreamReader(fs.open(new Path(fileName2))));

        String lineFastq1;
        String lineFastq2;

        lineFastq1 = brFastqFile1.readLine();
        lineFastq2 = brFastqFile2.readLine();

        //Loop to read the two files. Both must have the same number of lines
        while (lineFastq1 != null) {
            //The lines are written interspersed
            outputFinalStream.write((lineFastq1 + "\n" + lineFastq2 + "\n").getBytes());

            //Next lines are read
            lineFastq1 = brFastqFile1.readLine();
            lineFastq2 = brFastqFile2.readLine();
        }

        //Close the input and output files
        brFastqFile1.close();
        brFastqFile2.close();
        outputFinalStream.close();

        //Now it is time to read the previously created file and create the RDD
        ContentSummary cSummary = fs.getContentSummary(outputFilePath);

        long length = cSummary.getLength();

        this.totalInputLength = length;

        fs.close();

        //In case the user wants partitioning
        if (this.options.getPartitionNumber() != 0) {

            //These options are set to indicate the split size and get the correct number of partitions
            this.conf.set("mapreduce.input.fileinputformat.split.maxsize",
                    String.valueOf((length) / this.options.getPartitionNumber()));
            this.conf.set("mapreduce.input.fileinputformat.split.minsize",
                    String.valueOf((length) / this.options.getPartitionNumber()));

            LOG.info("JMAbuin partitioning from HDFS:: "
                    + String.valueOf((length) / this.options.getPartitionNumber()));

            //Using the FastqInputFormatDouble class we get values from the HDFS file. After that, these values are stored in a RDD
            return this.ctx.newAPIHadoopFile(this.inputTmpFileName, FastqInputFormatDouble.class, Long.class,
                    String.class, this.conf).mapPartitions(new BigFastq2RDDPartitionsDouble(), true);

        } else {
            //Using the FastqInputFormatDouble class we get values from the HDFS file. After that, these values are stored in a RDD
            return this.ctx.newAPIHadoopFile(this.inputTmpFileName, FastqInputFormatDouble.class, Long.class,
                    String.class, this.conf).map(new BigFastq2RDDDouble());
        }

    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
        LOG.error(e.toString());

        return null;
    }
}

From source file:TestStringRelevance.java

License:Apache License

public TestStringRelevance() throws IOException {
    fs = FileSystem.get(new Configuration());
    Relevance.TEST_MODE = true;
}

From source file:WikipediaDocnoMappingBuilder.java

License:Apache License

@SuppressWarnings("static-access")
    @Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("XML dump file").create(INPUT_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output file")
            .create(OUTPUT_FILE_OPTION));
    options.addOption(OptionBuilder.withArgName("en|sv|de|cs|es|zh|ar|tr").hasArg()
            .withDescription("two-letter language code").create(LANGUAGE_OPTION));
    options.addOption(KEEP_ALL_OPTION, false, "keep all pages");

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_FILE_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String language = null;
    if (cmdline.hasOption(LANGUAGE_OPTION)) {
        language = cmdline.getOptionValue(LANGUAGE_OPTION);
        if (language.length() != 2) {
            System.err.println("Error: \"" + language + "\" unknown language!");
            return -1;
        }
    }

    String inputPath = cmdline.getOptionValue(INPUT_OPTION);
    String outputFile = cmdline.getOptionValue(OUTPUT_FILE_OPTION);
    boolean keepAll = cmdline.hasOption(KEEP_ALL_OPTION);

    String tmpPath = "tmp-" + WikipediaDocnoMappingBuilder.class.getSimpleName() + "-" + RANDOM.nextInt(10000);

    LOG.info("Tool name: " + this.getClass().getName());
    LOG.info(" - input: " + inputPath);
    LOG.info(" - output file: " + outputFile);
    LOG.info(" - keep all pages: " + keepAll);
    LOG.info(" - language: " + language);

    // Job job = Job.getInstance(getConf());
    JobConf conf = new JobConf(WikipediaDocnoMappingBuilder.class);
    conf.setJarByClass(WikipediaDocnoMappingBuilder.class);
    conf.setJobName(String.format("BuildWikipediaDocnoMapping[%s: %s, %s: %s, %s: %s]", INPUT_OPTION, inputPath,
            OUTPUT_FILE_OPTION, outputFile, LANGUAGE_OPTION, language));

    conf.setBoolean(KEEP_ALL_OPTION, keepAll);
    // .getConfiguration().setBoolean(KEEP_ALL_OPTION, keepAll);
    if (language != null) {
        conf.set("wiki.language", language);
    }
    conf.setNumReduceTasks(1);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(tmpPath));
    FileOutputFormat.setCompressOutput(conf, false);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(IntWritable.class);
    conf.setInputFormat(WikipediaPageInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapperClass(MyMapper.class);
    conf.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    FileSystem.get(getConf()).delete(new Path(tmpPath), true);

    // job.waitForCompletion(true);

    RunningJob job = JobClient.runJob(conf);
    job.waitForCompletion();

    // JobClient jobClient = new JobClient(conf);
    long cnt = keepAll ? job.getCounters().findCounter(PageTypes.TOTAL).getValue()
            : job.getCounters().findCounter(PageTypes.ARTICLE).getValue();

    WikipediaDocnoMapping.writeDocnoMappingData(FileSystem.get(getConf()), tmpPath + "/part-00000", (int) cnt,
            outputFile);

    FileSystem.get(getConf()).delete(new Path(tmpPath), true);

    return 0;
}