List of usage examples for org.apache.hadoop.fs.FileSystem.mkdirs
public boolean mkdirs(Path f) throws IOException
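Before the examples, a minimal usage sketch (the paths below are hypothetical). mkdirs creates the directory together with any missing parents, much like mkdir -p, and returns true if the directory was created or already exists; an overload additionally takes an FsPermission for the new directory:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;

public class MkdirsSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Creates /tmp/example/output and any missing parent directories.
        Path dir = new Path("/tmp/example/output");
        if (!fs.mkdirs(dir)) {
            throw new IOException("Could not create " + dir);
        }

        // Overload that also sets the permission on the new directory.
        fs.mkdirs(new Path("/tmp/example/restricted"), new FsPermission((short) 0700));
    }
}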
From source file:io.gzinga.hadoop.TestSplittableGZipCodec.java
License:Apache License
@Test
public void testSplittableGZipCodec() {
    try {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.get(conf);
        fs.mkdirs(new Path("target/test"));

        GZipOutputStreamRandomAccess gzip = new GZipOutputStreamRandomAccess(
                fs.create(new Path("target/test/testfile1.gz")));
        String str = "This is line\n";
        for (int i = 1; i <= 10000; i++) {
            gzip.write(str.getBytes());
            if (i % 100 == 0) {
                gzip.addOffset(i / 100L);
            }
        }
        Assert.assertEquals(gzip.getOffsetMap().size(), 100);
        gzip.close();

        conf.set("mapreduce.framework.name", "local");
        conf.set("io.compression.codecs", "io.gzinga.hadoop.SplittableGZipCodec");
        conf.set("mapreduce.input.fileinputformat.split.maxsize", "20000");
        Job job = new Job(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(WordCount.TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path("target/test/testfile1.gz"));
        FileOutputFormat.setOutputPath(job, new Path("target/test/testfile2"));
        job.waitForCompletion(true);

        BufferedReader br = new BufferedReader(
                new InputStreamReader(fs.open(new Path("target/test/testfile2/part-r-00000"))));
        Assert.assertEquals("This\t10000", br.readLine());
        Assert.assertEquals("is\t10000", br.readLine());
        Assert.assertEquals("line\t10000", br.readLine());
        br.close();
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail();
    } finally {
        FileUtil.fullyDelete(new File("target/test/testfile2"));
        FileUtil.fullyDelete(new File("target/test/testfile1.gz"));
    }
}
From source file:io.hops.erasure_coding.Encoder.java
License:Apache License
/**
 * The interface to use to generate a parity file.
 * This method can be called multiple times with the same Encoder object,
 * thus allowing reuse of the buffers allocated by the Encoder object.
 *
 * @param fs The filesystem containing the source file.
 * @param srcFile The source file.
 * @param parityFile The parity file to be generated.
 */
public void encodeFile(Configuration jobConf, FileSystem fs, Path srcFile, FileSystem parityFs,
        Path parityFile, short parityRepl, long numStripes, long blockSize, Progressable reporter,
        StripeReader sReader) throws IOException {
    long expectedParityBlocks = numStripes * codec.parityLength;
    long expectedParityFileSize = numStripes * blockSize * codec.parityLength;

    if (!parityFs.mkdirs(parityFile.getParent())) {
        throw new IOException("Could not create parent dir " + parityFile.getParent());
    }
    // Delete the destination if it exists.
    if (parityFs.exists(parityFile)) {
        parityFs.delete(parityFile, false);
    }

    // Writing out a large parity file at replication 1 is difficult since
    // some datanode could die and we would not be able to close() the file.
    // So write at replication 2 and then reduce it after close() succeeds.
    short tmpRepl = parityRepl;
    if (expectedParityBlocks >= conf.getInt("raid.encoder.largeparity.blocks", 20)) {
        if (parityRepl == 1) {
            tmpRepl = 2;
        }
    }
    FSDataOutputStream out = parityFs.create(parityFile, true,
            conf.getInt("io.file.buffer.size", 64 * 1024), tmpRepl, blockSize);
    DFSOutputStream dfsOut = (DFSOutputStream) out.getWrappedStream();
    dfsOut.enableParityStream(codec.getStripeLength(), codec.getParityLength(), srcFile.toUri().getPath());
    try {
        encodeFileToStream(fs, srcFile, parityFile, sReader, blockSize, out, reporter);
        out.close();
        out = null;
        LOG.info("Wrote parity file " + parityFile);
        FileStatus tmpStat = parityFs.getFileStatus(parityFile);
        if (tmpStat.getLen() != expectedParityFileSize) {
            throw new IOException("Expected parity size " + expectedParityFileSize
                    + " does not match actual " + tmpStat.getLen());
        }
        if (tmpRepl > parityRepl) {
            parityFs.setReplication(parityFile, parityRepl);
        }
    } finally {
        if (out != null) {
            out.close();
        }
    }
}
From source file:io.hops.experiments.utils.DFSOperationsUtils.java
License:Apache License
public static void mkdirs(FileSystem dfs, String pathStr) throws IOException {
    if (SERVER_LESS_MODE) {
        serverLessModeRandomWait();
        return;
    }
    dfs.mkdirs(new Path(pathStr));
}
From source file:io.seqware.pipeline.plugins.sanity.checks.HDFS_Check.java
License:Open Source License
@Override
public boolean check(QueryRunner qRunner, Metadata metadataWS) throws SQLException {
    FileSystem fileSystem = null;
    HashMap<String, String> settings = (HashMap<String, String>) ConfigTools.getSettings();
    if (settings.isEmpty()) {
        return false;
    } else if (!settings.containsKey("FS.DEFAULTFS") || !settings.containsKey("FS.HDFS.IMPL")) {
        return false;
    } else if (!settings.containsKey("HBASE.ZOOKEEPER.QUORUM")
            || !settings.containsKey("HBASE.ZOOKEEPER.PROPERTY.CLIENTPORT")
            || !settings.containsKey("HBASE.MASTER") || !settings.containsKey("MAPRED.JOB.TRACKER")) {
        return false;
    }
    try {
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", settings.get("HBASE.ZOOKEEPER.QUORUM"));
        conf.set("hbase.zookeeper.property.clientPort", settings.get("HBASE.ZOOKEEPER.PROPERTY.CLIENTPORT"));
        conf.set("hbase.master", settings.get("HBASE.MASTER"));
        conf.set("mapred.job.tracker", settings.get("MAPRED.JOB.TRACKER"));
        conf.set("fs.default.name", settings.get("FS.DEFAULTFS"));
        conf.set("fs.defaultFS", settings.get("FS.DEFAULTFS")); // the key is case-sensitive: "fs.defaultFS"
        conf.set("fs.hdfs.impl", settings.get("FS.HDFS.IMPL"));
        fileSystem = FileSystem.get(conf);
        // Creating and scheduling deletion of a test directory doubles as a connectivity check.
        Path path = new Path("test");
        fileSystem.mkdirs(path);
        fileSystem.deleteOnExit(path);
    } catch (IOException ex) {
        System.err.println("Error connecting to hdfs: " + ex.getMessage());
        return false;
    } finally {
        try {
            if (fileSystem != null) {
                fileSystem.close();
            }
        } catch (IOException ex) {
            Logger.getLogger(HDFS_Check.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    return true;
}
From source file:it.isislab.sof.core.engine.hadoop.mapreduce.generic.SOFReducerGeneric.java
License:Apache License
public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    String EVALUATION_PROGRAM_THREAD = "evaluation" + Thread.currentThread().getId();
    FileSystem fs = FileSystem.get(conf);
    if (ISLOOP) {
        Path eprogram = new Path(EVALUATION_PROGRAM_THREAD);
        fs.copyToLocalFile(new Path(RATING_PROGRAM), eprogram);
        try {
            fs.mkdirs(new Path(this.RATING_PATH));
        } catch (Exception e) {
            // Ignored: the rating directory may already exist.
        }
    }
    if (ISLOOP) {
        Random r = new Random(System.currentTimeMillis());
        String id = MD5(key.toString() + r.nextDouble());
        String tmpEvalXml = "tmpEval" + id + ".xml";
        Path ptemp = new Path(tmpEvalXml);
        Path file_output = new Path(key.toString());
        fs.copyToLocalFile(file_output, ptemp);
        String xmlOutput = key.toString().substring(key.toString().lastIndexOf("/") + 1);
        generateEvaluation(tmpEvalXml, xmlOutput, EVALUATION_PROGRAM_THREAD);
        File f = new File(System.getProperty("user.dir") + "/" + EVALUATION_PROGRAM_THREAD);
        f.delete();
    }
}
From source file:it.isislab.sof.core.engine.hadoop.mapreduce.netlogo.SOFReducerNetLogo.java
License:Apache License
public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    String EVALUATION_PROGRAM_THREAD = "evaluation" + Thread.currentThread().getId();
    FileSystem fs = FileSystem.get(conf);
    if (ISLOOP) {
        Path eprogram = new Path(EVALUATION_PROGRAM_THREAD);
        fs.copyToLocalFile(new Path(RATING_PROGRAM), eprogram);
        try {
            fs.mkdirs(new Path(RATING_PATH));
        } catch (Exception e) {
            // Ignored: the rating directory may already exist.
        }
    }
    if (ISLOOP) {
        Random r = new Random(System.currentTimeMillis());
        String id = MD5(key.toString() + r.nextDouble());
        String tmpEvalXml = "tmpEval" + id + ".xml";
        Path ptemp = new Path(tmpEvalXml);
        Path file_output = new Path(key.toString());
        fs.copyToLocalFile(file_output, ptemp);
        String xmlOutput = key.toString().substring(key.toString().lastIndexOf("/") + 1);
        generateEvaluation(tmpEvalXml, xmlOutput, EVALUATION_PROGRAM_THREAD);
        File f = new File(System.getProperty("user.dir") + "/" + EVALUATION_PROGRAM_THREAD);
        f.delete();
    }
}
From source file:it.tizianofagni.sparkboost.DataUtils.java
License:Apache License
/**
 * Write a text file on Hadoop file system by using standard Hadoop API.
 *
 * @param outputPath The file to be written.
 * @param content The content to put in the file.
 */
public static void saveHadoopTextFile(String outputPath, String content) {
    try {
        Configuration configuration = new Configuration();
        Path file = new Path(outputPath);
        Path parentFile = file.getParent();
        FileSystem hdfs = FileSystem.get(file.toUri(), configuration);
        if (parentFile != null)
            hdfs.mkdirs(parentFile);
        OutputStream os = hdfs.create(file, true);
        BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, "UTF-8"));
        br.write(content);
        br.close();
        hdfs.close();
    } catch (Exception e) {
        throw new RuntimeException("Writing Hadoop text file", e);
    }
}
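A variant of the same write, as a minimal sketch (assuming Java 7+), using try-with-resources so the writer and FileSystem handle are closed even when an error occurs:

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public static void saveHadoopTextFile(String outputPath, String content) {
    Path file = new Path(outputPath);
    try (FileSystem hdfs = FileSystem.get(file.toUri(), new Configuration())) {
        if (file.getParent() != null) {
            hdfs.mkdirs(file.getParent()); // ensure the parent directory exists
        }
        try (BufferedWriter br = new BufferedWriter(
                new OutputStreamWriter(hdfs.create(file, true), StandardCharsets.UTF_8))) {
            br.write(content);
        }
    } catch (IOException e) {
        throw new RuntimeException("Writing Hadoop text file", e);
    }
}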
From source file:ivory.app.PreprocessClueWebEnglish.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg()
            .withDescription("(required) collection path").create(PreprocessCollection.COLLECTION_PATH));
    options.addOption(OptionBuilder.withArgName("path").hasArg()
            .withDescription("(required) index path").create(PreprocessCollection.INDEX_PATH));
    options.addOption(OptionBuilder.withArgName("num").hasArg()
            .withDescription("(required) segment").create(SEGMENT));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(PreprocessCollection.COLLECTION_PATH)
            || !cmdline.hasOption(PreprocessCollection.INDEX_PATH) || !cmdline.hasOption(SEGMENT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String collection = cmdline.getOptionValue(PreprocessCollection.COLLECTION_PATH);
    String indexPath = cmdline.getOptionValue(PreprocessCollection.INDEX_PATH);
    int segment = Integer.parseInt(cmdline.getOptionValue(SEGMENT));

    LOG.info("Tool name: " + PreprocessClueWebEnglish.class.getSimpleName());
    LOG.info(" - collection path: " + collection);
    LOG.info(" - index path: " + indexPath);
    LOG.info(" - segment: " + segment);

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    // Create the index directory if it doesn't already exist.
    Path p = new Path(indexPath);
    if (!fs.exists(p)) {
        LOG.info("index path doesn't exist, creating...");
        fs.mkdirs(p);
    } else {
        LOG.info("Index directory " + p + " already exists!");
        return -1;
    }

    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);
    Path mappingFile = env.getDocnoMappingData();
    new ClueWarcDocnoMappingBuilder().build(new Path(collection), mappingFile, conf);

    conf.set(Constants.CollectionName, "ClueWeb:English:Segment" + segment);
    conf.set(Constants.CollectionPath, collection);
    conf.set(Constants.IndexPath, indexPath);
    conf.set(Constants.InputFormat, SequenceFileInputFormat.class.getCanonicalName());
    conf.set(Constants.Tokenizer, GalagoTokenizer.class.getCanonicalName());
    conf.set(Constants.DocnoMappingClass, ClueWarcDocnoMapping.class.getCanonicalName());
    conf.set(Constants.DocnoMappingFile, env.getDocnoMappingData().toString());
    conf.setInt(Constants.DocnoOffset, DOCNO_OFFSETS[segment]);
    conf.setInt(Constants.MinDf, 10);
    conf.setInt(Constants.MaxDf, Integer.MAX_VALUE);

    new BuildTermDocVectors(conf).run();
    new ComputeGlobalTermStatistics(conf).run();
    new BuildDictionary(conf).run();
    new BuildIntDocVectors(conf).run();
    new BuildIntDocVectorsForwardIndex(conf).run();
    new BuildTermDocVectorsForwardIndex(conf).run();

    return 0;
}
From source file:ivory.app.PreprocessCollection.java
License:Apache License
/**
 * Runs this tool.
 */
@Override
public int run(String[] args) throws Exception {
    Options options = createOptions();
    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(COLLECTION_PATH) || !cmdline.hasOption(COLLECTION_NAME)
            || !cmdline.hasOption(INDEX_PATH) || !cmdline.hasOption(DOCNO_MAPPING)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String collection = cmdline.getOptionValue(COLLECTION_PATH);
    String collectionName = cmdline.getOptionValue(COLLECTION_NAME);
    String indexPath = cmdline.getOptionValue(INDEX_PATH);

    int docnoOffset = 0;
    if (cmdline.hasOption(DOCNO_OFFSET)) {
        docnoOffset = Integer.parseInt(cmdline.getOptionValue(DOCNO_OFFSET));
    }

    Class<? extends DocnoMapping> docnoMappingClass = null;
    try {
        docnoMappingClass = (Class<? extends DocnoMapping>) Class
                .forName(cmdline.getOptionValue(DOCNO_MAPPING));
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    }

    @SuppressWarnings("rawtypes")
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    if (cmdline.hasOption(INPUTFORMAT)) {
        try {
            inputFormatClass = (Class<? extends InputFormat<?, ?>>) Class
                    .forName(cmdline.getOptionValue(INPUTFORMAT));
        } catch (ClassNotFoundException e) {
            throw new RuntimeException(e);
        }
    }

    Class<? extends Tokenizer> tokenizerClass = GalagoTokenizer.class;
    if (cmdline.hasOption(TOKENIZER)) {
        try {
            tokenizerClass = (Class<? extends Tokenizer>) Class.forName(cmdline.getOptionValue(TOKENIZER));
        } catch (ClassNotFoundException e) {
            throw new RuntimeException(e);
        }
    }

    int minDf = 2;
    if (cmdline.hasOption(MIN_DF)) {
        minDf = Integer.parseInt(cmdline.getOptionValue(MIN_DF));
    }

    LOG.info("Tool name: " + this.getClass().getSimpleName());
    LOG.info(String.format(" -%s %s", COLLECTION_PATH, collection));
    LOG.info(String.format(" -%s %s", COLLECTION_NAME, collectionName));
    LOG.info(String.format(" -%s %s", INDEX_PATH, indexPath));
    LOG.info(String.format(" -%s %s", DOCNO_MAPPING, docnoMappingClass.getCanonicalName()));
    LOG.info(String.format(" -%s %s", INPUTFORMAT, inputFormatClass.getCanonicalName()));
    LOG.info(String.format(" -%s %s", TOKENIZER, tokenizerClass.getCanonicalName()));
    LOG.info(String.format(" -%s %d", MIN_DF, minDf));

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    // Create the index directory if it doesn't already exist.
    Path p = new Path(indexPath);
    if (!fs.exists(p)) {
        LOG.info("Index directory " + p + " doesn't exist, creating.");
        fs.mkdirs(p);
    } else {
        LOG.info("Index directory " + p + " already exists!");
        return -1;
    }

    RetrievalEnvironment env = new RetrievalEnvironment(indexPath, fs);

    conf.set(Constants.CollectionName, collectionName);
    conf.set(Constants.CollectionPath, collection);
    conf.set(Constants.IndexPath, indexPath);
    conf.set(Constants.InputFormat, inputFormatClass.getCanonicalName());
    conf.set(Constants.Tokenizer, tokenizerClass.getCanonicalName());
    conf.set(Constants.DocnoMappingClass, docnoMappingClass.getCanonicalName());
    conf.set(Constants.DocnoMappingFile, env.getDocnoMappingData().toString());
    conf.setInt(Constants.DocnoOffset, docnoOffset);
    conf.setInt(Constants.MinDf, minDf);
    conf.setInt(Constants.MaxDf, Integer.MAX_VALUE);

    Path mappingFile = env.getDocnoMappingData();
    docnoMappingClass.newInstance().getBuilder().build(new Path(collection), mappingFile, conf);

    new BuildTermDocVectors(conf).run();
    new ComputeGlobalTermStatistics(conf).run();
    new BuildDictionary(conf).run();
    new BuildIntDocVectors(conf).run();
    new BuildIntDocVectorsForwardIndex(conf).run();
    new BuildTermDocVectorsForwardIndex(conf).run();

    return 0;
}
From source file:ivory.app.PreprocessTrecForeign.java
License:Apache License
@SuppressWarnings("static-access") private Configuration parseArgs(String[] args) { Configuration conf = getConf(); options = new Options(); options.addOption(OptionBuilder.withDescription("tokenizer class").withArgName("class").hasArg() .isRequired().create(TOKENIZER_CLASS_OPTION)); options.addOption(OptionBuilder.withDescription("path to tokenizer model file/directory") .withArgName("path").hasArg().create(TOKENIZER_MODEL_OPTION)); options.addOption(OptionBuilder.withDescription("path to index directory").withArgName("path").hasArg() .isRequired().isRequired().create(INDEX_PATH_OPTION)); options.addOption(OptionBuilder.withDescription("path to XML collection file").withArgName("path").hasArg() .isRequired().create(INPUT_PATH_OPTION)); options.addOption(OptionBuilder.withDescription("two-letter collection language code") .withArgName("en|de|fr|zh|es|ar|tr").hasArg().isRequired().create(LANGUAGE_OPTION)); options.addOption(OptionBuilder.withDescription("path to stopwords file").withArgName("path").hasArg() .create(STOPWORDS_OPTION));//from w w w. ja v a 2s . c o m options.addOption(OptionBuilder.withDescription("collection name").withArgName("path").hasArg() .create(COLLECTION_NAME_OPTION)); try { FileSystem fs = FileSystem.get(conf); CommandLine cmdline; CommandLineParser parser = new GnuParser(); cmdline = parser.parse(options, args); String collection = cmdline.getOptionValue(INPUT_PATH_OPTION); String indexRootPath = cmdline.getOptionValue(INDEX_PATH_OPTION); String language = cmdline.getOptionValue(LANGUAGE_OPTION); String tokenizerClass = cmdline.getOptionValue(TOKENIZER_CLASS_OPTION); String stopwordsFile = null; String tokenizerPath = null; conf.set(Constants.CollectionPath, collection); conf.set(Constants.IndexPath, indexRootPath); conf.set(Constants.Tokenizer, tokenizerClass); conf.set(Constants.Language, language); if (cmdline.hasOption(COLLECTION_NAME_OPTION)) { conf.set(Constants.CollectionName, cmdline.getOptionValue(COLLECTION_NAME_OPTION)); } if (cmdline.hasOption(STOPWORDS_OPTION)) { stopwordsFile = cmdline.getOptionValue(STOPWORDS_OPTION); conf.set(Constants.StopwordList, stopwordsFile); } if (cmdline.hasOption(TOKENIZER_MODEL_OPTION)) { tokenizerPath = cmdline.getOptionValue(TOKENIZER_MODEL_OPTION); conf.set(Constants.TokenizerData, tokenizerPath); } LOG.info("Tool name: " + PreprocessTrecForeign.class.getCanonicalName()); LOG.info(" - Collection path: " + collection); LOG.info(" - Index path: " + indexRootPath); LOG.info(" - Language: " + language); LOG.info(" - Stop-word removal?: " + stopwordsFile); LOG.info(" - Tokenizer class: " + tokenizerClass); LOG.info(" - Tokenizer path: " + tokenizerPath); // Create the index directory if it doesn't already exist. Path p = new Path(indexRootPath); if (!fs.exists(p)) { LOG.info("index directory doesn't exist, creating..."); fs.mkdirs(p); } } catch (IOException exp) { LOG.info("Error creating index directory: " + exp.getMessage()); exp.printStackTrace(); } catch (ParseException exp) { LOG.info("Error parsing command line: " + exp.getMessage()); throw new RuntimeException(); } return conf; }