Example usages of org.apache.hadoop.fs.FileSystem.exists(Path)

Introduction

This page collects example usages of the org.apache.hadoop.fs.FileSystem.exists(Path) method.

Prototype

public boolean exists(Path f) throws IOException

Source Link

Document

Check if a path exists.

Usage

From source file:com.cloudera.recordservice.examples.mapreduce.RecordCount.java

License:Apache License

/**
 * Configures and runs the "recordcount" MapReduce job.
 *
 * <p>Exactly two arguments are required; otherwise a usage line is printed to
 * stderr and the JVM exits with status 1. Any existing output at the output
 * path is deleted recursively before the job is submitted.
 *
 * @param args {@code [input_query, output_path]}
 * @return 0 if {@code waitForCompletion} reports success, 1 otherwise
 * @throws Exception if job setup, a file system call or job execution fails
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: RecordCount <input_query> <output_path>");
        System.exit(1);
    }
    String inputQuery = args[0];
    String output = args[1];

    Job job = Job.getInstance(getConf());
    job.setJobName("recordcount");
    job.setJarByClass(RecordCount.class);
    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(LongWritable.class);

    RecordServiceConfig.setInputQuery(job.getConfiguration(), inputQuery);
    job.setInputFormatClass(RecordServiceInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Resolve the file system from the path itself rather than taking the
    // default one, so a fully qualified output URI on a non-default file
    // system does not fail with a "Wrong FS" error.
    Path outputPath = new Path(output);
    FileSystem fs = outputPath.getFileSystem(job.getConfiguration());
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }
    FileOutputFormat.setOutputPath(job, outputPath);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.cloudera.recordservice.examples.mapreduce.WordCount.java

License:Apache License

/**
 * Configures and runs the word-count job, optionally reading input through RecordService.
 *
 * <p>Two arguments are required (input and output path). An optional third
 * argument is parsed with {@link Boolean#parseBoolean(String)} and selects
 * whether the RecordService input format is used (default {@code true}).
 * With any other argument count a usage line is printed to stderr and the JVM
 * exits with status -1. Any existing output at the output path is deleted
 * recursively before the job runs.
 *
 * @param args {@code [input, output]} or {@code [input, output, useRecordService]}
 * @throws Exception if job setup, a file system call or job execution fails
 */
public void run(String[] args) throws Exception {
    boolean useRecordService = true;
    if (args.length == 3) {
        useRecordService = Boolean.parseBoolean(args[2]);
    } else if (args.length != 2) {
        System.err.println("Usage: WordCount <input path> <output path>");
        System.exit(-1);
    }
    String input = args[0].trim();
    String output = args[1];

    JobConf conf = new JobConf(WordCount.class);
    conf.setJobName("wordcount-" + (useRecordService ? "with" : "without") + "-RecordService");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    if (useRecordService) {
        conf.setInputFormat(com.cloudera.recordservice.mapred.TextInputFormat.class);
        RecordServiceConfig.setInput(conf, input);
    } else {
        conf.setInputFormat(TextInputFormat.class);
        FileInputFormat.setInputPaths(conf, new Path(input));
    }

    // Resolve the file system from the path itself rather than taking the
    // default one, so a fully qualified output URI on a non-default file
    // system does not fail with a "Wrong FS" error.
    Path outputPath = new Path(output);
    FileSystem fs = outputPath.getFileSystem(conf);
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }
    conf.setOutputFormat(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(conf, outputPath);

    JobClient.runJob(conf);
    System.out.println("Done");
}

From source file:com.cloudera.science.quince.FileUtils.java

License:Open Source License

/**
 * Reports whether any {@code chr}/{@code pos} partition beneath {@code path}
 * contains at least one entry accepted by the {@code sample_group} filter for
 * the given sample group.
 *
 * @param path        root directory to inspect
 * @param conf        configuration used to resolve the file system of {@code path}
 * @param sampleGroup sample group value passed to the {@code sample_group} filter
 * @return {@code true} if a matching entry is found; {@code false} if none is
 *         found or {@code path} does not exist
 * @throws IOException if a file system call fails
 */
public static boolean sampleGroupExists(Path path, Configuration conf, String sampleGroup) throws IOException {
    final FileSystem fileSystem = path.getFileSystem(conf);
    if (fileSystem.exists(path)) {
        FileStatus[] chrEntries = fileSystem.listStatus(path, new PartitionPathFilter("chr"));
        for (FileStatus chrEntry : chrEntries) {
            FileStatus[] posEntries = fileSystem.listStatus(chrEntry.getPath(), new PartitionPathFilter("pos"));
            for (FileStatus posEntry : posEntries) {
                FileStatus[] matches = fileSystem.listStatus(posEntry.getPath(),
                        new PartitionPathFilter("sample_group", sampleGroup));
                if (matches.length != 0) {
                    return true;
                }
            }
        }
    }
    return false;
}

From source file:com.cloudera.science.quince.FileUtils.java

License:Open Source License

/**
 * Recursively deletes every entry accepted by the {@code sample_group} filter
 * for the given sample group inside each {@code chr}/{@code pos} partition
 * beneath {@code path}. Does nothing when {@code path} does not exist.
 *
 * @param path        root directory to clean
 * @param conf        configuration used to resolve the file system of {@code path}
 * @param sampleGroup sample group value passed to the {@code sample_group} filter
 * @throws IOException if a file system call fails
 */
public static void deleteSampleGroup(Path path, Configuration conf, String sampleGroup) throws IOException {
    final FileSystem fileSystem = path.getFileSystem(conf);
    if (fileSystem.exists(path)) {
        FileStatus[] chrEntries = fileSystem.listStatus(path, new PartitionPathFilter("chr"));
        for (FileStatus chrEntry : chrEntries) {
            FileStatus[] posEntries = fileSystem.listStatus(chrEntry.getPath(), new PartitionPathFilter("pos"));
            for (FileStatus posEntry : posEntries) {
                FileStatus[] matches = fileSystem.listStatus(posEntry.getPath(),
                        new PartitionPathFilter("sample_group", sampleGroup));
                for (FileStatus match : matches) {
                    fileSystem.delete(match.getPath(), true);
                }
            }
        }
    }
}

From source file:com.cloudera.seismic.segy.SegyUnloader.java

License:Open Source License

/**
 * Exports the files found under the {@code -input} path to the local file
 * named by {@code -output}.
 *
 * <p>Both options are required; if either is missing, or the input path does
 * not exist, a message is printed and the JVM exits with status 1. Entries
 * directly under the input path whose names start with {@code "_"} are
 * skipped; every other entry is handed to {@code write(...)} together with
 * the shared output stream.
 *
 * @param args command-line arguments containing the {@code input} and {@code output} options
 * @return 0 on success
 * @throws Exception if option parsing, a file system call or writing fails
 */
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption("input", true, "SU sequence files to export from Hadoop");
    options.addOption("output", true, "The local SU file to write");

    // Parse the commandline and check for required arguments.
    CommandLine cmdLine = new PosixParser().parse(options, args, false);
    if (!cmdLine.hasOption("input") || !cmdLine.hasOption("output")) {
        System.out.println("Mising required input/output arguments");
        new HelpFormatter().printHelp("SegyUnloader", options);
        System.exit(1);
    }

    Configuration conf = getConf();
    FileSystem hdfs = FileSystem.get(conf);
    Path inputPath = new Path(cmdLine.getOptionValue("input"));
    if (!hdfs.exists(inputPath)) {
        System.out.println("Input path does not exist");
        System.exit(1);
    }

    // Ignore entries whose names begin with an underscore.
    PathFilter pf = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return !path.getName().startsWith("_");
        }
    };

    DataOutputStream os = new DataOutputStream(new FileOutputStream(cmdLine.getOptionValue("output")));
    try {
        for (FileStatus status : hdfs.listStatus(inputPath, pf)) {
            write(status.getPath(), os, conf);
        }
    } finally {
        // Always release the local file handle, even when a write fails part-way.
        os.close();
    }

    return 0;
}

From source file:com.cloudera.sparkwordcount.ipWordCount.java

License:Apache License

/**
 * Counts how often each parsed RDR record occurs in a text input and writes a
 * summary of the ten most frequent ones.
 *
 * <p>Arguments:
 * <ul>
 *   <li>{@code args[0]} - input path, read with {@code textFile}</li>
 *   <li>{@code args[1]} - integer threshold; only records counted strictly
 *       more often than this are kept</li>
 *   <li>{@code args[2]} - output path; the top-ten summary is written there
 *       and the full sorted result is saved to {@code args[2] + "_all"}</li>
 * </ul>
 * Existing output at either location is deleted recursively first. If fewer
 * than three arguments are given, a usage line is printed and the JVM exits
 * with status 1 before any Spark context is created.
 *
 * @param args {@code [input, threshold, output]}
 */
public static void main(String[] args) {
    if (args.length < 3) {
        System.err.println("Usage: ipWordCount <input> <threshold> <output>");
        System.exit(1);
    }
    final int threshold = Integer.parseInt(args[1]);

    JavaSparkContext sc = new JavaSparkContext(
            new SparkConf().set("spark.dynamicAllocation.initialExecutors", "5").setAppName("Spark Count"));
    try {
        JavaRDD<String> stringJavaRDD = sc.textFile(args[0]);
        JavaRDD<String> filteredRDD = stringJavaRDD.filter(new Function<String, Boolean>() {
            @Override
            public Boolean call(String value) throws Exception {
                // Lines carrying the TIME_STAMP marker are dropped
                // (presumably header rows - confirm against the input format).
                if (value.contains("TIME_STAMP")) {
                    return false;
                }
                RdrRaw line = RdrParser.parseRdr(value);
                if (line == null) {
                    System.out.println("can't pars rdr");
                    return false;
                }
                // Keep only records with a non-blank destination host.
                String url = line.dstHost;
                return url != null && !url.trim().isEmpty();
            }
        });
        JavaPairRDD<RdrRaw, Integer> countsIp = filteredRDD
                .mapToPair(new PairFunction<String, RdrRaw, Integer>() {
                    @Override
                    public Tuple2<RdrRaw, Integer> call(String s) throws Exception {
                        RdrRaw rdrRaw = RdrParser.parseRdr(s);
                        return new Tuple2<RdrRaw, Integer>(rdrRaw, 1);
                    }
                }).reduceByKey(new Function2<Integer, Integer, Integer>() {
                    @Override
                    public Integer call(Integer i1, Integer i2) throws Exception {
                        return i1 + i2;
                    }
                });

        // Keep only records that occur strictly more than `threshold` times.
        JavaPairRDD<RdrRaw, Integer> filtered = countsIp
                .filter(new Function<Tuple2<RdrRaw, Integer>, Boolean>() {
                    @Override
                    public Boolean call(Tuple2<RdrRaw, Integer> rdrRawIntegerTuple2) throws Exception {
                        return rdrRawIntegerTuple2._2() > threshold;
                    }
                });
        // Swap to (count, record) so the result can be sorted by count, descending.
        JavaPairRDD<Integer, RdrRaw> finalPair = filtered
                .mapToPair(new PairFunction<Tuple2<RdrRaw, Integer>, Integer, RdrRaw>() {
                    @Override
                    public Tuple2<Integer, RdrRaw> call(Tuple2<RdrRaw, Integer> item) throws Exception {
                        return item.swap();
                    }
                }).sortByKey(false);

        List<Tuple2<Integer, RdrRaw>> collect = finalPair.take(10);
        StringBuilder msgBody = new StringBuilder();
        for (Tuple2<Integer, RdrRaw> rdrInTuple2 : collect) {
            RdrRaw rdrRaw = rdrInTuple2._2();
            Integer count = rdrInTuple2._1();
            msgBody.append(rdrRaw.dstHost).append(" found [").append(count).append("]\n");
        }

        try {
            Path summaryPath = new Path(args[2]);
            FileSystem fs = summaryPath.getFileSystem(new Configuration());
            if (fs.exists(summaryPath)) {
                fs.delete(summaryPath, true);
            }
            FSDataOutputStream out = fs.create(summaryPath);
            try {
                // Explicit charset so the report does not depend on the platform default.
                out.write(msgBody.toString().getBytes("UTF-8"));
            } finally {
                // Closing is what makes the written bytes durable; never skip it.
                out.close();
            }

            Path allPath = new Path(args[2] + "_all");
            if (fs.exists(allPath)) {
                fs.delete(allPath, true);
            }
            finalPair.saveAsTextFile(args[2] + "_all");
        } catch (IOException e) {
            e.printStackTrace();
        }
    } finally {
        // Release the Spark context even when the job fails.
        sc.stop();
    }
}

From source file:com.cloudera.sqoop.hive.TestHiveImport.java

License:Apache License

/**
 * Verifies that running the code-generation tool writes the {@code .q} DDL
 * file and does not create the table directory under the warehouse dir.
 */
@Test
public void testGenerateOnly() throws IOException {
    final String tableName = "GenerateOnly";
    setCurTableName(tableName);
    setNumCols(1);

    // Work out where the generated .q file will land.
    SqoopOptions opts = getSqoopOptions(getArgv(false, null), new ImportTool());
    Path codeOutputDir = new Path(opts.getCodeOutputDir());
    Path generatedDdl = new Path(codeOutputDir, tableName + ".q");
    FileSystem localFs = FileSystem.getLocal(new Configuration());

    // Clear out any leftover file so a pass can only come from this run.
    if (localFs.exists(generatedDdl) && !localFs.delete(generatedDdl, false)) {
        LOG.warn("Could not delete previous ddl file: " + generatedDdl);
    }

    // Basic import set-up, but driven through the definitions-only tool.
    runImportTest(tableName, new String[] { "INTEGER" }, new String[] { "42" }, null, getCodeGenArgs(),
            new CodeGenTool());

    // The definition file must now be present...
    boolean ddlPresent = localFs.exists(generatedDdl);
    assertTrue("Couldn't find expected ddl file", ddlPresent);

    // ...while nothing may have been imported into the warehouse.
    Path warehouseDir = new Path(opts.getWarehouseDir());
    Path importedTableDir = new Path(warehouseDir, tableName);
    boolean importPresent = localFs.exists(importedTableDir);
    assertFalse("Import actually happened!", importPresent);
}

From source file:com.cloudera.sqoop.io.TestSplittableBufferedWriter.java

License:Apache License

/**
 * Creates the directory the test files are written to and removes every file
 * already in it. Fails the test if the directory contains a subdirectory, if
 * a leftover file cannot be deleted, or if the directory does not exist
 * afterwards.
 */
private void ensureEmptyWriteDir() throws IOException {
    final FileSystem localFs = FileSystem.getLocal(getConf());
    final Path dir = getWritePath();

    localFs.mkdirs(dir);

    for (FileStatus entry : localFs.listStatus(dir)) {
        if (entry.isDir()) {
            fail("setUp(): Write directory " + dir + " contains subdirectories");
        }

        final Path leftover = entry.getPath();
        LOG.debug("setUp(): Removing " + leftover);
        final boolean removed = localFs.delete(leftover, false);
        if (!removed) {
            fail("setUp(): Could not delete residual file " + leftover);
        }
    }

    final boolean dirPresent = localFs.exists(dir);
    if (!dirPresent) {
        fail("setUp: Could not create " + dir);
    }
}

From source file:com.cloudera.sqoop.io.TestSplittableBufferedWriter.java

License:Apache License

/** Asserts that {@code p} exists on the local file system. */
private void verifyFileExists(Path p) throws IOException {
    final boolean present = FileSystem.getLocal(getConf()).exists(p);
    assertTrue("File not found: " + p, present);
}

From source file:com.cloudera.sqoop.io.TestSplittableBufferedWriter.java

License:Apache License

/** Asserts that {@code p} does not exist on the local file system. */
private void verifyFileDoesNotExist(Path p) throws IOException {
    final boolean present = FileSystem.getLocal(getConf()).exists(p);
    assertFalse("File found: " + p + " and we did not expect it", present);
}