List of usage examples for org.apache.hadoop.fs.FileSystem.exists
public boolean exists(Path f) throws IOException
From source file:com.cloudera.recordservice.examples.mapreduce.RecordCount.java
License:Apache License
/**
 * Configures and runs the "recordcount" MapReduce job.
 *
 * <p>Expects exactly two arguments: the input query passed to
 * {@code RecordServiceConfig.setInputQuery} and the output path. If the output
 * path already exists it is deleted recursively before the job is submitted.
 *
 * @param args {@code [input_query, output_path]}
 * @return 0 if {@code job.waitForCompletion(true)} reports success, 1 otherwise
 * @throws Exception if job setup, file system access, or job execution fails
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: RecordCount <input_query> <output_path>");
        System.exit(1);
    }
    String inputQuery = args[0];
    String output = args[1];

    Job job = Job.getInstance(getConf());
    job.setJobName("recordcount");
    job.setJarByClass(RecordCount.class);

    // The same class serves as combiner and reducer; a single reduce task is used.
    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);
    job.setNumReduceTasks(1);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(LongWritable.class);

    RecordServiceConfig.setInputQuery(job.getConfiguration(), inputQuery);
    job.setInputFormatClass(RecordServiceInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Resolve the file system from the output path itself rather than taking the
    // configured default, so a fully qualified output URI on another file system
    // is checked and cleaned up on the file system it actually lives on.
    Path outputPath = new Path(output);
    FileSystem fs = outputPath.getFileSystem(job.getConfiguration());
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }
    FileOutputFormat.setOutputPath(job, outputPath);

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.cloudera.recordservice.examples.mapreduce.WordCount.java
License:Apache License
/**
 * Configures and runs the word count job, reading input either through
 * RecordService or through the plain text input format.
 *
 * <p>Arguments: {@code args[0]} is the input (trimmed before use),
 * {@code args[1]} is the output path, and an optional {@code args[2]} is parsed
 * with {@link Boolean#parseBoolean(String)} to switch RecordService on or off
 * (it is on when the argument is omitted). Any other argument count prints the
 * usage message and exits with status -1. An existing output path is deleted
 * recursively before the job runs.
 *
 * @param args {@code [input, output]} or {@code [input, output, useRecordService]}
 * @throws Exception if file system access or job execution fails
 */
public void run(String[] args) throws Exception {
    boolean useRecordService = true;
    if (args.length == 3) {
        useRecordService = Boolean.parseBoolean(args[2]);
    } else if (args.length != 2) {
        System.err.println("Usage: WordCount <input path> <output path>");
        System.exit(-1);
    }
    String input = args[0].trim();
    String output = args[1];

    JobConf conf = new JobConf(WordCount.class);
    conf.setJobName("wordcount-" + (useRecordService ? "with" : "without") + "-RecordService");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    if (useRecordService) {
        conf.setInputFormat(com.cloudera.recordservice.mapred.TextInputFormat.class);
        RecordServiceConfig.setInput(conf, input);
    } else {
        conf.setInputFormat(TextInputFormat.class);
        FileInputFormat.setInputPaths(conf, new Path(input));
    }

    // Resolve the file system from the output path itself rather than taking the
    // configured default, so a fully qualified output URI on another file system
    // is checked and cleaned up on the file system it actually lives on.
    Path outputPath = new Path(output);
    FileSystem fs = outputPath.getFileSystem(conf);
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }
    conf.setOutputFormat(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(conf, outputPath);

    JobClient.runJob(conf);
    System.out.println("Done");
}
From source file:com.cloudera.science.quince.FileUtils.java
License:Open Source License
/**
 * Reports whether the given sample group is present anywhere under {@code path}.
 *
 * <p>Walks the entries accepted by the {@code "chr"} partition filter, then the
 * entries accepted by the {@code "pos"} filter beneath each of them, and looks
 * for at least one entry accepted by the {@code "sample_group"} filter for
 * {@code sampleGroup}.
 *
 * @param path root path to search
 * @param conf configuration used to resolve the file system of {@code path}
 * @param sampleGroup sample group value to look for
 * @return {@code true} as soon as a matching entry is found; {@code false} if
 *     {@code path} does not exist or nothing matches
 * @throws IOException if a file system call fails
 */
public static boolean sampleGroupExists(Path path, Configuration conf, String sampleGroup)
        throws IOException {
    FileSystem fileSystem = path.getFileSystem(conf);
    if (fileSystem.exists(path)) {
        FileStatus[] chrEntries = fileSystem.listStatus(path, new PartitionPathFilter("chr"));
        for (FileStatus chrEntry : chrEntries) {
            FileStatus[] posEntries =
                    fileSystem.listStatus(chrEntry.getPath(), new PartitionPathFilter("pos"));
            for (FileStatus posEntry : posEntries) {
                FileStatus[] matches = fileSystem.listStatus(posEntry.getPath(),
                        new PartitionPathFilter("sample_group", sampleGroup));
                if (matches.length != 0) {
                    return true;
                }
            }
        }
    }
    return false;
}
From source file:com.cloudera.science.quince.FileUtils.java
License:Open Source License
public static void deleteSampleGroup(Path path, Configuration conf, String sampleGroup) throws IOException { FileSystem fs = path.getFileSystem(conf); if (!fs.exists(path)) { return;// ww w . jav a 2 s. com } for (FileStatus chrStatus : fs.listStatus(path, new PartitionPathFilter("chr"))) { for (FileStatus posStatus : fs.listStatus(chrStatus.getPath(), new PartitionPathFilter("pos"))) { for (FileStatus sampleGroupStatus : fs.listStatus(posStatus.getPath(), new PartitionPathFilter("sample_group", sampleGroup))) { fs.delete(sampleGroupStatus.getPath(), true); } } } }
From source file:com.cloudera.seismic.segy.SegyUnloader.java
License:Open Source License
@Override public int run(String[] args) throws Exception { Options options = new Options(); options.addOption("input", true, "SU sequence files to export from Hadoop"); options.addOption("output", true, "The local SU file to write"); // Parse the commandline and check for required arguments. CommandLine cmdLine = new PosixParser().parse(options, args, false); if (!cmdLine.hasOption("input") || !cmdLine.hasOption("output")) { System.out.println("Mising required input/output arguments"); new HelpFormatter().printHelp("SegyUnloader", options); System.exit(1);/*from w w w. j a v a 2 s . c om*/ } Configuration conf = getConf(); FileSystem hdfs = FileSystem.get(conf); Path inputPath = new Path(cmdLine.getOptionValue("input")); if (!hdfs.exists(inputPath)) { System.out.println("Input path does not exist"); System.exit(1); } PathFilter pf = new PathFilter() { @Override public boolean accept(Path path) { return !path.getName().startsWith("_"); } }; DataOutputStream os = new DataOutputStream(new FileOutputStream(cmdLine.getOptionValue("output"))); for (FileStatus fs : hdfs.listStatus(inputPath, pf)) { write(fs.getPath(), os, conf); } os.close(); return 0; }
From source file:com.cloudera.sparkwordcount.ipWordCount.java
License:Apache License
public static void main(String[] args) { JavaSparkContext sc = new JavaSparkContext( new SparkConf().set("spark.dynamicAllocation.initialExecutors", "5").setAppName("Spark Count")); // sc.addJar(""); // final Logger logger = Logger.getLogger("org"); // logger.setLevel(Level.INFO); final int threshold = Integer.parseInt(args[1]); JavaRDD<String> stringJavaRDD = sc.textFile(args[0]); JavaRDD<String> filteredRDD = stringJavaRDD.filter(new Function<String, Boolean>() { @Override// www .j a va 2 s .c o m public Boolean call(String value) throws Exception { if (value.contains("TIME_STAMP")) { return false; } RdrRaw line = RdrParser.parseRdr(value); if (line == null) { System.out.println("can't pars rdr"); return false; } String url = line.dstHost; if (url.trim().isEmpty()) { return false; } //System.out.println(url); return true; } }); JavaPairRDD<RdrRaw, Integer> countsIp = filteredRDD.mapToPair(new PairFunction<String, RdrRaw, Integer>() { @Override public Tuple2<RdrRaw, Integer> call(String s) throws Exception { RdrRaw rdrRaw = RdrParser.parseRdr(s); return new Tuple2<RdrRaw, Integer>(rdrRaw, 1); } }).reduceByKey(new Function2<Integer, Integer, Integer>() { @Override public Integer call(Integer i1, Integer i2) throws Exception { return i1 + i2; } }); // filter out words with less than threshold occurrences JavaPairRDD<RdrRaw, Integer> filtered = countsIp.filter(new Function<Tuple2<RdrRaw, Integer>, Boolean>() { @Override public Boolean call(Tuple2<RdrRaw, Integer> rdrRawIntegerTuple2) throws Exception { return rdrRawIntegerTuple2._2() > threshold; } }); JavaPairRDD<Integer, RdrRaw> finalPair = filtered .mapToPair(new PairFunction<Tuple2<RdrRaw, Integer>, Integer, RdrRaw>() { @Override public Tuple2<Integer, RdrRaw> call(Tuple2<RdrRaw, Integer> item) throws Exception { return item.swap(); } }).sortByKey(false); // List<Tuple2<Integer, RdrRaw>> collect = finalPair.take(10); StringBuilder msgBody = new StringBuilder(); for (Tuple2<Integer, RdrRaw> rdrInTuple2 : collect) 
{ RdrRaw rdrRaw = rdrInTuple2._2(); Integer count = rdrInTuple2._1(); msgBody.append(rdrRaw.dstHost) // .append(rdrRaw.dstParam) .append(" found [").append(count).append("]\n"); } Configuration conf = new Configuration(); try { Path p = new Path(args[2]); FileSystem fs = FileSystem.get(new Configuration()); boolean exists = fs.exists(p); if (exists) { fs.delete(p, true); } FileSystem hdfs = FileSystem.get(conf); FSDataOutputStream out = hdfs.create(p); ByteArrayInputStream in = new ByteArrayInputStream(msgBody.toString().getBytes()); byte buffer[] = new byte[256]; int bytesRead = 0; while ((bytesRead = in.read(buffer)) > 0) { out.write(buffer, 0, bytesRead); } p = new Path(args[2] + "_all"); if (fs.exists(p)) { fs.delete(p, true); } finalPair.saveAsTextFile(args[2] + "_all"); } catch (IOException e) { e.printStackTrace(); } sc.stop(); /* Properties props = new Properties(); props.put("mail.smtps.host","smtp.gmail.com"); props.put("mail.smtps.auth", "true"); Session session = Session.getDefaultInstance(props, null); System.out.println("try send email"); try { Message msg = new MimeMessage(session); msg.setFrom(new InternetAddress("spark@hadoop.com", "Spark Generated Message")); msg.addRecipient(Message.RecipientType.TO, new InternetAddress("fesswoodwork@gmail.com", "Spark Responder")); msg.setSubject("Spark task finished"); msg.setText(msgBody.toString()); SMTPTransport t = (SMTPTransport)session.getTransport("smtps"); t.connect("smtp.gmail.com", "fesswoodwork", "9610792adc"); t.sendMessage(msg, msg.getAllRecipients()); Transport.send(msg); } catch (AddressException e) { e.printStackTrace(); System.out.println("AddressException "+e.getMessage()); } catch (MessagingException e) { e.printStackTrace(); System.out.println("MessagingException " + e.getMessage()); } catch (UnsupportedEncodingException e) { e.printStackTrace(); System.out.println("UnsupportedEncodingException " + e.getMessage()); } System.out.println("sending successfully ends");*/ /* // split each 
document into words JavaRDD<String> tokenized = stringJavaRDD.flatMap( new FlatMapFunction<String, String>() { @Override public Iterable<String> call(String s) { return Arrays.asList(s.split(" ")); } } ); // count the occurrence of each word JavaPairRDD<String, Integer> counts = tokenized.mapToPair( new PairFunction<String, String, Integer>() { @Override public Tuple2<String, Integer> call(String s) { return new Tuple2<String, Integer>(s, 1); } } ).reduceByKey( new Function2<Integer, Integer, Integer>() { @Override public Integer call(Integer i1, Integer i2) { return i1 + i2; } } ); // filter out words with less than threshold occurrences JavaPairRDD<String, Integer> filtered = counts.filter( new Function<Tuple2<String, Integer>, Boolean>() { @Override public Boolean call(Tuple2<String, Integer> tup) { return tup._2() >= threshold; } } ); // count characters JavaPairRDD<Character, Integer> charCounts = filtered.flatMap( new FlatMapFunction<Tuple2<String, Integer>, Character>() { @Override public Iterable<Character> call(Tuple2<String, Integer> s) { Collection<Character> chars = new ArrayList<Character>(s._1().length()); for (char c : s._1().toCharArray()) { chars.add(c); } return chars; } } ).mapToPair( new PairFunction<Character, Character, Integer>() { @Override public Tuple2<Character, Integer> call(Character c) { return new Tuple2<Character, Integer>(c, 1); } } ).reduceByKey( new Function2<Integer, Integer, Integer>() { @Override public Integer call(Integer i1, Integer i2) { return i1 + i2; } } ); System.out.println(charCounts.collect()); */ }
From source file:com.cloudera.sqoop.hive.TestHiveImport.java
License:Apache License
/** Test that we can generate a file containing the DDL and not import. */ @Test//from w w w. j a v a 2 s .c o m public void testGenerateOnly() throws IOException { final String TABLE_NAME = "GenerateOnly"; setCurTableName(TABLE_NAME); setNumCols(1); // Figure out where our target generated .q file is going to be. SqoopOptions options = getSqoopOptions(getArgv(false, null), new ImportTool()); Path ddlFile = new Path(new Path(options.getCodeOutputDir()), TABLE_NAME + ".q"); FileSystem fs = FileSystem.getLocal(new Configuration()); // If it's already there, remove it before running the test to ensure // that it's the current test that generated the file. if (fs.exists(ddlFile)) { if (!fs.delete(ddlFile, false)) { LOG.warn("Could not delete previous ddl file: " + ddlFile); } } // Run a basic import, but specify that we're just generating definitions. String[] types = { "INTEGER" }; String[] vals = { "42" }; runImportTest(TABLE_NAME, types, vals, null, getCodeGenArgs(), new CodeGenTool()); // Test that the generated definition file exists. assertTrue("Couldn't find expected ddl file", fs.exists(ddlFile)); Path hiveImportPath = new Path(new Path(options.getWarehouseDir()), TABLE_NAME); assertFalse("Import actually happened!", fs.exists(hiveImportPath)); }
From source file:com.cloudera.sqoop.io.TestSplittableBufferedWriter.java
License:Apache License
/**
 * Creates the directory the test files are written to and removes any files
 * already in it. Fails the test if the directory contains a subdirectory, if a
 * leftover file cannot be deleted, or if the directory does not exist afterwards.
 */
private void ensureEmptyWriteDir() throws IOException {
    FileSystem localFs = FileSystem.getLocal(getConf());
    Path targetDir = getWritePath();
    localFs.mkdirs(targetDir);

    for (FileStatus entry : localFs.listStatus(targetDir)) {
        if (entry.isDir()) {
            fail("setUp(): Write directory " + targetDir + " contains subdirectories");
        }

        LOG.debug("setUp(): Removing " + entry.getPath());
        boolean removed = localFs.delete(entry.getPath(), false);
        if (!removed) {
            fail("setUp(): Could not delete residual file " + entry.getPath());
        }
    }

    boolean dirPresent = localFs.exists(targetDir);
    if (!dirPresent) {
        fail("setUp: Could not create " + targetDir);
    }
}
From source file:com.cloudera.sqoop.io.TestSplittableBufferedWriter.java
License:Apache License
/**
 * Asserts that {@code p} exists on the local file system.
 *
 * @param p path expected to be present
 * @throws IOException if the file system cannot be obtained or queried
 */
private void verifyFileExists(Path p) throws IOException {
    FileSystem localFs = FileSystem.getLocal(getConf());
    boolean present = localFs.exists(p);
    assertTrue("File not found: " + p, present);
}
From source file:com.cloudera.sqoop.io.TestSplittableBufferedWriter.java
License:Apache License
/**
 * Asserts that {@code p} is absent from the local file system.
 *
 * @param p path expected to be missing
 * @throws IOException if the file system cannot be obtained or queried
 */
private void verifyFileDoesNotExist(Path p) throws IOException {
    FileSystem localFs = FileSystem.getLocal(getConf());
    boolean present = localFs.exists(p);
    assertFalse("File found: " + p + " and we did not expect it", present);
}