List of usage examples for org.apache.hadoop.fs.FileSystem.exists
public boolean exists(Path f) throws IOException
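All of the examples below follow the same basic pattern: obtain a FileSystem, probe a path with exists(), and act on the result (most often deleting a stale job output directory before a run). Here is a minimal, self-contained sketch of that pattern; the NameNode URI and the output path are illustrative assumptions, not values taken from any example on this page.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemExistsExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Hypothetical cluster URI and output path, for illustration only
        FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:8020"), conf);
        Path outputPath = new Path("/tmp/example-output");
        if (fs.exists(outputPath)) {
            // Recursively delete the directory so a job can recreate it
            fs.delete(outputPath, true);
        }
    }
}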
From source file:com.littlehotspot.hadoop.mr.mobile.MobileLog.java
License:Open Source License
@Override
public int run(String[] arg) throws Exception {
    try {
        Job job = Job.getInstance(this.getConf(), MobileLog.class.getSimpleName());
        job.setJarByClass(MobileLog.class);

        Path inputPath = new Path(arg[0]);
        FileInputFormat.addInputPath(job, inputPath);

        job.setMapperClass(MobileMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        Path outputPath = new Path(arg[1]);
        FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), new Configuration());
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }
        FileOutputFormat.setOutputPath(job, outputPath);

        job.setReducerClass(MobileReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        boolean status = job.waitForCompletion(true);
        if (!status) {
            throw new Exception("MapReduce task execute failed.........");
        }
        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return 1;
    }
}
From source file:com.littlehotspot.hadoop.mr.nginx.module.cdf.CDFScheduler.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    try {
        // Initialize common MapReduce parameters
        CommonVariables.initMapReduce(this.getConf(), args);

        // Read job parameters
        String matcherRegex = CommonVariables.getParameterValue(Argument.MapperInputFormatRegex);
        String hdfsInputPath = CommonVariables.getParameterValue(Argument.InputPath);
        String hdfsOutputPath = CommonVariables.getParameterValue(Argument.OutputPath);

        // Compile the mapper input-format regex if one was provided
        if (StringUtils.isNotBlank(matcherRegex)) {
            CommonVariables.MAPPER_INPUT_FORMAT_REGEX = Pattern.compile(matcherRegex);
        }

        Path inputPath = new Path(hdfsInputPath);
        Path outputPath = new Path(hdfsOutputPath);

        Job job = Job.getInstance(this.getConf(), this.getClass().getName());
        job.setJarByClass(this.getClass());
        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        job.setMapperClass(CDFMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setReducerClass(GeneralReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Remove any previous output so the job can write to the same path
        FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), this.getConf());
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }

        boolean status = job.waitForCompletion(true);
        if (!status) {
            throw new Exception("MapReduce task execute failed.........");
        }
        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return 1;
    }
}
From source file:com.littlehotspot.hadoop.mr.nginx.module.hdfs2hbase.api.user.UserScheduler.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    try {
        // Initialize common MapReduce parameters
        CommonVariables.initMapReduce(this.getConf(), args);
        CommonVariables.hBaseHelper = new HBaseHelper(this.getConf());

        // Read job parameters
        String matcherRegex = CommonVariables.getParameterValue(Argument.MapperInputFormatRegex);
        String hdfsInputPath = CommonVariables.getParameterValue(Argument.InputPath);
        String hdfsOutputPath = CommonVariables.getParameterValue(Argument.OutputPath);

        // Compile the mapper input-format regex if one was provided
        if (StringUtils.isNotBlank(matcherRegex)) {
            CommonVariables.MAPPER_INPUT_FORMAT_REGEX = Pattern.compile(matcherRegex);
        }

        Path inputPath = new Path(hdfsInputPath);
        Path outputPath = new Path(hdfsOutputPath);

        Job job = Job.getInstance(this.getConf(), this.getClass().getName());
        job.setJarByClass(this.getClass());

        job.setMapperClass(UserMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setReducerClass(UserReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        // Remove any previous output so the job can write to the same path
        FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), this.getConf());
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }

        boolean state = job.waitForCompletion(true);
        if (!state) {
            throw new Exception("MapReduce task execute failed.........");
        }
        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return 1;
    }
}
From source file:com.liveramp.cascading_ext.FileSystemHelper.java
License:Apache License
/**
 * Recursively print the path and children to stdout.
 */
public static void printFiles(String path) throws IOException {
    FileSystem fs = getFS();
    if (fs.exists(new Path(path))) {
        printFiles(fs, new Path(path), 0);
    } else {
        System.out.println("no files at " + path);
    }
}
From source file:com.liveramp.cascading_ext.tap.NullTap.java
License:Apache License
@Override
public void onCompleted(Flow flow) {
    try {
        FileSystem fs = FileSystem.get((JobConf) flow.getConfig());
        if (fs.exists(getPath())) {
            LOG.info("Deleting NullTap path: " + getPath());
            TrashHelper.deleteUsingTrashIfEnabled(fs, getPath());
        }
    } catch (IOException e) {
        throw new TapException(e);
    }
}
From source file:com.liveramp.hank.hadoop.DomainBuilderAbstractOutputFormat.java
License:Apache License
@Override
public void checkOutputSpecs(FileSystem fs, JobConf conf) throws IOException {
    String outputPath = getJobOutputPath(conf);
    // Ignore the FileSystem passed in; resolve one for the output path instead
    fs = FileSystemHelper.getFileSystemForPath(outputPath);
    if (fs.exists(new Path(outputPath))) {
        throw new RuntimeException("Output path already exists: " + outputPath);
    }
}
From source file:com.liveramp.hank.hadoop.DomainBuilderAbstractOutputFormat.java
License:Apache License
public static void moveContentsAndDelete(Path srcDir, Path dstDir, FileSystem fs, Logger logger)
        throws IOException {
    if (!fs.exists(srcDir)) {
        return;
    }
    if (fs.exists(srcDir) && !fs.isDirectory(srcDir)) {
        throw new IllegalArgumentException(srcDir + " is not a directory");
    }
    if (fs.exists(dstDir) && !fs.isDirectory(dstDir)) {
        throw new IllegalArgumentException(dstDir + " is not a directory");
    }
    if (logger.isDebugEnabled()) {
        logger.debug("Moving contents of: " + srcDir + " to: " + dstDir);
    }
    FileStatus[] files = fs.listStatus(srcDir);
    for (FileStatus file : files) {
        Path sourcePath = file.getPath();
        Path targetPath = new Path(dstDir, file.getPath().getName());
        if (logger.isDebugEnabled()) {
            logger.debug("Moving: " + sourcePath + " to: " + targetPath);
        }
        if (!fs.mkdirs(targetPath.getParent())) {
            throw new IOException("Failed at creating directory " + targetPath.getParent());
        }
        if (!fs.rename(sourcePath, targetPath)) {
            throw new IOException("Failed at renaming " + sourcePath + " to " + targetPath);
        }
    }
    // Use the two-argument form; the single-argument delete(Path) is deprecated
    fs.delete(srcDir, true);
}
From source file:com.liveramp.hank.hadoop.DomainBuilderOutputCommitter.java
License:Apache License
public static void commitJob(String domainName, JobConf conf) throws IOException {
    Path outputPath = new Path(DomainBuilderProperties.getOutputPath(domainName, conf));
    Path tmpOutputPath = new Path(DomainBuilderProperties.getTmpOutputPath(domainName, conf));
    FileSystem fs = outputPath.getFileSystem(conf);

    // Create outputPath
    fs.mkdirs(outputPath);

    // Move temporary output to final output
    LOG.info("Moving temporary output files from: " + tmpOutputPath + " to final output path: " + outputPath);
    /* The current multithreading handles each partition separately.
     * A higher level of granularity could be used instead, with each
     * file copy performed as a separate Runnable. */
    final ExecutorService executor = Executors.newFixedThreadPool(N_THREADS);
    Set<Integer> copiedPartitions = new HashSet<Integer>();
    final List<MoveContentsAndDeleteTask> tasks = new ArrayList<MoveContentsAndDeleteTask>();

    // Copy complete partitions
    copyPartitionsFrom(tmpOutputPath, fs, copiedPartitions, tasks, executor, outputPath);

    // Copy missing partitions from the empty partitions directory
    Path emptyPartitionsPath = new Path(tmpOutputPath, DomainBuilderAbstractOutputFormat.EMPTY_PARTITIONS_DIR);
    if (fs.exists(emptyPartitionsPath)) {
        copyPartitionsFrom(emptyPartitionsPath, fs, copiedPartitions, tasks, executor, outputPath);
    }

    executor.shutdown();
    try {
        boolean allCopiersFinished = false;
        while (!allCopiersFinished) {
            allCopiersFinished = executor.awaitTermination(WAIT_CYCLE_SECONDS, TimeUnit.SECONDS);
        }
    } catch (InterruptedException e) {
        throw new IOException("Executor interrupted", e);
    }
    for (MoveContentsAndDeleteTask task : tasks) {
        if (task.exception != null) {
            throw new IOException("Partition copying failed for " + task.srcDir, task.exception);
        }
    }

    // Finally, clean up
    cleanupJob(domainName, conf);
}
From source file:com.liveramp.hank.hadoop.DomainBuilderOutputCommitter.java
License:Apache License
public static void cleanupJob(String domainName, JobConf conf) throws IOException {
    Path tmpOutputPath = new Path(DomainBuilderProperties.getTmpOutputPath(domainName, conf));
    // Delete the temporary output path
    FileSystem fs = tmpOutputPath.getFileSystem(conf);
    if (fs.exists(tmpOutputPath)) {
        LOG.info("Deleting temporary output path " + tmpOutputPath);
        TrashHelper.deleteUsingTrashIfEnabled(fs, tmpOutputPath);
    }
}
From source file:com.main.MRSearchMain.java
public void searchHBase(int numOfDays) throws IOException, InterruptedException, ClassNotFoundException {
    long startTime;
    long endTime;
    String path = "/home/hadoop/app/hadoop-2.0.0-cdh4.3.0/etc/hadoop/";
    Configuration conf = HBaseConfiguration.create();
    // conf.set("hbase.zookeeper.quorum", "streamslab.localdomain");
    // conf.set("fs.default.name", "hdfs://streamslab.localdomain:8020");
    // conf.set("mapred.job.tracker", "hdfs://streamslab.localdomain:50300");
    // conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
    // Load the cluster configuration files so FileSystem resolves correctly
    conf.addResource(new Path(path + "core-site.xml"));
    conf.addResource(new Path(path + "hdfs-site.xml"));
    conf.addResource(new Path(path + "mapred-site.xml"));

    // Search parameters passed through the Configuration to the mapper
    conf.set("search.license", "C87310");
    conf.set("search.color", "10");
    conf.set("search.direction", "2");

    Job job = new Job(conf, "MRSearchHBase");
    System.out.println("search.license: " + conf.get("search.license"));
    job.setNumReduceTasks(0);
    job.setJarByClass(MRSearchMain.class);

    Scan scan = new Scan();
    scan.addFamily(FAMILY_NAME);
    byte[] startRow = Bytes.toBytes("2011010100000");
    byte[] stopRow;
    switch (numOfDays) {
    case 1:
        stopRow = Bytes.toBytes("2011010200000");
        break;
    case 10:
        stopRow = Bytes.toBytes("2011011100000");
        break;
    case 30:
        stopRow = Bytes.toBytes("2011020100000");
        break;
    case 365:
        stopRow = Bytes.toBytes("2012010100000");
        break;
    default:
        stopRow = Bytes.toBytes("2011010101000");
    }
    // Restrict the scan to the selected row-key range
    scan.setStartRow(startRow);
    scan.setStopRow(stopRow);

    TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, SearchMapper.class, ImmutableBytesWritable.class,
            Text.class, job);
    Path outPath = new Path("searchresult");
    LOG.info("outPath:" + outPath.toString());

    // Delete any previous output on HDFS before the job runs
    FileSystem file = null;
    try {
        file = FileSystem.get(conf);
    } catch (IOException e) {
        e.printStackTrace();
    }
    // HDFS_File file = new HDFS_File();
    // file.DelFile(conf, outPath.getName(), true);
    if (file.exists(outPath)) {
        file.delete(outPath, true);
        LOG.info("=====delPath " + outPath.toString() + "=====");
    }
    FileOutputFormat.setOutputPath(job, outPath);

    startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    endTime = System.currentTimeMillis();
    LOG.info("Time used: " + (endTime - startTime));
    LOG.info("startRow:" + Text.decode(startRow));
    LOG.info("stopRow: " + Text.decode(stopRow));
}