List of usage examples for org.apache.hadoop.fs Path toString
@Override
public String toString()
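Before the per-project examples, a minimal sketch of the method's behavior: Path.toString() renders the path's URI as a string, keeping the scheme and authority when the path is fully qualified. The host and directory names below are illustrative only:

import org.apache.hadoop.fs.Path;

public class PathToStringDemo {
    public static void main(String[] args) {
        // A relative path prints exactly what was passed in.
        Path relative = new Path("logs/2024/01");
        System.out.println(relative.toString());   // logs/2024/01

        // A fully qualified path keeps its scheme and authority.
        Path qualified = new Path("hdfs://namenode:8020/service/logs");
        System.out.println(qualified.toString());  // hdfs://namenode:8020/service/logs

        // Parent/child construction normalizes separators before printing.
        Path child = new Path(qualified, "2024/01");
        System.out.println(child.toString());      // hdfs://namenode:8020/service/logs/2024/01
    }
}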
From source file:com.blackberry.logdriver.admin.LogMaintenance.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();

    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    // For some reason, Oozie needs some options to be set in system instead of
    // in the configuration. So copy the configs over.
    {
        Iterator<Entry<String, String>> i = conf.iterator();
        while (i.hasNext()) {
            Entry<String, String> next = i.next();
            System.setProperty(next.getKey(), next.getValue());
        }
    }

    if (args.length < 3) {
        printUsage();
        return 1;
    }

    String userName = args[0];
    String dcNumber = args[1];
    String service = args[2];
    String date = null;
    String hour = null;
    if (args.length >= 4) {
        date = args[3];
    }
    if (args.length >= 5) {
        hour = args[4];
    }

    // Set from environment variables
    String mergeJobPropertiesFile = getConfOrEnv(conf, "MERGEJOB_CONF");
    String filterJobPropertiesFile = getConfOrEnv(conf, "FILTERJOB_CONF");
    String daysBeforeArchive = getConfOrEnv(conf, "DAYS_BEFORE_ARCHIVE");
    String daysBeforeDelete = getConfOrEnv(conf, "DAYS_BEFORE_DELETE");
    String maxConcurrentMR = getConfOrEnv(conf, "MAX_CONCURRENT_MR", "-1");
    String zkConnectString = getConfOrEnv(conf, "ZK_CONNECT_STRING");
    String logdir = getConfOrEnv(conf, "logdriver.logdir.name");
    boolean resetOrphanedJobs = Boolean.parseBoolean(getConfOrEnv(conf, "reset.orphaned.jobs", "true"));
    String rootDir = getConfOrEnv(conf, "service.root.dir");
    String maxTotalMR = getConfOrEnv(conf, "MAX_TOTAL_MR", "-1");

    boolean doMerge = true;
    boolean doArchive = true;
    boolean doDelete = true;

    if (zkConnectString == null) {
        LOG.error("ZK_CONNECT_STRING is not set. Exiting.");
        return 1;
    }
    if (mergeJobPropertiesFile == null) {
        LOG.info("MERGEJOB_CONF is not set. Not merging.");
        doMerge = false;
    }
    if (filterJobPropertiesFile == null) {
        LOG.info("FILTERJOB_CONF is not set. Not archiving.");
        doArchive = false;
    }
    if (daysBeforeArchive == null) {
        LOG.info("DAYS_BEFORE_ARCHIVE is not set. Not archiving.");
        doArchive = false;
    }
    if (doArchive && Integer.parseInt(daysBeforeArchive) < 0) {
        LOG.info("DAYS_BEFORE_ARCHIVE is negative. Not archiving.");
        doArchive = false;
    }
    if (daysBeforeDelete == null) {
        LOG.info("DAYS_BEFORE_DELETE is not set. Not deleting.");
        doDelete = false;
    }
    if (doDelete && Integer.parseInt(daysBeforeDelete) < 0) {
        LOG.info("DAYS_BEFORE_DELETE is negative. Not deleting.");
        doDelete = false;
    }
    if (logdir == null) {
        LOG.info("LOGDRIVER_LOGDIR_NAME is not set. Using default value of 'logs'.");
        logdir = "logs";
    }
    if (rootDir == null) {
        LOG.info("SERVICE_ROOT_DIR is not set. Using default value of 'service'.");
        rootDir = "/service";
    }

    // We can hang if this fails. So make sure we abort if it fails.
    fs = null;
    try {
        fs = FileSystem.get(conf);
        fs.exists(new Path("/")); // Test if it works.
    } catch (IOException e) {
        LOG.error("Error getting filesystem.", e);
        return 1;
    }

    // Create the LockUtil instance
    lockUtil = new LockUtil(zkConnectString);

    // Now it's safe to create our Job Runner
    JobRunner jobRunner = new JobRunner(Integer.parseInt(maxConcurrentMR), Integer.parseInt(maxTotalMR));
    Thread jobRunnerThread = new Thread(jobRunner);
    jobRunnerThread.setName("JobRunner");
    jobRunnerThread.setDaemon(false);
    jobRunnerThread.start();

    // Figure out what date we start filters on.
    String filterCutoffDate = "";
    if (doArchive) {
        Calendar cal = Calendar.getInstance();
        cal.add(Calendar.DAY_OF_MONTH, Integer.parseInt("-" + daysBeforeArchive));
        filterCutoffDate = String.format("%04d%02d%02d%02d", cal.get(Calendar.YEAR),
            (cal.get(Calendar.MONTH) + 1), cal.get(Calendar.DAY_OF_MONTH), cal.get(Calendar.HOUR_OF_DAY));
        LOG.info("Archiving logs from before {}", filterCutoffDate);
    }
    String deleteCutoffDate = "";
    if (doDelete) {
        Calendar cal = Calendar.getInstance();
        cal.add(Calendar.DAY_OF_MONTH, Integer.parseInt("-" + daysBeforeDelete));
        deleteCutoffDate = String.format("%04d%02d%02d%02d", cal.get(Calendar.YEAR),
            (cal.get(Calendar.MONTH) + 1), cal.get(Calendar.DAY_OF_MONTH), cal.get(Calendar.HOUR_OF_DAY));
        LOG.info("Deleting logs from before {}", deleteCutoffDate);
    }

    long now = System.currentTimeMillis();

    // Various exceptions have been popping up here. So make sure I catch them all.
    try {
        // Patterns to recognize hour, day and incoming directories, so that they
        // can be processed.
        Pattern datePathPattern;
        Pattern hourPathPattern;
        Pattern incomingPathPattern;
        Pattern dataPathPattern;
        Pattern archivePathPattern;
        Pattern workingPathPattern;
        if (hour != null) {
            datePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")");
            hourPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                + Pattern.quote(hour) + ")");
            incomingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                + Pattern.quote(hour) + ")/([^/]+)/incoming");
            dataPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                + Pattern.quote(hour) + ")/([^/]+)/data");
            archivePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                + Pattern.quote(hour) + ")/([^/]+)/archive");
            workingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")/("
                + Pattern.quote(hour) + ")/([^/]+)/working/([^/]+)_(\\d+)");
        } else if (date != null) {
            datePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date) + ")");
            hourPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date)
                + ")/(\\d{2})");
            incomingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date)
                + ")/(\\d{2})/([^/]+)/incoming");
            dataPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date)
                + ")/(\\d{2})/([^/]+)/data");
            archivePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date)
                + ")/(\\d{2})/([^/]+)/archive");
            workingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(" + Pattern.quote(date)
                + ")/(\\d{2})/([^/]+)/working/([^/]+)_(\\d+)");
        } else {
            datePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})");
            hourPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})");
            incomingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/incoming");
            dataPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/data");
            archivePathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir) + "/(\\d{8})/(\\d{2})/([^/]+)/archive");
            workingPathPattern = Pattern.compile(rootDir + "/" + Pattern.quote(dcNumber) + "/"
                + Pattern.quote(service) + "/" + Pattern.quote(logdir)
                + "/(\\d{8})/(\\d{2})/([^/]+)/working/([^/]+)_(\\d+)");
        }

        // Do a depth first search of the directory, processing anything that
        // looks interesting along the way
        Deque<Path> paths = new ArrayDeque<Path>();
        Path rootPath = new Path(rootDir + "/" + dcNumber + "/" + service + "/" + logdir + "/");
        paths.push(rootPath);

        while (paths.size() > 0) {
            Path p = paths.pop();
            LOG.debug("{}", p.toString());

            if (!fs.exists(p)) {
                continue;
            }

            FileStatus dirStatus = fs.getFileStatus(p);
            FileStatus[] children = fs.listStatus(p);
            boolean addChildren = true;

            boolean old = dirStatus.getModificationTime() < now - WAIT_TIME;
            LOG.debug(" Was last modified {}ms ago", now - dirStatus.getModificationTime());

            if (!old) {
                LOG.debug(" Skipping, since it's not old enough.");
            } else if ((!rootPath.equals(p)) && (children.length == 0
                    || (children.length == 1 && children[0].getPath().getName().equals(READY_MARKER)))) {
                // old and no children? Delete!
                LOG.info(" Deleting empty directory {}", p.toString());
                fs.delete(p, true);
            } else {
                Matcher matcher = datePathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    LOG.debug("Checking date directory");

                    // If this is already done, then skip it. So only process if it
                    // doesn't exist.
                    if (fs.exists(new Path(p, READY_MARKER)) == false) {
                        // Check each subdirectory. If they all have ready markers, then I
                        // guess we're ready.
                        boolean ready = true;
                        for (FileStatus c : children) {
                            if (c.isDirectory() && fs.exists(new Path(c.getPath(), READY_MARKER)) == false) {
                                ready = false;
                                break;
                            }
                        }
                        if (ready) {
                            fs.createNewFile(new Path(p, READY_MARKER));
                        }
                    }
                }

                matcher = hourPathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    LOG.debug("Checking hour directory");

                    // If this is already done, then skip it. So only process if it
                    // doesn't exist.
                    if (fs.exists(new Path(p, READY_MARKER)) == false) {
                        // Check each subdirectory. If they all have ready markers, then I
                        // guess we're ready.
                        boolean ready = true;
                        for (FileStatus c : children) {
                            if (c.isDirectory() && fs.exists(new Path(c.getPath(), READY_MARKER)) == false) {
                                ready = false;
                                break;
                            }
                        }
                        if (ready) {
                            fs.createNewFile(new Path(p, READY_MARKER));
                        }
                    }
                }

                // Check to see if we have to run a merge
                matcher = incomingPathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    LOG.debug("Checking incoming directory");
                    String matchDate = matcher.group(1);
                    String matchHour = matcher.group(2);
                    String matchComponent = matcher.group(3);

                    String timestamp = matchDate + matchHour;

                    if (doDelete && timestamp.compareTo(deleteCutoffDate) < 0) {
                        LOG.info("Deleting old directory: {}", p);
                        fs.delete(p, true);
                        addChildren = false;
                    } else if (doMerge) {
                        // old, looks right, and has children? Run it!
                        boolean hasMatchingChildren = false;
                        boolean subdirTooYoung = false;

                        for (FileStatus child : children) {
                            if (!hasMatchingChildren) {
                                FileStatus[] grandchildren = fs.listStatus(child.getPath());
                                for (FileStatus gc : grandchildren) {
                                    if (VALID_FILE.matcher(gc.getPath().getName()).matches()) {
                                        hasMatchingChildren = true;
                                        break;
                                    }
                                }
                            }
                            if (!subdirTooYoung) {
                                if (child.getModificationTime() >= now - WAIT_TIME) {
                                    subdirTooYoung = true;
                                    LOG.debug(" Subdir {} is too young.", child.getPath());
                                }
                            }
                        }

                        if (!hasMatchingChildren) {
                            LOG.debug(" No files match the expected pattern ({})", VALID_FILE.pattern());
                        }

                        if (hasMatchingChildren && !subdirTooYoung) {
                            LOG.info(" Run Merge job {} :: {} {} {} {} {}", new Object[] { p.toString(),
                                dcNumber, service, matchDate, matchHour, matchComponent });

                            Properties jobProps = new Properties();
                            jobProps.load(new FileInputStream(mergeJobPropertiesFile));

                            jobProps.setProperty("jobType", "merge");
                            jobProps.setProperty("rootDir", rootDir);
                            jobProps.setProperty("dcNumber", dcNumber);
                            jobProps.setProperty("service", service);
                            jobProps.setProperty("date", matchDate);
                            jobProps.setProperty("hour", matchHour);
                            jobProps.setProperty("component", matchComponent);
                            jobProps.setProperty("user.name", userName);
                            jobProps.setProperty("logdir", logdir);

                            jobRunner.submit(jobProps);
                            addChildren = false;
                        }
                    }
                }

                // Check to see if we need to run a filter and archive
                matcher = dataPathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    String matchDate = matcher.group(1);
                    String matchHour = matcher.group(2);
                    String matchComponent = matcher.group(3);

                    String timestamp = matchDate + matchHour;

                    if (doDelete && timestamp.compareTo(deleteCutoffDate) < 0) {
                        LOG.info("Deleting old directory: {}", p);
                        fs.delete(p, true);
                        addChildren = false;
                    } else if (doArchive && timestamp.compareTo(filterCutoffDate) < 0) {
                        Properties jobProps = new Properties();
                        jobProps.load(new FileInputStream(filterJobPropertiesFile));

                        jobProps.setProperty("jobType", "filter");
                        jobProps.setProperty("rootDir", rootDir);
                        jobProps.setProperty("dcNumber", dcNumber);
                        jobProps.setProperty("service", service);
                        jobProps.setProperty("date", matchDate);
                        jobProps.setProperty("hour", matchHour);
                        jobProps.setProperty("component", matchComponent);
                        jobProps.setProperty("user.name", userName);
                        jobProps.setProperty("logdir", logdir);

                        // Check to see if we should just keep all or delete all here.
                        // The filter file should be here
                        String appPath = jobProps.getProperty("oozie.wf.application.path");
                        appPath = appPath.replaceFirst("\\$\\{.*?\\}", "");
                        Path filterFile = new Path(appPath + "/"
                            + conf.get("filter.definition.file", service + ".yaml"));
                        LOG.info("Filter file is {}", filterFile);

                        if (fs.exists(filterFile)) {
                            List<BoomFilterMapper.Filter> filters = BoomFilterMapper.loadFilters(matchComponent,
                                fs.open(filterFile));

                            if (filters == null) {
                                LOG.warn(" Got null when getting filters. Not processing. {} :: {} {} {} {} {}",
                                    new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                        matchComponent });
                            } else if (filters.size() == 0) {
                                LOG.warn(" Got no filters. Not processing. {} :: {} {} {} {} {}",
                                    new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                        matchComponent });
                            } else if (filters.size() == 1
                                    && filters.get(0) instanceof BoomFilterMapper.KeepAllFilter) {
                                LOG.info(" Keeping everything. {} :: {} {} {} {} {}",
                                    new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                        matchComponent });

                                // Move files from data to archive
                                String destination = rootDir + "/" + dcNumber + "/" + service + "/" + logdir
                                    + "/" + matchDate + "/" + matchHour + "/" + matchComponent + "/archive/";

                                PathInfo pathInfo = new PathInfo();
                                pathInfo.setDcNumber(dcNumber);
                                pathInfo.setService(service);
                                pathInfo.setLogdir(logdir);
                                pathInfo.setDate(matchDate);
                                pathInfo.setHour(matchHour);
                                pathInfo.setComponent(matchComponent);

                                try {
                                    lockUtil.acquireWriteLock(lockUtil.getLockPath(pathInfo));
                                    fs.mkdirs(new Path(destination));
                                    for (FileStatus f : fs.listStatus(p)) {
                                        fs.rename(f.getPath(), new Path(destination));
                                    }
                                } finally {
                                    lockUtil.releaseWriteLock(lockUtil.getLockPath(pathInfo));
                                }
                            } else if (filters.size() == 1
                                    && filters.get(0) instanceof BoomFilterMapper.DropAllFilter) {
                                LOG.info(" Dropping everything. {} :: {} {} {} {} {}",
                                    new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                        matchComponent });

                                // delete it all!
                                PathInfo pathInfo = new PathInfo();
                                pathInfo.setDcNumber(dcNumber);
                                pathInfo.setService(service);
                                pathInfo.setLogdir(logdir);
                                pathInfo.setDate(matchDate);
                                pathInfo.setHour(matchHour);
                                pathInfo.setComponent(matchComponent);

                                try {
                                    lockUtil.acquireWriteLock(lockUtil.getLockPath(pathInfo));
                                    fs.delete(p, true);
                                } finally {
                                    lockUtil.releaseWriteLock(lockUtil.getLockPath(pathInfo));
                                }
                            } else {
                                LOG.info(" Run Filter/Archive job {} :: {} {} {} {} {}",
                                    new Object[] { p.toString(), dcNumber, service, matchDate, matchHour,
                                        matchComponent });
                                jobRunner.submit(jobProps);
                            }
                        } else {
                            LOG.warn("Skipping filter job, since no filter file exists");
                        }

                        addChildren = false;
                    }
                }

                matcher = archivePathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    String matchDate = matcher.group(1);
                    String matchHour = matcher.group(2);

                    String timestamp = matchDate + matchHour;

                    if (doDelete && timestamp.compareTo(deleteCutoffDate) < 0) {
                        LOG.info("Deleting old directory: {}", p);
                        fs.delete(p, true);
                        addChildren = false;
                    }
                }

                matcher = workingPathPattern.matcher(p.toUri().getPath());
                if (matcher.matches()) {
                    LOG.info(" Matches working pattern ({})", p);
                    if (resetOrphanedJobs) {
                        String matchDate = matcher.group(1);
                        String matchHour = matcher.group(2);
                        String matchComponent = matcher.group(3);

                        // Move everything from working/xxx/incoming/ to incoming/
                        PathInfo lockPathInfo = new PathInfo(logdir, rootDir + "/" + dcNumber + "/" + service
                            + "/" + logdir + "/" + matchDate + "/" + matchHour + "/" + matchComponent);
                        lockUtil.acquireWriteLock(lockUtil.getLockPath(lockPathInfo));

                        FileStatus[] fileStatuses = fs.listStatus(new Path(p.toUri().getPath() + "/incoming/"));
                        if (fileStatuses != null) {
                            for (FileStatus fileStatus : fileStatuses) {
                                Path toPath = new Path(
                                    fileStatus.getPath().getParent().getParent().getParent().getParent(),
                                    "incoming/" + fileStatus.getPath().getName());

                                LOG.info(" Moving data from {} to {}", fileStatus.getPath(), toPath);
                                LOG.info(" mkdir {}", toPath);
                                fs.mkdirs(toPath);

                                Path fromDir = new Path(p.toUri().getPath(),
                                    "incoming/" + fileStatus.getPath().getName());
                                LOG.info(" moving from {}", fromDir);
                                FileStatus[] files = fs.listStatus(fromDir);
                                if (files == null || files.length == 0) {
                                    LOG.info(" Nothing to move from {}", fromDir);
                                } else {
                                    for (FileStatus f : files) {
                                        LOG.info(" rename {} {}", f.getPath(),
                                            new Path(toPath, f.getPath().getName()));
                                        fs.rename(f.getPath(), new Path(toPath, f.getPath().getName()));
                                    }
                                }

                                LOG.info(" rm {}", fileStatus.getPath());
                                fs.delete(fileStatus.getPath(), true);
                            }

                            lockUtil.releaseWriteLock(lockUtil.getLockPath(lockPathInfo));
                            fs.delete(new Path(p.toUri().getPath()), true);
                        }
                    }
                    addChildren = false;
                }
            }

            // Add any children which are directories to the stack.
            if (addChildren) {
                for (int i = children.length - 1; i >= 0; i--) {
                    FileStatus child = children[i];
                    if (child.isDirectory()) {
                        paths.push(child.getPath());
                    }
                }
            }
        }

        // Since we may have deleted a bunch of directories, delete any unused
        // locks from ZooKeeper.
        {
            LOG.info("Checking for unused locks in ZooKeeper");
            String scanPath = rootDir + "/" + dcNumber + "/" + service + "/" + logdir;
            if (date != null) {
                scanPath += "/" + date;
                if (hour != null) {
                    scanPath += "/" + hour;
                }
            }

            List<LockInfo> lockInfo = lockUtil.scan(scanPath);

            for (LockInfo li : lockInfo) {
                // Check if the lock path still exists in HDFS. If it doesn't, then
                // delete it from ZooKeeper.
                String path = li.getPath();
                String hdfsPath = path.substring(LockUtil.ROOT.length());
                if (!fs.exists(new Path(hdfsPath))) {
                    ZooKeeper zk = lockUtil.getZkClient();

                    while (!path.equals(LockUtil.ROOT)) {
                        try {
                            zk.delete(path, -1);
                        } catch (KeeperException.NotEmptyException e) {
                            // That's fine. Just stop trying then.
                            break;
                        } catch (Exception e) {
                            LOG.error("Caught exception trying to delete from ZooKeeper.", e);
                            break;
                        }
                        LOG.info("Deleted from ZooKeeper: {}", path);
                        path = path.substring(0, path.lastIndexOf('/'));
                    }
                }
            }
        }

        // Now that we're done, wait for the Oozie Runner to stop, and print the
        // results.
        LOG.info("Waiting for Oozie jobs to complete.");
        jobRunner.shutdown();
        jobRunnerThread.join();
        LOG.info("Job Stats : Started={} Succeeded={} failed={} errors={}", new Object[] {
            jobRunner.getStarted(), jobRunner.getSucceeded(), jobRunner.getFailed(), jobRunner.getErrors() });

        lockUtil.close();

    } catch (Exception e) {
        LOG.error("Unexpected exception caught.", e);
        return 1;
    }

    return 0;
}
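A note on the cutoff comparison used above: both the directory timestamps and the cutoffs are zero-padded yyyyMMddHH strings, so a plain lexicographic String.compareTo() orders them chronologically. A minimal sketch of that invariant (the values are illustrative):

import java.util.Calendar;

public class CutoffDemo {
    public static void main(String[] args) {
        // Build a cutoff the same way LogMaintenance does: subtract N days,
        // then format as a zero-padded yyyyMMddHH string.
        int daysBeforeDelete = 30; // illustrative value
        Calendar cal = Calendar.getInstance();
        cal.add(Calendar.DAY_OF_MONTH, -daysBeforeDelete);
        String cutoff = String.format("%04d%02d%02d%02d", cal.get(Calendar.YEAR),
            cal.get(Calendar.MONTH) + 1, cal.get(Calendar.DAY_OF_MONTH), cal.get(Calendar.HOUR_OF_DAY));

        // Zero padding makes lexicographic order match chronological order.
        String timestamp = "2013010508"; // a directory's date + hour, illustrative
        System.out.println(timestamp.compareTo(cutoff) < 0
            ? "older than cutoff - would be deleted"
            : "within retention - kept");
    }
}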
From source file:com.blackberry.logdriver.util.IndexLogs.java
License:Apache License
private static void updateComponent(Map<String, Map<String, Map<String, Map<String, Component>>>> data,
        List<String> unmergedCSVStrings, FileSystem fs, FileStatus matchedFolder, Path path)
        throws IOException, ParseException {
    // Parse path by splitting it across slashes. To determine service (which might contain slashes) grab
    // everything after the DC name, but before the matched date string.
    String[] pathPieces = matchedFolder.getPath().toString().split("/");
    String[] servicePieces = path.toString().split(pathPieces[4] + "/");
    servicePieces = servicePieces[1].split("/" + pathPieces[pathPieces.length - 5]);

    String DC = pathPieces[4];
    String service = servicePieces[0];
    String component = pathPieces[pathPieces.length - 2];
    String type = pathPieces[pathPieces.length - 5];
    String status = pathPieces[pathPieces.length - 1];
    Date date = inputFormat.parse(pathPieces[pathPieces.length - 4]);

    // If the _READY file doesn't exist, add it to the list
    Path READYPath = new Path(path.toString() + "/_READY");
    // System.out.println("Checking for " + READYPath.toString());
    if (!fs.exists(READYPath)) {
        unmergedCSVStrings.add(DC + "," + service + "," + type + "," + component + ","
            + pathPieces[pathPieces.length - 4] + "," + pathPieces[pathPieces.length - 3] + "\n");
        // System.out.println(unmergedCSVString);
    }

    // Check if there is a matching component, create one if not.
    if (!componentExists(data, DC, service, type, component)) {
        data.get(DC).get(service).get(type).put(component,
            new Component(DC, service, type, component, date));
    }
    Component thisComponent = data.get(DC).get(service).get(type).get(component);

    // Update the start or end date if the current date is before or after, respectively.
    if (date.before(thisComponent.startDate)) {
        thisComponent.startDate = date;
    } else if (date.after(thisComponent.endDate)) {
        thisComponent.endDate = date;
    }

    // Is the current folder an archive? If so and date is later than the current archiveDate, update it.
    if (status.matches("archive") && date.after(thisComponent.archiveDate)) {
        thisComponent.archiveDate = date;
    }

    // Add size data
    if (status.matches("data")) {
        thisComponent.addDataSize(fs.getContentSummary(matchedFolder.getPath()).getLength());
    } else if (status.matches("incoming")) {
        thisComponent.addIncomingSize(fs.getContentSummary(matchedFolder.getPath()).getLength());
    } else if (status.matches("archive")) {
        thisComponent.addArchiveSize(fs.getContentSummary(matchedFolder.getPath()).getLength());
    }
}
From source file:com.bonc.mr_roamRecognition_hjpt.comm.NewFileOutputFormat.java
License:Apache License
/**
 * Set the {@link Path} of the output directory for the map-reduce job.
 *
 * @param job
 *          The job to modify
 * @param outputDir
 *          the {@link Path} of the output directory for the map-reduce job.
 */
public static void setOutputPath(Job job, Path outputDir) {
    try {
        outputDir = outputDir.getFileSystem(job.getConfiguration()).makeQualified(outputDir);
    } catch (IOException e) {
        // Throw the IOException as a RuntimeException to be compatible with MR1
        throw new RuntimeException(e);
    }
    job.getConfiguration().set(FileOutputFormat.OUTDIR, outputDir.toString());
}
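A quick sketch of why the makeQualified(...) call above matters before persisting outputDir.toString(): an unqualified Path omits the scheme and authority, while a qualified one round-trips unambiguously through a string-valued configuration entry. This is a minimal local-filesystem demo; the printed qualified form depends on the working directory:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class QualifyDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration(); // defaults to the local file system

        Path p = new Path("output/part-00000");
        System.out.println(p.toString()); // output/part-00000  (relative, no scheme)

        // Qualify against the path's file system, as setOutputPath() does above.
        Path qualified = p.getFileSystem(conf).makeQualified(p);
        System.out.println(qualified.toString()); // e.g. file:/home/user/output/part-00000
    }
}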
From source file:com.btoddb.chronicle.apps.AvroTools.java
License:Open Source License
private void go(String srcDir) throws URISyntaxException, IOException {
    hdfsFs = FileSystem.get(new URI(srcDir), hdfsConfig);

    System.out.println();
    System.out.println("Processing files from " + srcDir);
    System.out.println();

    logger.debug("Searching for files in {}", srcDir);

    Path path = new Path(srcDir);
    if (!hdfsFs.exists(path)) {
        System.out.println("The path does not exist - cannot continue : " + path.toString());
        return;
    }

    FileStatus[] statuses = hdfsFs.listStatus(path, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            String name = path.getName();
            return !name.startsWith("_") && name.endsWith(".avro");
        }
    });

    for (FileStatus fs : statuses) {
        try {
            Path inPath = fs.getPath();
            long fileSize = hdfsFs.getFileStatus(inPath).getLen();
            System.out.println(String.format("Processing file, %s (%d)", inPath.toString(), fileSize));
            testFileAndFix(inPath);
        } catch (Exception e) {
            // don't care about the cause, the test should be able to read all files it cares about
            e.printStackTrace();
        }
    }
}
From source file:com.ceph.rados.fs.hdfs.RadosFileSystem.java
License:Apache License
/**
 * @param permission Currently ignored.
 */
@Override
public FSDataOutputStream create(Path file, FsPermission permission, boolean overwrite, int bufferSize,
        short replication, long blockSize, Progressable progress) throws IOException {
    INode inode = store.retrieveINode(makeAbsolute(file));
    if (inode != null) {
        if (overwrite) {
            delete(file, true);
        } else {
            throw new FileAlreadyExistsException("File already exists: " + file);
        }
    } else {
        Path parent = file.getParent();
        if (parent != null) {
            if (!mkdirs(parent)) {
                throw new IOException("Mkdirs failed to create " + parent.toString());
            }
        }
    }
    return new FSDataOutputStream(new RadosHDFSOutputStream(store, makeAbsolute(file).toString()));
}
From source file:com.ceph.rados.fs.hdfs.RadosFileSystem.java
License:Apache License
@Override
public FSDataInputStream open(Path path, int bufferSize) throws IOException {
    return new FSDataInputStream(new RadosHDFSInputStream(store, path.toString()));
}
From source file:com.ceph.rados.fs.hdfs.RadosFileSystem.java
License:Apache License
@Override
public boolean rename(Path src, Path dst) throws IOException {
    Path absoluteSrc = makeAbsolute(src);
    final String debugPreamble = "Renaming '" + src + "' to '" + dst + "' - ";
    INode srcINode = store.retrieveINode(absoluteSrc);
    boolean debugEnabled = LOG.isDebugEnabled();
    if (srcINode == null) {
        // src path doesn't exist
        if (debugEnabled) {
            LOG.debug(debugPreamble + "returning false as src does not exist");
        }
        return false;
    }

    Path absoluteDst = makeAbsolute(dst);

    // validate the parent dir of the destination
    Path dstParent = absoluteDst.getParent();
    if (dstParent != null) {
        // if the dst parent is not root, make sure it exists
        INode dstParentINode = store.retrieveINode(dstParent);
        if (dstParentINode == null) {
            // dst parent doesn't exist
            if (debugEnabled) {
                LOG.debug(debugPreamble + "returning false as dst parent does not exist");
            }
            return false;
        }
        if (dstParentINode.isFile()) {
            // dst parent exists but is a file
            if (debugEnabled) {
                LOG.debug(debugPreamble + "returning false as dst parent exists and is a file");
            }
            return false;
        }
    }

    // get status of source
    boolean srcIsFile = srcINode.isFile();

    INode dstINode = store.retrieveINode(absoluteDst);
    boolean destExists = dstINode != null;
    boolean destIsDir = destExists && !dstINode.isFile();
    if (srcIsFile) {
        // source is a simple file
        if (destExists) {
            if (destIsDir) {
                // outcome #1: dest exists and is dir - rename to subdir of dest
                if (debugEnabled) {
                    LOG.debug(debugPreamble + "copying src file under dest dir to " + absoluteDst);
                }
                absoluteDst = new Path(absoluteDst, absoluteSrc.getName());
            } else {
                // outcome #2: dest is a file - fail iff different from src
                boolean renamingOnToSelf = absoluteSrc.equals(absoluteDst);
                if (debugEnabled) {
                    LOG.debug(debugPreamble + "copying file onto file, outcome is " + renamingOnToSelf);
                }
                return renamingOnToSelf;
            }
        } else {
            // outcome #3: dest does not exist - use dest as path for rename
            if (debugEnabled) {
                LOG.debug(debugPreamble + "copying file onto file");
            }
        }
    } else {
        // here the source exists and is a directory
        // outcomes (given we know the parent dir exists if we get this far):
        // #1 destination is a file: fail
        // #2 destination is a directory: create a new dir under that one
        // #3 destination doesn't exist: create a new dir with that name
        // #2 and #3 are only allowed if the dest path is not == or under src
        if (destExists) {
            if (!destIsDir) {
                // #1 destination is a file: fail
                if (debugEnabled) {
                    LOG.debug(debugPreamble + "returning false as src is a directory, but not dest");
                }
                return false;
            } else {
                // the destination dir exists
                // destination for rename becomes a subdir of the target name
                absoluteDst = new Path(absoluteDst, absoluteSrc.getName());
                if (debugEnabled) {
                    LOG.debug(debugPreamble + "copying src dir under dest dir to " + absoluteDst);
                }
            }
        }
        // the final destination directory is now known, so validate it for illegal moves
        if (absoluteSrc.equals(absoluteDst)) {
            // you can't rename a directory onto itself
            if (debugEnabled) {
                LOG.debug(debugPreamble + "Dest==source && isDir - failing");
            }
            return false;
        }
        if (absoluteDst.toString().startsWith(absoluteSrc.toString() + "/")) {
            // you can't move a directory under itself
            if (debugEnabled) {
                LOG.debug(debugPreamble + "dst is equal to or under src dir - failing");
            }
            return false;
        }
    }
    // here the dest path is set up - so rename
    return renameRecursive(absoluteSrc, absoluteDst);
}
From source file:com.ceph.rados.fs.hdfs.RadosFileSystem.java
License:Apache License
@Override
public boolean delete(Path path, boolean recursive) throws IOException {
    Path absolutePath = makeAbsolute(path);
    INode inode = store.retrieveINode(absolutePath);
    if (inode == null) {
        return false;
    }
    if (inode.isFile()) {
        store.deleteINode(absolutePath);
        for (Block block : inode.getBlocks()) {
            store.deleteBlock(block);
        }
    } else {
        FileStatus[] contents = null;
        try {
            contents = listStatus(absolutePath);
        } catch (FileNotFoundException fnfe) {
            return false;
        }

        if ((contents.length != 0) && (!recursive)) {
            throw new IOException("Directory " + path.toString() + " is not empty.");
        }
        for (FileStatus p : contents) {
            if (!delete(p.getPath(), recursive)) {
                return false;
            }
        }
        store.deleteINode(absolutePath);
    }
    return true;
}
From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.FPGrowthDriver.java
License:Apache License
/**
 * Run TopK FPGrowth given the input file.
 */
@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();

    addOption("minSupport", "s",
        "(Optional) The minimum number of times a co-occurrence must be present. Default Value: 3", "3");
    addOption("maxHeapSize", "k",
        "(Optional) Maximum Heap Size k, to denote the requirement to mine top K items. Default value: 50",
        "50");
    addOption("numGroups", "g",
        "(Optional) Number of groups the features should be divided in the map-reduce version."
            + " Doesn't work in sequential version Default Value:" + PFPGrowth.NUM_GROUPS_DEFAULT,
        Integer.toString(PFPGrowth.NUM_GROUPS_DEFAULT));
    addOption("splitterPattern", "regex",
        "Regular Expression pattern used to split given string transaction into itemsets."
            + " Default value splits comma separated itemsets. Default Value:"
            + " \"[ ,\\t]*[,|\\t][ ,\\t]*\" ",
        "[ ,\t]*[,|\t][ ,\t]*");
    addOption("numTreeCacheEntries", "tc",
        "(Optional) Number of entries in the tree cache to prevent duplicate tree building."
            + " (Warning) a first level conditional FP-Tree might consume a lot of memory, "
            + "so keep this value small, but big enough to prevent duplicate tree building. "
            + "Default Value:5 Recommended Values: [5-10]",
        "5");
    addOption("method", "method", "Method of processing: sequential|mapreduce", "sequential");
    addOption("encoding", "e", "(Optional) The file encoding. Default value: UTF-8", "UTF-8");
    addFlag("useFPG2", "2", "Use an alternate FPG implementation");

    if (parseArguments(args) == null) {
        return -1;
    }

    Parameters params = new Parameters();

    if (hasOption("minSupport")) {
        String minSupportString = getOption("minSupport");
        params.set("minSupport", minSupportString);
    }
    if (hasOption("maxHeapSize")) {
        String maxHeapSizeString = getOption("maxHeapSize");
        params.set("maxHeapSize", maxHeapSizeString);
    }
    if (hasOption("numGroups")) {
        String numGroupsString = getOption("numGroups");
        params.set("numGroups", numGroupsString);
    }
    if (hasOption("numTreeCacheEntries")) {
        String numTreeCacheString = getOption("numTreeCacheEntries");
        params.set("treeCacheSize", numTreeCacheString);
    }
    if (hasOption("splitterPattern")) {
        String patternString = getOption("splitterPattern");
        params.set("splitPattern", patternString);
    }

    String encoding = "UTF-8";
    if (hasOption("encoding")) {
        encoding = getOption("encoding");
    }
    params.set("encoding", encoding);

    if (hasOption("useFPG2")) {
        params.set(PFPGrowth.USE_FPG2, "true");
    }

    Path inputDir = getInputPath();
    Path outputDir = getOutputPath();

    params.set("input", inputDir.toString());
    params.set("output", outputDir.toString());

    String classificationMethod = getOption("method");
    if ("sequential".equalsIgnoreCase(classificationMethod)) {
        runFPGrowth(params);
    } else if ("mapreduce".equalsIgnoreCase(classificationMethod)) {
        Configuration conf = new Configuration();
        HadoopUtil.delete(conf, outputDir);
        PFPGrowth.runPFPGrowth(params);
    }

    return 0;
}
From source file:com.cg.mapreduce.myfpgrowth.FPGrowthDriver.java
License:Apache License
/**
 * Run TopK FPGrowth given the input file.
 */
@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();

    addOption("minSupport", "s",
        "(Optional) The minimum number of times a co-occurrence must be present. Default Value: 3", "3");
    addOption("maxHeapSize", "k",
        "(Optional) Maximum Heap Size k, to denote the requirement to mine top K items. Default value: 50",
        "50");
    addOption("numGroups", "g",
        "(Optional) Number of groups the features should be divided in the map-reduce version."
            + " Doesn't work in sequential version Default Value:" + PFPGrowth.NUM_GROUPS_DEFAULT,
        Integer.toString(PFPGrowth.NUM_GROUPS_DEFAULT));
    addOption("splitterPattern", "regex",
        "Regular Expression pattern used to split given string transaction into itemsets."
            + " Default value splits comma separated itemsets. Default Value:"
            + " \"[ ,\\t]*[,|\\t][ ,\\t]*\" ",
        "[ ,\t]*[,|\t][ ,\t]*");
    addOption("numTreeCacheEntries", "tc",
        "(Optional) Number of entries in the tree cache to prevent duplicate tree building."
            + " (Warning) a first level conditional FP-Tree might consume a lot of memory, "
            + "so keep this value small, but big enough to prevent duplicate tree building. "
            + "Default Value:5 Recommended Values: [5-10]",
        "5");
    addOption("method", "method", "Method of processing: sequential|mapreduce", "sequential");
    addOption("encoding", "e", "(Optional) The file encoding. Default value: UTF-8", "UTF-8");
    addFlag("useFPG2", "2", "Use an alternate FPG implementation");

    if (parseArguments(args) == null) {
        return -1;
    }

    Parameters params = new Parameters();

    if (hasOption("minSupport")) {
        String minSupportString = getOption("minSupport");
        params.set("minSupport", minSupportString);
    }
    if (hasOption("maxHeapSize")) {
        String maxHeapSizeString = getOption("maxHeapSize");
        params.set("maxHeapSize", maxHeapSizeString);
    }
    if (hasOption("numGroups")) {
        String numGroupsString = getOption("numGroups");
        params.set("numGroups", numGroupsString);
    }
    if (hasOption("numTreeCacheEntries")) {
        String numTreeCacheString = getOption("numTreeCacheEntries");
        params.set("treeCacheSize", numTreeCacheString);
    }
    if (hasOption("splitterPattern")) {
        String patternString = getOption("splitterPattern");
        params.set("splitPattern", patternString);
    }

    String encoding = "UTF-8";
    if (hasOption("encoding")) {
        encoding = getOption("encoding");
    }
    params.set("encoding", encoding);

    if (hasOption("useFPG2")) {
        params.set(PFPGrowth.USE_FPG2, "true");
    }

    Path inputDir = getInputPath();
    Path outputDir = getOutputPath();

    params.set("input", inputDir.toString());
    params.set("output", outputDir.toString());

    Configuration conf = new Configuration();
    HadoopUtil.delete(conf, outputDir);
    PFPGrowth.runPFPGrowth(params);

    return 0;
}