Usage examples for org.apache.hadoop.fs.Path.getParent()
public Path getParent()
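getParent() returns the parent of a path, or null when the path is the root; getName() returns the final path component. A minimal sketch of the basic behavior (paths are illustrative):

import org.apache.hadoop.fs.Path;

public class GetParentDemo {
    public static void main(String[] args) {
        Path p = new Path("/user/hive/warehouse/t1");
        System.out.println(p.getParent());             // /user/hive/warehouse
        System.out.println(p.getParent().getName());   // warehouse
        System.out.println(new Path("/").getParent()); // null: the root has no parent
    }
}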
From source file:com.elex.dmp.lda.CVB0Driver.java
License:Apache License
private static double calculatePerplexity(Configuration conf, Path corpusPath, Path modelPath, int iteration)
        throws IOException, ClassNotFoundException, InterruptedException {
    String jobName = "Calculating perplexity for " + modelPath;
    log.info("About to run: " + jobName);
    Job job = new Job(conf, jobName);
    job.setJarByClass(CachingCVB0PerplexityMapper.class);
    job.setMapperClass(CachingCVB0PerplexityMapper.class);
    job.setCombinerClass(DualDoubleSumReducer.class);
    job.setReducerClass(DualDoubleSumReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(DoubleWritable.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.addInputPath(job, corpusPath);
    Path outputPath = perplexityPath(modelPath.getParent(), iteration);
    FileOutputFormat.setOutputPath(job, outputPath);
    setModelPaths(job, modelPath);
    HadoopUtil.delete(conf, outputPath);
    if (!job.waitForCompletion(true)) {
        throw new InterruptedException("Failed to calculate perplexity for: " + modelPath);
    }
    return readPerplexity(conf, modelPath.getParent(), iteration);
}
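Here getParent() anchors the perplexity output as a sibling of the model directory, so per-iteration results accumulate next to the models rather than inside them. The perplexityPath helper is defined elsewhere in CVB0Driver; a plausible sketch of it, not the verified original:

// Hypothetical reconstruction of the perplexityPath helper referenced above:
// builds an iteration-numbered directory under the model's parent
// (the "perplexity-<n>" naming is an assumption).
private static Path perplexityPath(Path modelParent, int iteration) {
    return new Path(modelParent, "perplexity-" + iteration);
}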
From source file:com.ery.dimport.daemon.TaskManager.java
License:Apache License
public void runTask(final TaskInfo task) {
    List<LogHostRunInfoPO> allFiles = new ArrayList<LogHostRunInfoPO>();
    try {
        task.START_TIME = new Date(System.currentTimeMillis());
        boolean needUpdate = false;
        TaskInfo exists = allTask.get(task.TASK_ID);
        if (exists == null) {
            needUpdate = true;
        } else {
            task.hosts = exists.hosts;
        }
        if (task.hosts == null || task.hosts.size() == 0) {
            task.hosts = new ArrayList<String>(master.getServerManager().getOnlineServers().keySet());
            needUpdate = true;
        }
        if (ZKUtil.checkExists(watcher, watcher.dimportRunTaskNode + "/" + task.TASK_ID) == -1) {
            needUpdate = true;
        }
        if (needUpdate) {
            try {
                task.HOST_SIZE = task.hosts.size();
                master.logWriter.writeLog(task);
                ZKUtil.createSetData(watcher, watcher.dimportRunTaskNode + "/" + task.TASK_ID,
                        DImportConstant.Serialize(task));
            } catch (Throwable e) {
            }
        }
        Thread thread = Thread.currentThread();
        ProcessInfo procInfo = null;
        synchronized (taskInProgress) {
            procInfo = taskInProgress.get(task.getRunTaskId());
        }
        procInfo.thread = thread;
        procInfo.startTime = System.currentTimeMillis();
        String filePath = task.FILE_PATH;
        boolean isInHdfs = false;
        final Map<String, Long> files = new HashMap<String, Long>();
        String tmpPath = conf.get(DImportConstant.DIMPORT_PROCESS_TMPDATA_DIR, System.getProperty("user.home"));
        if (tmpPath.endsWith("/")) {
            tmpPath = tmpPath.substring(0, tmpPath.length() - 1);
        }
        if (filePath == null || filePath.equals("")) {
            files.put("", 0L);
        } else {
            if (task.fileNamePattern != null || (task.FILE_FILTER != null && !task.FILE_FILTER.equals(""))) {
                task.FILE_FILTER = DImportConstant.macroProcess(task.FILE_FILTER);
                task.FILE_FILTER = task.FILE_FILTER.replaceAll("\\{host\\}", this.master.hostName);
                task.fileNamePattern = Pattern.compile(task.FILE_FILTER);
            }
            Matcher m = hdfsUrlPattern.matcher(filePath);
            if (m.matches()) {
                isInHdfs = true;
                filePath = m.group(2);
                // for (String string : conf.getValByRegex(".*").keySet()) {
                //     System.out.println(string + "=" + conf.get(string));
                // }
                Path dirPath = new Path(filePath);
                FileSystem fs = FileSystem.get(HadoopConf.getConf(conf));
                if (!fs.exists(dirPath) || !fs.isDirectory(dirPath)) {
                    throw new IOException("HDFS directory " + filePath + " does not exist or is not a directory");
                }
                FileStatus[] hFiles = fs.listStatus(dirPath, new PathFilter() {
                    @Override
                    public boolean accept(Path name) {
                        if (task.fileNamePattern != null) {
                            System.out.println("hdfs listStatus:" + name.getParent() + "/" + name.getName());
                            return task.fileNamePattern.matcher(name.getName()).matches();
                        } else {
                            return true;
                        }
                    }
                });
                for (int i = 0; i < hFiles.length; i++) {
                    files.put(hFiles[i].getPath().toString(), hFiles[i].getLen());
                }
            } else {
                java.io.File f = new File(filePath);
                if (!f.exists() || !f.isDirectory()) {
                    throw new IOException("Local directory " + filePath + " does not exist or is not a directory");
                }
                File[] lFiles = f.listFiles(new FilenameFilter() {
                    public boolean accept(File dir, String name) {
                        if (task.fileNamePattern != null) {
                            System.out.println("local fs listStatus:" + dir + "/" + name);
                            return task.fileNamePattern.matcher(name).matches();
                        } else {
                            return true;
                        }
                    }
                });
                for (int i = 0; i < lFiles.length; i++) {
                    files.put(lFiles[i].getAbsolutePath(), lFiles[i].length());
                }
            }
        }
        for (String fileName : files.keySet()) {
            LogHostRunInfoPO runInfo = new LogHostRunInfoPO(task);
            runInfo.RUN_LOG_ID = DImportConstant.shdf.format(task.SUBMIT_TIME) + "_" + allFiles.size() + "_"
                    + fileName.hashCode();
            runInfo.FILE_NAME = fileName;
            runInfo.RETURN_CODE = 255;
            runInfo.IS_RUN_SUCCESS = -1;
            runInfo.FILE_SIZE = files.get(fileName);
            runInfo.HOST_NAME = master.hostName;
            String localFile = fileName;
            if (isInHdfs) { // HDFS files are staged under the local tmp directory
                localFile = tmpPath + "/" + fileName.substring(fileName.lastIndexOf("/") + 1);
            }
            String[] cmds = procInfo.task.getCommand();
            for (int j = 0; j < cmds.length; j++) {
                cmds[j] = DImportConstant.macroProcess(cmds[j]);
                cmds[j] = cmds[j].replaceAll("\\{file\\}", localFile);
                cmds[j] = cmds[j].replaceAll("\\{host\\}", master.hostName);
            }
            runInfo.RUN_COMMAND = StringUtils.join(" ", cmds);
            master.logWriter.writeLog(runInfo);
            LOG.info("Queued file run info: " + runInfo);
            allFiles.add(runInfo);
        }
        ZKUtil.createSetData(watcher, watcher.dimportRunTaskNode + "/" + task.TASK_ID + "/" + master.hostName,
                DImportConstant.Serialize(allFiles));
        for (LogHostRunInfoPO runInfo : allFiles) {
            if (procInfo.stoped)
                break;
            String fileName = runInfo.FILE_NAME;
            LOG.info("Processing file: " + fileName);
            procInfo.RUN_LOG_ID = runInfo.RUN_LOG_ID;
            runInfo.START_TIME = new Date(System.currentTimeMillis());
            procInfo.processFile = fileName;
            String localFile = fileName;
            try {
                if (isInHdfs) { // stage the HDFS file under the local tmp directory
                    localFile = tmpPath + "/" + fileName.substring(fileName.lastIndexOf("/") + 1);
                }
                procInfo.task.TASK_COMMAND = runInfo.RUN_COMMAND;
                if (isInHdfs) { // copy the file out of HDFS before running the command
                    File lf = new File(localFile);
                    if (lf.exists())
                        lf.delete();
                    FileSystem fs = FileSystem.get(HadoopConf.getConf(conf));
                    LOG.info("Copying from HDFS: " + fileName + " ===> " + localFile);
                    long btime = System.currentTimeMillis();
                    fs.copyToLocalFile(new Path(fileName), new Path(localFile));
                    LOG.info("Copied from HDFS: " + fileName + " ===> " + localFile);
                    runInfo.downTime = System.currentTimeMillis() - btime;
                    fileName = localFile;
                }
                updateHostInfoLog(runInfo, allFiles);
                LOG.info(procInfo.task.TASK_NAME + " commandline: " + procInfo.task.TASK_COMMAND);
                procInfo.proc = execResult(runInfo.RUN_COMMAND);
                runInfo.IS_RUN_SUCCESS = 1;
                runInfo.RETURN_CODE = writeProcessLog(procInfo);
                LOG.info(procInfo.task.TASK_NAME + " return value: " + runInfo.RETURN_CODE);
                // runInfo.RETURN_CODE = procInfo.proc.exitValue();
            } catch (Throwable e) {
                runInfo.ERROR_MSG = e.getMessage();
                if (procInfo.proc != null) {
                    try {
                        procInfo.proc.destroy();
                    } catch (Exception ex) {
                    }
                }
                procInfo.proc = null;
                LOG.error("Error while running file command", e);
            } finally {
                runInfo.END_TIME = new Date(System.currentTimeMillis());
                master.logWriter.updateLog(runInfo);
                updateHostInfoLog(runInfo, allFiles);
                ZKUtil.createSetData(watcher,
                        watcher.dimportRunTaskNode + "/" + task.TASK_ID + "/" + master.hostName,
                        DImportConstant.Serialize(allFiles));
                if (isInHdfs) {
                    File lf = new File(localFile);
                    if (lf.exists())
                        lf.delete();
                }
            }
        }
    } catch (Throwable e) {
        LOG.error("Task failed: " + task, e);
        try {
            if (allFiles.size() > 0) {
                for (LogHostRunInfoPO logHostRunInfoPO : allFiles) {
                    if (logHostRunInfoPO.END_TIME.getTime() < 10000) {
                        logHostRunInfoPO.END_TIME = new Date(System.currentTimeMillis());
                        logHostRunInfoPO.IS_RUN_SUCCESS = 1;
                        logHostRunInfoPO.RETURN_CODE = 2;
                    }
                }
                ZKUtil.createSetData(watcher,
                        watcher.dimportRunTaskNode + "/" + task.TASK_ID + "/" + master.hostName,
                        DImportConstant.Serialize(allFiles));
            }
        } catch (KeeperException e1) {
            LOG.error("update task run info on host :" + watcher.dimportRunTaskNode + "/" + task.TASK_ID + "/"
                    + master.hostName, e);
        } catch (IOException e1) {
            LOG.error("update task run info on host " + watcher.dimportRunTaskNode + "/" + task.TASK_ID + "/"
                    + master.hostName, e);
        }
    } finally {
        synchronized (taskInProgress) {
            taskInProgress.remove(task.getRunTaskId());
        }
    }
}
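In the HDFS branch above, the PathFilter logs each candidate as name.getParent() + "/" + name.getName(), which round-trips to the original path string (except for files directly under the root, where it would double the slash). A quick check with an illustrative path:

import org.apache.hadoop.fs.Path;

public class RoundTripDemo {
    public static void main(String[] args) {
        Path name = new Path("/logs/app/access.log");
        // rebuild the full path from parent and final component
        String rebuilt = name.getParent() + "/" + name.getName();
        System.out.println(rebuilt.equals(name.toString())); // true
    }
}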
From source file:com.facebook.presto.hive.metastore.file.FileHiveMetastore.java
License:Apache License
private <T> void writeFile(String type, Path path, JsonCodec<T> codec, T value, boolean overwrite) {
    try {
        byte[] json = codec.toJsonBytes(value);

        if (!overwrite) {
            if (metadataFileSystem.exists(path)) {
                throw new PrestoException(HIVE_METASTORE_ERROR, type + " file already exists");
            }
        }

        metadataFileSystem.mkdirs(path.getParent());

        // todo implement safer overwrite code
        try (OutputStream outputStream = metadataFileSystem.create(path, overwrite)) {
            outputStream.write(json);
        }
    } catch (Exception e) {
        throw new PrestoException(HIVE_METASTORE_ERROR, "Could not write " + type, e);
    }
}
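The method creates the file's ancestors with mkdirs(path.getParent()) before writing. A minimal sketch of the same ensure-parent-then-write pattern against a raw Hadoop FileSystem, with an illustrative path (FileHiveMetastore wraps this in Presto-specific error handling):

import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WriteWithParentDemo {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.getLocal(new Configuration());
        Path file = new Path("/tmp/metastore/db1/t1/schema.json");
        fs.mkdirs(file.getParent()); // create any missing ancestor directories
        try (OutputStream out = fs.create(file, true)) {
            out.write("{}".getBytes(StandardCharsets.UTF_8));
        }
    }
}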
From source file:com.facebook.presto.hive.metastore.file.FileHiveMetastore.java
License:Apache License
private static boolean isChildDirectory(Path parentDirectory, Path childDirectory) {
    if (parentDirectory.equals(childDirectory)) {
        return true;
    }
    if (childDirectory.isRoot()) {
        return false;
    }
    return isChildDirectory(parentDirectory, childDirectory.getParent());
}
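The recursion terminates because each getParent() call removes one path component until isRoot() is reached. An equivalent iterative form of the same check, as a sketch (getParent() returns null above the root, which also ends the walk):

// Iterative equivalent of the recursive check: climb the child's ancestry
// one getParent() step at a time until a match or the walk runs out.
static boolean isChildDirectoryIterative(Path parentDirectory, Path childDirectory) {
    for (Path p = childDirectory; p != null; p = p.getParent()) {
        if (p.equals(parentDirectory)) {
            return true;
        }
    }
    return false;
}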
From source file:com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore.java
License:Apache License
private static boolean isSameOrParent(Path parent, Path child) {
    int parentDepth = parent.depth();
    int childDepth = child.depth();
    if (parentDepth > childDepth) {
        return false;
    }
    for (int i = childDepth; i > parentDepth; i--) {
        child = child.getParent();
    }
    return parent.equals(child);
}
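depth() counts path components ("/" is 0, "/a/b" is 2), so the loop trims the child with getParent() until both paths sit at the same depth before comparing. A quick check with illustrative paths:

import org.apache.hadoop.fs.Path;

public class DepthDemo {
    public static void main(String[] args) {
        Path child = new Path("/warehouse/db1/t1/part=1");
        System.out.println(child.depth());                         // 4
        System.out.println(child.getParent().getParent());         // /warehouse/db1
        System.out.println(child.getParent().getParent().depth()); // 2
    }
}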
From source file:com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore.java
License:Apache License
private static void renameDirectory(String user, HdfsEnvironment hdfsEnvironment, Path source, Path target,
        Runnable runWhenPathDoesntExist) {
    if (pathExists(user, hdfsEnvironment, target)) {
        throw new PrestoException(HIVE_PATH_ALREADY_EXISTS,
                format("Unable to rename from %s to %s: target directory already exists", source, target));
    }

    if (!pathExists(user, hdfsEnvironment, target.getParent())) {
        createDirectory(user, hdfsEnvironment, target.getParent());
    }

    // The runnable will assume that if rename fails, it will be okay to delete the directory (if the directory is empty).
    // This is not technically true because a race condition still exists.
    runWhenPathDoesntExist.run();

    try {
        if (!hdfsEnvironment.getFileSystem(user, source).rename(source, target)) {
            throw new PrestoException(HIVE_FILESYSTEM_ERROR,
                    format("Failed to rename %s to %s: rename returned false", source, target));
        }
    } catch (IOException e) {
        throw new PrestoException(HIVE_FILESYSTEM_ERROR, format("Failed to rename %s to %s", source, target), e);
    }
}
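On HDFS, rename typically fails (returning false rather than throwing) when the destination's parent directory does not exist, which is why the method creates target.getParent() first. The guard in isolation, as a sketch with illustrative paths:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RenameWithParentDemo {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        Path source = new Path("/tmp/staging/t1");
        Path target = new Path("/warehouse/db1/t1");
        if (!fs.exists(target.getParent())) {
            fs.mkdirs(target.getParent()); // rename needs an existing destination parent
        }
        if (!fs.rename(source, target)) {
            throw new IOException("rename returned false: " + source + " -> " + target);
        }
    }
}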
From source file:com.facebook.presto.raptor.storage.SyncingFileSystem.java
License:Apache License
@Override
public FSDataOutputStream create(Path path, boolean overwrite, int bufferSize, short replication, long blockSize,
        Progressable progress) throws IOException {
    if (exists(path) && !overwrite) {
        throw new IOException("file already exists: " + path);
    }
    Path parent = path.getParent();
    if ((parent != null) && !mkdirs(parent)) {
        throw new IOException("mkdirs failed to create " + parent.toString());
    }
    return new FSDataOutputStream(
            new BufferedOutputStream(new LocalFileOutputStream(pathToFile(path)), bufferSize), statistics);
}
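Unlike the earlier examples, this one checks getParent() for null before calling mkdirs: the root path has no parent, so skipping the check would risk a NullPointerException. A quick check:

import org.apache.hadoop.fs.Path;

public class NullParentDemo {
    public static void main(String[] args) {
        System.out.println(new Path("/").getParent());         // null
        System.out.println(new Path("/data.bin").getParent()); // /
    }
}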
From source file:com.fullcontact.cassandra.io.sstable.Descriptor.java
License:Apache License
/**
 * @param filename The SSTable filename
 * @return Descriptor of the SSTable initialized from filename
 * @see #fromFilename(org.apache.hadoop.fs.Path, String) (File directory, String name)
 */
public static Descriptor fromFilename(String filename) {
    Path file = new Path(filename);
    return fromFilename(file.getParent(), file.getName()).left;
}
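The getParent()/getName() pair is the standard way to split a full path string into its directory and final component. The split in isolation, with an illustrative SSTable filename:

import org.apache.hadoop.fs.Path;

public class SplitDemo {
    public static void main(String[] args) {
        Path file = new Path("/cassandra/data/ks1/cf1/ks1-cf1-hc-1-Data.db");
        System.out.println(file.getParent()); // /cassandra/data/ks1/cf1
        System.out.println(file.getName());   // ks1-cf1-hc-1-Data.db
    }
}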
From source file:com.fullcontact.sstable.hadoop.SSTableFunctions.java
License:Apache License
/**
 * Return a function which determines the SSTable index file when supplied with the SSTable data file.
 *
 * @return Function.
 */
public static Function<Path, Path> indexFile() {
    return new Function<Path, Path>() {
        @Nullable
        @Override
        public Path apply(@Nullable Path dataFile) {
            final String dataFileName = dataFile.getName();
            return new Path(dataFile.getParent(), dataFileName.replace("-Data.db", "-Index.db"));
        }
    };
}
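Combining getParent() with the Path(parent, child) constructor derives a sibling file next to the original. The same construction in isolation, with an illustrative SSTable name:

import org.apache.hadoop.fs.Path;

public class SiblingDemo {
    public static void main(String[] args) {
        Path data = new Path("/sstables/ks1-cf1-hc-1-Data.db");
        // rebuild a sibling path in the same directory with a swapped suffix
        Path index = new Path(data.getParent(), data.getName().replace("-Data.db", "-Index.db"));
        System.out.println(index); // /sstables/ks1-cf1-hc-1-Index.db
    }
}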
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.mapreduce.HoplogUtil.java
License:Apache License
public static Collection<Collection<FileStatus>> getBucketHoplogs(Path regionPath, FileSystem fs, String type,
        long start, long end) throws IOException {
    Collection<Collection<FileStatus>> allBuckets = new ArrayList<Collection<FileStatus>>();

    // hoplog file names follow this pattern
    String HOPLOG_NAME_REGEX = AbstractHoplogOrganizer.HOPLOG_NAME_REGEX + type;
    String EXPIRED_HOPLOG_NAME_REGEX = HOPLOG_NAME_REGEX + AbstractHoplogOrganizer.EXPIRED_HOPLOG_EXTENSION;
    final Pattern pattern = Pattern.compile(HOPLOG_NAME_REGEX);
    final Pattern expiredPattern = Pattern.compile(EXPIRED_HOPLOG_NAME_REGEX);

    Path cleanUpIntervalPath = new Path(regionPath.getParent(), HoplogConfig.CLEAN_UP_INTERVAL_FILE_NAME);
    long intervalDurationMillis = readCleanUpIntervalMillis(fs, cleanUpIntervalPath);

    // a region directory contains directories for individual buckets. A bucket
    // has an integer name.
    FileStatus[] bucketDirs = fs.listStatus(regionPath);
    for (FileStatus bucket : bucketDirs) {
        if (!bucket.isDirectory()) {
            continue;
        }
        try {
            Integer.valueOf(bucket.getPath().getName());
        } catch (NumberFormatException e) {
            continue;
        }

        ArrayList<FileStatus> bucketHoplogs = new ArrayList<FileStatus>();

        // identify all the flush hoplogs and seq hoplogs by visiting all the
        // bucket directories
        FileStatus[] bucketFiles = fs.listStatus(bucket.getPath());

        Map<String, Long> expiredHoplogs = getExpiredHoplogs(fs, bucketFiles, expiredPattern);

        FileStatus oldestHopAfterEndTS = null;
        long oldestHopTS = Long.MAX_VALUE;
        long currentTimeStamp = System.currentTimeMillis();
        for (FileStatus file : bucketFiles) {
            if (!file.isFile()) {
                continue;
            }
            Matcher match = pattern.matcher(file.getPath().getName());
            if (!match.matches()) {
                continue;
            }
            long timeStamp = AbstractHoplogOrganizer.getHoplogTimestamp(match);
            if (start > 0 && timeStamp < start) {
                // this hoplog contains records less than the start time stamp
                continue;
            }
            if (end > 0 && timeStamp > end) {
                // this hoplog contains records mutated after end time stamp. Ignore
                // this hoplog if it is not the oldest.
                if (oldestHopTS > timeStamp) {
                    oldestHopTS = timeStamp;
                    oldestHopAfterEndTS = file;
                }
                continue;
            }
            long expiredTimeStamp = expiredTime(file, expiredHoplogs);
            if (expiredTimeStamp > 0 && intervalDurationMillis > 0) {
                if ((currentTimeStamp - expiredTimeStamp) > 0.8 * intervalDurationMillis) {
                    continue;
                }
            }
            bucketHoplogs.add(file);
        }

        if (oldestHopAfterEndTS != null) {
            long expiredTimeStamp = expiredTime(oldestHopAfterEndTS, expiredHoplogs);
            if (expiredTimeStamp <= 0 || intervalDurationMillis <= 0
                    || (currentTimeStamp - expiredTimeStamp) <= 0.8 * intervalDurationMillis) {
                bucketHoplogs.add(oldestHopAfterEndTS);
            }
        }

        if (bucketHoplogs.size() > 0) {
            allBuckets.add(bucketHoplogs);
        }
    }

    return allBuckets;
}
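Here getParent() locates a store-wide metadata file kept one level above each region directory, so all regions share the same clean-up interval file. The lookup in isolation; the directory layout and file name below are illustrative (the real name comes from HoplogConfig.CLEAN_UP_INTERVAL_FILE_NAME):

import org.apache.hadoop.fs.Path;

public class StoreMetadataDemo {
    public static void main(String[] args) {
        Path regionPath = new Path("/gemfire/hdfs-store/region1");
        // store-level metadata file next to the region directories
        Path cleanUpIntervalPath = new Path(regionPath.getParent(), "cleanUpInterval");
        System.out.println(cleanUpIntervalPath); // /gemfire/hdfs-store/cleanUpInterval
    }
}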