Example usage for org.apache.hadoop.fs Path getParent

Introduction

This page lists example usages of org.apache.hadoop.fs.Path#getParent(), drawn from open source projects.

Prototype

public Path getParent() 

Document

Returns the parent of a path or null if at root.
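
Before the project examples below, here is a minimal, self-contained sketch of the method's behavior; the class name and sample paths are invented for illustration only. It shows the parent of a normal path, the common idiom of building a sibling path from the parent (used by several examples below), and the null result at the root.

import org.apache.hadoop.fs.Path;

public class GetParentDemo {
    public static void main(String[] args) {
        Path dataFile = new Path("/user/warehouse/table/data-00000.db");

        // Parent of a normal path: "/user/warehouse/table"
        Path parent = dataFile.getParent();
        System.out.println(parent);

        // Typical idiom: derive a sibling file next to the original one
        Path sibling = new Path(parent, "data-00000.idx");
        System.out.println(sibling); // "/user/warehouse/table/data-00000.idx"

        // At the root there is no parent, so guard against null
        Path root = new Path("/");
        System.out.println(root.getParent()); // null
    }
}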

Usage

From source file:com.elex.dmp.lda.CVB0Driver.java

License:Apache License

private static double calculatePerplexity(Configuration conf, Path corpusPath, Path modelPath, int iteration)
        throws IOException, ClassNotFoundException, InterruptedException {
    String jobName = "Calculating perplexity for " + modelPath;
    log.info("About to run: " + jobName);
    Job job = new Job(conf, jobName);
    job.setJarByClass(CachingCVB0PerplexityMapper.class);
    job.setMapperClass(CachingCVB0PerplexityMapper.class);
    job.setCombinerClass(DualDoubleSumReducer.class);
    job.setReducerClass(DualDoubleSumReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(DoubleWritable.class);
    job.setOutputValueClass(DoubleWritable.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.addInputPath(job, corpusPath);
    Path outputPath = perplexityPath(modelPath.getParent(), iteration);
    FileOutputFormat.setOutputPath(job, outputPath);
    setModelPaths(job, modelPath);
    HadoopUtil.delete(conf, outputPath);
    if (!job.waitForCompletion(true)) {
        throw new InterruptedException("Failed to calculate perplexity for: " + modelPath);
    }
    return readPerplexity(conf, modelPath.getParent(), iteration);
}

From source file:com.ery.dimport.daemon.TaskManager.java

License:Apache License

public void runTask(final TaskInfo task) {
    List<LogHostRunInfoPO> allFiles = new ArrayList<LogHostRunInfoPO>();
    try {
        task.START_TIME = new Date(System.currentTimeMillis());
        boolean needUpdate = false;
        TaskInfo exists = allTask.get(task.TASK_ID);
        if (exists == null) {
            needUpdate = true;
        } else {
            task.hosts = exists.hosts;
        }
        if (task.hosts == null || task.hosts.size() == 0) {
            task.hosts = new ArrayList<String>(master.getServerManager().getOnlineServers().keySet());
            needUpdate = true;
        }
        if (ZKUtil.checkExists(watcher, watcher.dimportRunTaskNode + "/" + task.TASK_ID) == -1) {
            needUpdate = true;
        }
        if (needUpdate) {
            try {
                task.HOST_SIZE = task.hosts.size();
                master.logWriter.writeLog(task);
                ZKUtil.createSetData(watcher, watcher.dimportRunTaskNode + "/" + task.TASK_ID,
                        DImportConstant.Serialize(task));
            } catch (Throwable e) {
            }
        }
        Thread thread = Thread.currentThread();
        ProcessInfo procInfo = null;
        synchronized (taskInProgress) {
            procInfo = taskInProgress.get(task.getRunTaskId());
        }
        procInfo.thread = thread;
        procInfo.startTime = System.currentTimeMillis();
        String filePath = task.FILE_PATH;
        boolean isInHdfs = false;
        final Map<String, Long> files = new HashMap<String, Long>();
        String tmpPath = conf.get(DImportConstant.DIMPORT_PROCESS_TMPDATA_DIR, System.getProperty("user.home"));
        if (tmpPath.endsWith("/")) {
            tmpPath = tmpPath.substring(0, tmpPath.length() - 1);
        }
        if (filePath == null || filePath.equals("")) {
            files.put("", 0L);
        } else {
            if (task.fileNamePattern != null || (task.FILE_FILTER != null && !task.FILE_FILTER.equals(""))) {
                task.FILE_FILTER = DImportConstant.macroProcess(task.FILE_FILTER);
                task.FILE_FILTER = task.FILE_FILTER.replaceAll("\\{host\\}", this.master.hostName);
                task.fileNamePattern = Pattern.compile(task.FILE_FILTER);
            }
            Matcher m = hdfsUrlPattern.matcher(filePath);
            if (m.matches()) {
                isInHdfs = true;
                filePath = m.group(2);
                // for (String string : conf.getValByRegex(".*").keySet()) {
                // System.out.println(string + "=" + conf.get(string));
                // }
                Path dirPath = new Path(filePath);
                FileSystem fs = FileSystem.get(HadoopConf.getConf(conf));
                if (!fs.exists(dirPath) || !fs.isDirectory(dirPath)) {
                    throw new IOException("HDFS directory " + filePath + " does not exist or is not a directory");
                }
                FileStatus[] hFiles = fs.listStatus(dirPath, new PathFilter() {
                    @Override
                    public boolean accept(Path name) {
                        if (task.fileNamePattern != null) {
                            System.out.println("hdfs listStatus:" + name.getParent() + "/" + name.getName());
                            return task.fileNamePattern.matcher(name.getName()).matches();
                        } else {
                            return true;
                        }
                    }
                });
                for (int i = 0; i < hFiles.length; i++) {
                    files.put(hFiles[i].getPath().toString(), hFiles[i].getLen());
                }
            } else {
                java.io.File f = new File(filePath);
                if (!f.exists() || !f.isDirectory()) {
                    throw new IOException(
                            "Local directory " + filePath + " does not exist or is not a directory");
                }
                File[] lFiles = f.listFiles(new FilenameFilter() {
                    public boolean accept(File dir, String name) {
                        if (task.fileNamePattern != null) {
                            System.out.println("local fs listStatus:" + dir + "/" + name);
                            return task.fileNamePattern.matcher(name).matches();
                        } else {
                            return true;
                        }
                    }
                });
                for (int i = 0; i < lFiles.length; i++) {
                    files.put(lFiles[i].getAbsolutePath(), lFiles[i].length());
                }
            }
        }
        for (String fileName : files.keySet()) {
            LogHostRunInfoPO runInfo = new LogHostRunInfoPO(task);
            runInfo.RUN_LOG_ID = DImportConstant.shdf.format(task.SUBMIT_TIME) + "_" + allFiles.size() + "_"
                    + fileName.hashCode();
            runInfo.FILE_NAME = fileName;
            runInfo.RETURN_CODE = 255;
            runInfo.IS_RUN_SUCCESS = -1;
            runInfo.FILE_SIZE = files.get(fileName);
            runInfo.HOST_NAME = master.hostName;
            String localFile = fileName;
            if (isInHdfs) { // HDFS files are copied to a local temp path, so use the local file name
                localFile = tmpPath + "/" + fileName.substring(fileName.lastIndexOf("/") + 1);
            }
            // substitute macros ({file}, {host}) into the task command for this file
            String[] cmds = procInfo.task.getCommand();
            for (int j = 0; j < cmds.length; j++) {
                cmds[j] = DImportConstant.macroProcess(cmds[j]);
                cmds[j] = cmds[j].replaceAll("\\{file\\}", localFile);
                cmds[j] = cmds[j].replaceAll("\\{host\\}", master.hostName);
            }
            runInfo.RUN_COMMAND = StringUtils.join(" ", cmds);
            master.logWriter.writeLog(runInfo);
            LOG.info("Generated run info: " + runInfo);
            allFiles.add(runInfo);
        }
        ZKUtil.createSetData(watcher, watcher.dimportRunTaskNode + "/" + task.TASK_ID + "/" + master.hostName,
                DImportConstant.Serialize(allFiles));
        for (LogHostRunInfoPO runInfo : allFiles) {
            if (procInfo.stoped)
                break;
            String fileName = runInfo.FILE_NAME;
            LOG.info("Processing file: " + fileName);
            procInfo.RUN_LOG_ID = runInfo.RUN_LOG_ID;
            runInfo.START_TIME = new Date(System.currentTimeMillis());
            procInfo.processFile = fileName;
            String localFile = fileName;
            try {
                if (isInHdfs) { // map the HDFS file to its local temp copy
                    localFile = tmpPath + "/" + fileName.substring(fileName.lastIndexOf("/") + 1);
                }
                procInfo.task.TASK_COMMAND = runInfo.RUN_COMMAND;
                if (isInHdfs) { // download the file from HDFS to the local temp path
                    File lf = new File(localFile);
                    if (lf.exists())
                        lf.delete();
                    FileSystem fs = FileSystem.get(HadoopConf.getConf(conf));
                    LOG.info("Copying from HDFS: " + fileName + " ===> " + localFile);
                    long btime = System.currentTimeMillis();
                    fs.copyToLocalFile(new Path(fileName), new Path(localFile));
                    LOG.info("Finished copying from HDFS: " + fileName + " ===> " + localFile);
                    runInfo.downTime = System.currentTimeMillis() - btime;
                    fileName = localFile;
                }
                updateHostInfoLog(runInfo, allFiles);
                LOG.info(procInfo.task.TASK_NAME + " commandline: " + procInfo.task.TASK_COMMAND);
                procInfo.proc = execResult(runInfo.RUN_COMMAND);
                runInfo.IS_RUN_SUCCESS = 1;
                runInfo.RETURN_CODE = writeProcessLog(procInfo);
                LOG.info(procInfo.task.TASK_NAME + " return value: " + runInfo.RETURN_CODE);
                // runInfo.RETURN_CODE = procInfo.proc.exitValue();
            } catch (Throwable e) {
                runInfo.ERROR_MSG = e.getMessage();
                if (procInfo.proc != null) {
                    try {
                        procInfo.proc.destroy();
                    } catch (Exception ex) {
                    }
                }
                procInfo.proc = null;
                LOG.error("", e);
            } finally { // always record the end time and publish the updated run info
                runInfo.END_TIME = new Date(System.currentTimeMillis());
                master.logWriter.updateLog(runInfo);
                updateHostInfoLog(runInfo, allFiles);
                ZKUtil.createSetData(watcher,
                        watcher.dimportRunTaskNode + "/" + task.TASK_ID + "/" + master.hostName,
                        DImportConstant.Serialize(allFiles));
                if (isInHdfs) {
                    File lf = new File(localFile);
                    if (lf.exists())
                        lf.delete();
                }
            }
        }
    } catch (Throwable e) {
        LOG.error("Task run failed: " + task, e);
        try {
            if (allFiles.size() > 0) {
                for (LogHostRunInfoPO logHostRunInfoPO : allFiles) {
                    if (logHostRunInfoPO.END_TIME.getTime() < 10000) {
                        logHostRunInfoPO.END_TIME = new Date(System.currentTimeMillis());
                        logHostRunInfoPO.IS_RUN_SUCCESS = 1;
                        logHostRunInfoPO.RETURN_CODE = 2;
                    }
                }
                ZKUtil.createSetData(watcher,
                        watcher.dimportRunTaskNode + "/" + task.TASK_ID + "/" + master.hostName,
                        DImportConstant.Serialize(allFiles));
            }
        } catch (KeeperException e1) {
            LOG.error("update task run info on host :" + watcher.dimportRunTaskNode + "/" + task.TASK_ID + "/"
                    + master.hostName, e);
        } catch (IOException e1) {
            LOG.error("update task run info on host " + watcher.dimportRunTaskNode + "/" + task.TASK_ID + "/"
                    + master.hostName, e);
        }
    } finally { // remove the task from the in-progress map
        synchronized (taskInProgress) {
            taskInProgress.remove(task.getRunTaskId());
        }
    }
}

From source file:com.facebook.presto.hive.metastore.file.FileHiveMetastore.java

License:Apache License

private <T> void writeFile(String type, Path path, JsonCodec<T> codec, T value, boolean overwrite) {
    try {
        byte[] json = codec.toJsonBytes(value);

        if (!overwrite) {
            if (metadataFileSystem.exists(path)) {
                throw new PrestoException(HIVE_METASTORE_ERROR, type + " file already exists");
            }
        }

        metadataFileSystem.mkdirs(path.getParent());

        // todo implement safer overwrite code
        try (OutputStream outputStream = metadataFileSystem.create(path, overwrite)) {
            outputStream.write(json);
        }
    } catch (Exception e) {
        throw new PrestoException(HIVE_METASTORE_ERROR, "Could not write " + type, e);
    }
}

From source file:com.facebook.presto.hive.metastore.file.FileHiveMetastore.java

License:Apache License

private static boolean isChildDirectory(Path parentDirectory, Path childDirectory) {
    if (parentDirectory.equals(childDirectory)) {
        return true;
    }
    if (childDirectory.isRoot()) {
        return false;
    }
    return isChildDirectory(parentDirectory, childDirectory.getParent());
}

From source file:com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore.java

License:Apache License

private static boolean isSameOrParent(Path parent, Path child) {
    int parentDepth = parent.depth();
    int childDepth = child.depth();
    if (parentDepth > childDepth) {
        return false;
    }
    for (int i = childDepth; i > parentDepth; i--) {
        child = child.getParent();
    }
    return parent.equals(child);
}

From source file:com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore.java

License:Apache License

private static void renameDirectory(String user, HdfsEnvironment hdfsEnvironment, Path source, Path target,
        Runnable runWhenPathDoesntExist) {
    if (pathExists(user, hdfsEnvironment, target)) {
        throw new PrestoException(HIVE_PATH_ALREADY_EXISTS,
                format("Unable to rename from %s to %s: target directory already exists", source, target));
    }

    if (!pathExists(user, hdfsEnvironment, target.getParent())) {
        createDirectory(user, hdfsEnvironment, target.getParent());
    }

    // The runnable will assume that if rename fails, it will be okay to delete the directory (if the directory is empty).
    // This is not technically true because a race condition still exists.
    runWhenPathDoesntExist.run();

    try {
        if (!hdfsEnvironment.getFileSystem(user, source).rename(source, target)) {
            throw new PrestoException(HIVE_FILESYSTEM_ERROR,
                    format("Failed to rename %s to %s: rename returned false", source, target));
        }
    } catch (IOException e) {
        throw new PrestoException(HIVE_FILESYSTEM_ERROR, format("Failed to rename %s to %s", source, target),
                e);
    }
}

From source file:com.facebook.presto.raptor.storage.SyncingFileSystem.java

License:Apache License

@Override
public FSDataOutputStream create(Path path, boolean overwrite, int bufferSize, short replication,
        long blockSize, Progressable progress) throws IOException {
    if (exists(path) && !overwrite) {
        throw new IOException("file already exists: " + path);
    }
    Path parent = path.getParent();
    if ((parent != null) && !mkdirs(parent)) {
        throw new IOException("mkdirs failed to create " + parent.toString());
    }
    return new FSDataOutputStream(
            new BufferedOutputStream(new LocalFileOutputStream(pathToFile(path)), bufferSize), statistics);
}

From source file:com.fullcontact.cassandra.io.sstable.Descriptor.java

License:Apache License

/**
 * @param filename The SSTable filename
 * @return Descriptor of the SSTable initialized from filename
 * @see #fromFilename(org.apache.hadoop.fs.Path, String) (File directory, String name)
 */
public static Descriptor fromFilename(String filename) {
    Path file = new Path(filename);
    return fromFilename(file.getParent(), file.getName()).left;
}

From source file:com.fullcontact.sstable.hadoop.SSTableFunctions.java

License:Apache License

/**
 * Return a function which determines the SSTable index file when supplied with the SSTable data file.
 * @return Function mapping an SSTable data file path to its index file path.
 */
public static Function<Path, Path> indexFile() {
    return new Function<Path, Path>() {
        @Nullable
        @Override
        public Path apply(@Nullable Path dataFile) {
            final String dataFileName = dataFile.getName();
            return new Path(dataFile.getParent(), dataFileName.replace("-Data.db", "-Index.db"));
        }
    };
}

From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.mapreduce.HoplogUtil.java

License:Apache License

public static Collection<Collection<FileStatus>> getBucketHoplogs(Path regionPath, FileSystem fs, String type,
        long start, long end) throws IOException {
    Collection<Collection<FileStatus>> allBuckets = new ArrayList<Collection<FileStatus>>();

    // hoplog file names follow this pattern
    String HOPLOG_NAME_REGEX = AbstractHoplogOrganizer.HOPLOG_NAME_REGEX + type;
    String EXPIRED_HOPLOG_NAME_REGEX = HOPLOG_NAME_REGEX + AbstractHoplogOrganizer.EXPIRED_HOPLOG_EXTENSION;
    final Pattern pattern = Pattern.compile(HOPLOG_NAME_REGEX);
    final Pattern expiredPattern = Pattern.compile(EXPIRED_HOPLOG_NAME_REGEX);

    Path cleanUpIntervalPath = new Path(regionPath.getParent(), HoplogConfig.CLEAN_UP_INTERVAL_FILE_NAME);
    long intervalDurationMillis = readCleanUpIntervalMillis(fs, cleanUpIntervalPath);

    // a region directory contains directories for individual buckets; a bucket
    // has an integer name
    FileStatus[] bucketDirs = fs.listStatus(regionPath);

    for (FileStatus bucket : bucketDirs) {
        if (!bucket.isDirectory()) {
            continue;
        }
        try {
            Integer.valueOf(bucket.getPath().getName());
        } catch (NumberFormatException e) {
            continue;
        }

        ArrayList<FileStatus> bucketHoplogs = new ArrayList<FileStatus>();

        // identify all the flush hoplogs and seq hoplogs by visiting all the
        // bucket directories
        FileStatus[] bucketFiles = fs.listStatus(bucket.getPath());

        Map<String, Long> expiredHoplogs = getExpiredHoplogs(fs, bucketFiles, expiredPattern);

        FileStatus oldestHopAfterEndTS = null;
        long oldestHopTS = Long.MAX_VALUE;
        long currentTimeStamp = System.currentTimeMillis();
        for (FileStatus file : bucketFiles) {
            if (!file.isFile()) {
                continue;
            }

            Matcher match = pattern.matcher(file.getPath().getName());
            if (!match.matches()) {
                continue;
            }

            long timeStamp = AbstractHoplogOrganizer.getHoplogTimestamp(match);
            if (start > 0 && timeStamp < start) {
                // this hoplog contains records less than the start time stamp
                continue;
            }

            if (end > 0 && timeStamp > end) {
                // this hoplog contains records mutated after end time stamp. Ignore
                // this hoplog if it is not the oldest.
                if (oldestHopTS > timeStamp) {
                    oldestHopTS = timeStamp;
                    oldestHopAfterEndTS = file;
                }
                continue;
            }
            long expiredTimeStamp = expiredTime(file, expiredHoplogs);
            if (expiredTimeStamp > 0 && intervalDurationMillis > 0) {
                if ((currentTimeStamp - expiredTimeStamp) > 0.8 * intervalDurationMillis) {
                    continue;
                }
            }
            bucketHoplogs.add(file);
        }

        if (oldestHopAfterEndTS != null) {
            long expiredTimeStamp = expiredTime(oldestHopAfterEndTS, expiredHoplogs);
            if (expiredTimeStamp <= 0 || intervalDurationMillis <= 0
                    || (currentTimeStamp - expiredTimeStamp) <= 0.8 * intervalDurationMillis) {
                bucketHoplogs.add(oldestHopAfterEndTS);
            }
        }

        if (bucketHoplogs.size() > 0) {
            allBuckets.add(bucketHoplogs);
        }
    }

    return allBuckets;
}