Example usage for org.apache.hadoop.fs Path getParent

Introduction

On this page you can find example usage for org.apache.hadoop.fs Path getParent.

Prototype

public Path getParent() 

Document

Returns the parent of a path or null if at root.
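
A minimal sketch of this behavior (the class name and paths below are illustrative, not taken from any of the examples that follow):

import org.apache.hadoop.fs.Path;

public class GetParentDemo {
    public static void main(String[] args) {
        Path p = new Path("/user/hive/warehouse");
        System.out.println(p.getParent());             // prints /user/hive
        System.out.println(p.getParent().getParent()); // prints /user
        // At the root there is nothing above, so getParent() returns null.
        System.out.println(new Path("/").getParent()); // prints null
    }
}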

Usage

From source file:org.apache.impala.common.FileSystemUtil.java

License:Apache License

/**
 * Returns true if Path 'p' is a descendant of Path 'parent', false otherwise.
 * This function relies on Path.equals(), which requires paths to have the same
 * scheme and authority to compare equal. So 'p' and 'parent' should either both
 * be qualified or both be unqualified for this function to behave as expected.
 */
public static boolean isDescendantPath(Path p, Path parent) {
    if (p == null || parent == null)
        return false;
    while (!p.isRoot() && p.depth() != parent.depth())
        p = p.getParent();
    if (p.isRoot())
        return false;
    boolean result = p.equals(parent);
    if (!result && LOG.isTraceEnabled()) {
        // Add a message to the log if 'p' and 'parent' have inconsistent qualification.
        URI pUri = p.toUri();
        URI parentUri = parent.toUri();
        boolean sameScheme = Objects.equal(pUri.getScheme(), parentUri.getScheme());
        boolean sameAuthority = Objects.equal(pUri.getAuthority(), parentUri.getAuthority());
        if (!sameScheme || !sameAuthority) {
            LOG.trace("Inconsistent schema or authority for paths: " + p.toString() + " " + parent.toString());
        }
    }
    return result;
}
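
A hedged sketch of the qualification caveat noted in the comment above, assuming FileSystemUtil is on the classpath (the namenode authority and table paths are hypothetical):

import org.apache.hadoop.fs.Path;
import org.apache.impala.common.FileSystemUtil;

public class DescendantDemo {
    public static void main(String[] args) {
        Path child = new Path("hdfs://nn:8020/warehouse/db/tbl/part=1");
        // true: both paths carry the same scheme and authority
        System.out.println(FileSystemUtil.isDescendantPath(child, new Path("hdfs://nn:8020/warehouse/db/tbl")));
        // false: 'child' is qualified but this path is not, so Path.equals() can never match
        System.out.println(FileSystemUtil.isDescendantPath(child, new Path("/warehouse/db/tbl")));
    }
}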

From source file:org.apache.ivory.cleanup.AbstractCleanupHandler.java

License:Apache License

private void deleteParentIfEmpty(FileSystem fs, Path parent) throws IOException {
    FileStatus[] files = fs.listStatus(parent);
    if (files != null && files.length == 0) {
        LOG.info("Parent path: " + parent + " is empty, deleting path");
        fs.delete(parent, true);
        deleteParentIfEmpty(fs, parent.getParent());
    }
}
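
Note that the recursion climbs via parent.getParent() until it finds a non-empty ancestor; nothing in the method bounds the walk, so callers should make sure a non-empty directory sits between 'parent' and the filesystem root.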

From source file:org.apache.ivory.converter.OozieProcessMapper.java

License:Apache License

private String getLibDirectory(String wfpath, Cluster cluster) throws IvoryException {
    Path path = new Path(wfpath.replace("${nameNode}", ""));
    String libDir;
    try {
        FileSystem fs = FileSystem.get(ClusterHelper.getConfiguration(cluster));
        FileStatus status = fs.getFileStatus(path);
        if (status.isDir())
            libDir = path.toString() + "/lib";
        else
            libDir = path.getParent().toString() + "/lib";

        if (fs.exists(new Path(libDir)))
            return "${nameNode}" + libDir;
    } catch (IOException e) {
        throw new IvoryException(e);
    }
    return null;
}

From source file:org.apache.kylin.dict.AppendTrieDictionaryTest.java

License:Apache License

private void convertDirToOldFormat(String baseDir) throws IOException {
    Path basePath = new Path(baseDir);
    FileSystem fs = HadoopUtil.getFileSystem(basePath);

    // move version dir to base dir, to simulate the older format
    GlobalDictHDFSStore store = new GlobalDictHDFSStore(baseDir);
    Long[] versions = store.listAllVersions();
    Path versionPath = store.getVersionDir(versions[versions.length - 1]);
    Path tmpVersionPath = new Path(versionPath.getParent().getParent(), versionPath.getName());
    fs.rename(versionPath, tmpVersionPath);
    fs.delete(new Path(baseDir), true);
    fs.rename(tmpVersionPath, new Path(baseDir));
}
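
The getParent().getParent() chain computes the grandparent of the version directory, i.e. a sibling location of the base dir. The version dir is first renamed out to that location so that the base dir can be deleted and the version dir renamed into its place, flattening the layout into the older single-directory format.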

From source file:org.apache.kylin.dict.DictionaryManager.java

License:Apache License

private String unpackDataSet(String tempHDFSDir, String dataSetName) throws IOException {

    InputStream in = this.getClass().getResourceAsStream("/org/apache/kylin/dict/" + dataSetName + ".txt");
    if (in == null) // data set resource not found
        return null;

    ByteArrayOutputStream buf = new ByteArrayOutputStream();
    IOUtils.copy(in, buf);
    in.close();
    byte[] bytes = buf.toByteArray();

    Path tmpDataSetPath = new Path(
            tempHDFSDir + "/dict/temp_dataset/" + dataSetName + "_" + bytes.length + ".txt");

    FileSystem fs = HadoopUtil.getFileSystem(tempHDFSDir);
    boolean writtenNewFile = false;
    if (!fs.exists(tmpDataSetPath) || fs.getFileStatus(tmpDataSetPath).getLen() != bytes.length) {
        fs.mkdirs(tmpDataSetPath.getParent());
        FSDataOutputStream out = fs.create(tmpDataSetPath);
        IOUtils.copy(new ByteArrayInputStream(bytes), out);
        out.close();
        writtenNewFile = true;
    }

    String qualifiedPath = tmpDataSetPath.makeQualified(fs.getUri(), new Path("/")).toString();
    if (writtenNewFile)
        logger.info("Dictionary temp data set file written to " + qualifiedPath);
    return qualifiedPath;
}
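
Two details worth noting: fs.mkdirs(tmpDataSetPath.getParent()) creates the enclosing directory before the file is written, and embedding bytes.length in the file name lets the exists-and-length check above act as a cheap idempotency guard against rewriting an unchanged data set.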

From source file:org.apache.kylin.storage.hbase.steps.HDFSPathGarbageCollectionStep.java

License:Apache License

private void dropHdfsPathOnCluster(List<String> oldHdfsPaths, FileSystem fileSystem) throws IOException {
    if (oldHdfsPaths != null && oldHdfsPaths.size() > 0) {
        logger.debug("Drop HDFS path on FileSystem: " + fileSystem.getUri());
        output.append("Drop HDFS path on FileSystem: \"" + fileSystem.getUri() + "\" \n");
        for (String path : oldHdfsPaths) {
            if (path.endsWith("*"))
                path = path.substring(0, path.length() - 1);

            Path oldPath = new Path(path);
            if (fileSystem.exists(oldPath)) {
                fileSystem.delete(oldPath, true);
                logger.debug("HDFS path " + path + " is dropped.");
                output.append("HDFS path " + path + " is dropped.\n");
            } else {
                logger.debug("HDFS path " + path + " not exists.");
                output.append("HDFS path " + path + " not exists.\n");
            }/*from  w w w .  j  a v a  2s .  c  om*/
            // If hbase was deployed on another cluster, the job dir is empty and should be dropped,
            // because of rowkey_stats and hfile dirs are both dropped.
            if (fileSystem.listStatus(oldPath.getParent()).length == 0) {
                Path emptyJobPath = new Path(JobBuilderSupport.getJobWorkingDir(config, getJobId()));
                if (fileSystem.exists(emptyJobPath)) {
                    fileSystem.delete(emptyJobPath, true);
                    logger.debug("HDFS path " + emptyJobPath + " is empty and dropped.");
                    output.append("HDFS path " + emptyJobPath + " is empty and dropped.\n");
                }
            }
        }
    }
}

From source file:org.apache.lens.ml.ExampleUtils.java

License:Apache License

/**
 * Creates the example table.
 *
 * @param conf           the conf
 * @param database       the database
 * @param tableName      the table name
 * @param sampleDataFile the sample data file
 * @param labelColumn    the label column
 * @param features       the features
 * @throws HiveException the hive exception
 */
public static void createTable(HiveConf conf, String database, String tableName, String sampleDataFile,
        String labelColumn, Map<String, String> tableParams, String... features) throws HiveException {

    Path dataFilePath = new Path(sampleDataFile);
    Path partDir = dataFilePath.getParent();

    // Create table
    List<FieldSchema> columns = new ArrayList<FieldSchema>();

    // Label is optional. Not used for unsupervised models.
    // If present, label will be the first column, followed by features
    if (labelColumn != null) {
        columns.add(new FieldSchema(labelColumn, "double", "Labelled Column"));
    }

    for (String feature : features) {
        columns.add(new FieldSchema(feature, "double", "Feature " + feature));
    }

    Table tbl = Hive.get(conf).newTable(database + "." + tableName);
    tbl.setTableType(TableType.MANAGED_TABLE);
    tbl.getTTable().getSd().setCols(columns);
    tbl.getTTable().getParameters().putAll(tableParams);
    tbl.setInputFormatClass(TextInputFormat.class);
    tbl.setSerdeParam(serdeConstants.LINE_DELIM, "\n");
    tbl.setSerdeParam(serdeConstants.FIELD_DELIM, " ");

    List<FieldSchema> partCols = new ArrayList<FieldSchema>(1);
    partCols.add(new FieldSchema("dummy_partition_col", "string", ""));
    tbl.setPartCols(partCols);

    Hive.get(conf).createTable(tbl, false);
    log.info("Created table {}", tableName);

    // Add partition for the data file
    AddPartitionDesc partitionDesc = new AddPartitionDesc(database, tableName, false);
    Map<String, String> partSpec = new HashMap<String, String>();
    partSpec.put("dummy_partition_col", "dummy_val");
    partitionDesc.addPartition(partSpec, partDir.toUri().toString());
    Hive.get(conf).createPartitions(partitionDesc);
    log.info("{}: Added partition {}", tableName, partDir.toUri().toString());
}
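
Here getParent() turns the sample data file's path into its enclosing directory, which is then registered as the partition location; Hive partitions point at directories rather than individual files. The same pattern appears in the next example.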

From source file:org.apache.lens.ml.impl.MLRunner.java

License:Apache License

public void createTable(String tableName, String dataFile) throws HiveException {

    File filedataFile = new File(dataFile);
    Path dataFilePath = new Path(filedataFile.toURI());
    Path partDir = dataFilePath.getParent();

    // Create table
    List<FieldSchema> columns = new ArrayList<FieldSchema>();

    // Label is optional. Not used for unsupervised models.
    // If present, label will be the first column, followed by features
    if (labelColumn != null) {
        columns.add(new FieldSchema(labelColumn, "double", "Labelled Column"));
    }

    for (String feature : features) {
        columns.add(new FieldSchema(feature, "double", "Feature " + feature));
    }

    Table tbl = Hive.get(conf).newTable(database + "." + tableName);
    tbl.setTableType(TableType.MANAGED_TABLE);
    tbl.getTTable().getSd().setCols(columns);
    // tbl.getTTable().getParameters().putAll(new HashMap<String, String>());
    tbl.setInputFormatClass(TextInputFormat.class);
    tbl.setSerdeParam(serdeConstants.LINE_DELIM, "\n");
    tbl.setSerdeParam(serdeConstants.FIELD_DELIM, " ");

    List<FieldSchema> partCols = new ArrayList<FieldSchema>(1);
    partCols.add(new FieldSchema("dummy_partition_col", "string", ""));
    tbl.setPartCols(partCols);

    Hive.get(conf).dropTable(database, tableName, false, true);
    Hive.get(conf).createTable(tbl, true);
    log.info("Created table {}", tableName);

    // Add partition for the data file
    AddPartitionDesc partitionDesc = new AddPartitionDesc(database, tableName, false);
    Map<String, String> partSpec = new HashMap<String, String>();
    partSpec.put("dummy_partition_col", "dummy_val");
    partitionDesc.addPartition(partSpec, partDir.toUri().toString());
    Hive.get(conf).createPartitions(partitionDesc);
    log.info("{}: Added partition {}", tableName, partDir.toUri().toString());
}

From source file:org.apache.mahout.classifier.bayes.MultipleOutputFormat.java

License:Apache License

/**
 * Generates the output file name based on a given name and the input file name.
 * If there is no map input file (i.e. this is not a map-only job), the given
 * name is returned unchanged. If the config value for
 * "mapred.outputformat.numOfTrailingLegs" is not set, or is set to zero or a
 * negative value, the given name is returned unchanged. Otherwise, returns a
 * file name consisting of the N trailing legs of the input file name, where N
 * is the config value for "mapred.outputformat.numOfTrailingLegs".
 * 
 * @param job
 *          the job config
 * @param name
 *          the output file name
 * @return the output file name based on the given name and the input file name
 */
protected String getInputFileBasedOutputFileName(Configuration job, String name) {
    String infilepath = job.get("map.input.file");
    if (infilepath == null) {
        // if the map input file is not set, then return the given name
        return name;
    }
    int numOfTrailingLegsToUse = job.getInt("mapred.outputformat.numOfTrailingLegs", 0);
    if (numOfTrailingLegsToUse <= 0) {
        return name;
    }
    Path infile = new Path(infilepath);
    Path parent = infile.getParent();
    String midName = infile.getName();
    Path outPath = new Path(midName);
    for (int i = 1; i < numOfTrailingLegsToUse; i++) {
        if (parent == null) {
            break;
        }
        midName = parent.getName();
        if (midName.length() == 0) {
            break;
        }
        parent = parent.getParent();
        outPath = new Path(midName, outPath);
    }
    return outPath.toString();
}
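
As a worked example with a hypothetical input: if "map.input.file" is /a/b/c/part-00000 and "mapred.outputformat.numOfTrailingLegs" is 3, the loop prepends the two enclosing directory names obtained via getParent(), returning b/c/part-00000.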

From source file:org.apache.mahout.clustering.lda.cvb.CVB0Driver.java

License:Apache License

private double calculatePerplexity(Configuration conf, Path corpusPath, Path modelPath, int iteration)
        throws IOException, ClassNotFoundException, InterruptedException {
    String jobName = "Calculating perplexity for " + modelPath;
    log.info("About to run: {}", jobName);

    Path outputPath = perplexityPath(modelPath.getParent(), iteration);
    Job job = prepareJob(corpusPath, outputPath, CachingCVB0PerplexityMapper.class, DoubleWritable.class,
            DoubleWritable.class, DualDoubleSumReducer.class, DoubleWritable.class, DoubleWritable.class);

    job.setJobName(jobName);
    job.setCombinerClass(DualDoubleSumReducer.class);
    job.setNumReduceTasks(1);
    setModelPaths(job, modelPath);
    HadoopUtil.delete(conf, outputPath);
    if (!job.waitForCompletion(true)) {
        throw new InterruptedException("Failed to calculate perplexity for: " + modelPath);
    }
    return readPerplexity(conf, modelPath.getParent(), iteration);
}
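
Both getParent() calls resolve the directory containing the model directory, so the per-iteration perplexity output appears to be written, and later read back, alongside the model rather than inside it.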