List of usage examples for org.apache.hadoop.fs.Path.getParent()
public Path getParent()
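Before the examples, a minimal sketch of the method's contract (the paths below are illustrative only): getParent() returns the enclosing directory as a Path, and null once the root is reached.

    import org.apache.hadoop.fs.Path;

    public class GetParentDemo {
        public static void main(String[] args) {
            Path p = new Path("hdfs://nn:8020/warehouse/db/table/part-0000");
            System.out.println(p.getParent());           // hdfs://nn:8020/warehouse/db/table
            System.out.println(p.getParent().getName()); // table
            // Walking up eventually reaches the root, whose parent is null.
            Path root = new Path("/");
            System.out.println(root.getParent());        // null
        }
    }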
From source file: org.apache.impala.common.FileSystemUtil.java
License: Apache License

    /**
     * Returns true if Path 'p' is a descendant of Path 'parent', false otherwise.
     * This function relies on Path.equals(), which requires paths to have the same
     * scheme and authority to compare equal. So both 'p' and 'parent' should either
     * be qualified or unqualified paths for this function to behave as expected.
     */
    public static boolean isDescendantPath(Path p, Path parent) {
        if (p == null || parent == null) return false;
        // Walk 'p' up via getParent() until it is at the same depth as 'parent'.
        while (!p.isRoot() && p.depth() != parent.depth()) p = p.getParent();
        if (p.isRoot()) return false;
        boolean result = p.equals(parent);
        if (!result && LOG.isTraceEnabled()) {
            // Add a message to the log if 'p' and 'parent' have inconsistent qualification.
            URI pUri = p.toUri();
            URI parentUri = parent.toUri();
            boolean sameScheme = Objects.equal(pUri.getScheme(), parentUri.getScheme());
            boolean sameAuthority = Objects.equal(pUri.getAuthority(), parentUri.getAuthority());
            if (!sameScheme || !sameAuthority) {
                LOG.trace("Inconsistent scheme or authority for paths: " + p.toString() + " "
                        + parent.toString());
            }
        }
        return result;
    }
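A hedged usage sketch (the paths are invented for illustration). Both arguments are fully qualified, so Path.equals() can match on scheme and authority as the javadoc requires:

    Path table = new Path("hdfs://nn:8020/warehouse/db/tbl");   // depth 3
    Path warehouse = new Path("hdfs://nn:8020/warehouse");      // depth 1
    FileSystemUtil.isDescendantPath(table, warehouse);          // true: tbl -> db -> warehouse
    // Mixing a qualified and an unqualified path defeats Path.equals():
    FileSystemUtil.isDescendantPath(table, new Path("/warehouse")); // false, logged at TRACE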
From source file: org.apache.ivory.cleanup.AbstractCleanupHandler.java
License: Apache License

    private void deleteParentIfEmpty(FileSystem fs, Path parent) throws IOException {
        FileStatus[] files = fs.listStatus(parent);
        if (files != null && files.length == 0) {
            LOG.info("Parent path: " + parent + " is empty, deleting path");
            fs.delete(parent, true);
            // Keep pruning upward until a non-empty ancestor is found.
            deleteParentIfEmpty(fs, parent.getParent());
        }
    }
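A hedged walk-through of the recursion, with invented directory names (the call would happen inside the handler, since the method is private):

    // Hypothetical layout: /staging/logs/2013/05 has just had its last file removed.
    deleteParentIfEmpty(fs, new Path("/staging/logs/2013/05"));
    // 1. /staging/logs/2013/05 is empty  -> deleted; recurse with its parent
    // 2. /staging/logs/2013 is now empty -> deleted; recurse again
    // 3. /staging/logs still has children -> listStatus() is non-empty, recursion stops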
From source file: org.apache.ivory.converter.OozieProcessMapper.java
License: Apache License

    private String getLibDirectory(String wfpath, Cluster cluster) throws IvoryException {
        Path path = new Path(wfpath.replace("${nameNode}", ""));
        String libDir;
        try {
            FileSystem fs = FileSystem.get(ClusterHelper.getConfiguration(cluster));
            FileStatus status = fs.getFileStatus(path);
            if (status.isDir())
                libDir = path.toString() + "/lib";
            else
                libDir = path.getParent().toString() + "/lib";

            if (fs.exists(new Path(libDir)))
                return "${nameNode}" + libDir;
        } catch (IOException e) {
            throw new IvoryException(e);
        }
        return null;
    }
From source file: org.apache.kylin.dict.AppendTrieDictionaryTest.java
License: Apache License

    private void convertDirToOldFormat(String baseDir) throws IOException {
        Path basePath = new Path(baseDir);
        FileSystem fs = HadoopUtil.getFileSystem(basePath);

        // move version dir to base dir, to simulate the older format
        GlobalDictHDFSStore store = new GlobalDictHDFSStore(baseDir);
        Long[] versions = store.listAllVersions();
        Path versionPath = store.getVersionDir(versions[versions.length - 1]);
        // Two getParent() calls move the version dir up to be a sibling of the base dir.
        Path tmpVersionPath = new Path(versionPath.getParent().getParent(), versionPath.getName());
        fs.rename(versionPath, tmpVersionPath);
        fs.delete(new Path(baseDir), true);
        fs.rename(tmpVersionPath, new Path(baseDir));
    }
From source file: org.apache.kylin.dict.DictionaryManager.java
License: Apache License

    private String unpackDataSet(String tempHDFSDir, String dataSetName) throws IOException {
        InputStream in = this.getClass().getResourceAsStream("/org/apache/kylin/dict/" + dataSetName + ".txt");
        if (in == null) // data set resource not found
            return null;

        ByteArrayOutputStream buf = new ByteArrayOutputStream();
        IOUtils.copy(in, buf);
        in.close();
        byte[] bytes = buf.toByteArray();

        Path tmpDataSetPath = new Path(
                tempHDFSDir + "/dict/temp_dataset/" + dataSetName + "_" + bytes.length + ".txt");
        FileSystem fs = HadoopUtil.getFileSystem(tempHDFSDir);

        boolean writtenNewFile = false;
        if (!fs.exists(tmpDataSetPath) || fs.getFileStatus(tmpDataSetPath).getLen() != bytes.length) {
            // Ensure the parent directory exists before creating the file.
            fs.mkdirs(tmpDataSetPath.getParent());
            FSDataOutputStream out = fs.create(tmpDataSetPath);
            IOUtils.copy(new ByteArrayInputStream(bytes), out);
            out.close();
            writtenNewFile = true;
        }

        String qualifiedPath = tmpDataSetPath.makeQualified(fs.getUri(), new Path("/")).toString();
        if (writtenNewFile)
            logger.info("Dictionary temp data set file written to " + qualifiedPath);
        return qualifiedPath;
    }
From source file: org.apache.kylin.storage.hbase.steps.HDFSPathGarbageCollectionStep.java
License: Apache License

    private void dropHdfsPathOnCluster(List<String> oldHdfsPaths, FileSystem fileSystem) throws IOException {
        if (oldHdfsPaths != null && oldHdfsPaths.size() > 0) {
            logger.debug("Drop HDFS path on FileSystem: " + fileSystem.getUri());
            output.append("Drop HDFS path on FileSystem: \"" + fileSystem.getUri() + "\" \n");
            for (String path : oldHdfsPaths) {
                if (path.endsWith("*"))
                    path = path.substring(0, path.length() - 1);

                Path oldPath = new Path(path);
                if (fileSystem.exists(oldPath)) {
                    fileSystem.delete(oldPath, true);
                    logger.debug("HDFS path " + path + " is dropped.");
                    output.append("HDFS path " + path + " is dropped.\n");
                } else {
                    logger.debug("HDFS path " + path + " does not exist.");
                    output.append("HDFS path " + path + " does not exist.\n");
                }
                // If HBase was deployed on another cluster, the job dir is empty and should be
                // dropped, because the rowkey_stats and hfile dirs are both dropped.
                if (fileSystem.listStatus(oldPath.getParent()).length == 0) {
                    Path emptyJobPath = new Path(JobBuilderSupport.getJobWorkingDir(config, getJobId()));
                    if (fileSystem.exists(emptyJobPath)) {
                        fileSystem.delete(emptyJobPath, true);
                        logger.debug("HDFS path " + emptyJobPath + " is empty and dropped.");
                        output.append("HDFS path " + emptyJobPath + " is empty and dropped.\n");
                    }
                }
            }
        }
    }
From source file: org.apache.lens.ml.ExampleUtils.java
License: Apache License

    /**
     * Creates the example table.
     *
     * @param conf           the conf
     * @param database       the database
     * @param tableName      the table name
     * @param sampleDataFile the sample data file
     * @param labelColumn    the label column
     * @param tableParams    the table parameters
     * @param features       the features
     * @throws HiveException the hive exception
     */
    public static void createTable(HiveConf conf, String database, String tableName, String sampleDataFile,
            String labelColumn, Map<String, String> tableParams, String... features) throws HiveException {
        Path dataFilePath = new Path(sampleDataFile);
        Path partDir = dataFilePath.getParent();

        // Create table
        List<FieldSchema> columns = new ArrayList<FieldSchema>();
        // Label is optional. Not used for unsupervised models.
        // If present, label will be the first column, followed by features.
        if (labelColumn != null) {
            columns.add(new FieldSchema(labelColumn, "double", "Labelled Column"));
        }
        for (String feature : features) {
            columns.add(new FieldSchema(feature, "double", "Feature " + feature));
        }

        Table tbl = Hive.get(conf).newTable(database + "." + tableName);
        tbl.setTableType(TableType.MANAGED_TABLE);
        tbl.getTTable().getSd().setCols(columns);
        tbl.getTTable().getParameters().putAll(tableParams);
        tbl.setInputFormatClass(TextInputFormat.class);
        tbl.setSerdeParam(serdeConstants.LINE_DELIM, "\n");
        tbl.setSerdeParam(serdeConstants.FIELD_DELIM, " ");

        List<FieldSchema> partCols = new ArrayList<FieldSchema>(1);
        partCols.add(new FieldSchema("dummy_partition_col", "string", ""));
        tbl.setPartCols(partCols);

        Hive.get(conf).createTable(tbl, false);
        log.info("Created table {}", tableName);

        // Add partition for the data file; the partition location is the file's parent dir.
        AddPartitionDesc partitionDesc = new AddPartitionDesc(database, tableName, false);
        Map<String, String> partSpec = new HashMap<String, String>();
        partSpec.put("dummy_partition_col", "dummy_val");
        partitionDesc.addPartition(partSpec, partDir.toUri().toString());
        Hive.get(conf).createPartitions(partitionDesc);
        log.info("{}: Added partition {}", tableName, partDir.toUri().toString());
    }
From source file: org.apache.lens.ml.impl.MLRunner.java
License: Apache License

    public void createTable(String tableName, String dataFile) throws HiveException {
        File filedataFile = new File(dataFile);
        Path dataFilePath = new Path(filedataFile.toURI());
        Path partDir = dataFilePath.getParent();

        // Create table
        List<FieldSchema> columns = new ArrayList<FieldSchema>();
        // Label is optional. Not used for unsupervised models.
        // If present, label will be the first column, followed by features.
        if (labelColumn != null) {
            columns.add(new FieldSchema(labelColumn, "double", "Labelled Column"));
        }
        for (String feature : features) {
            columns.add(new FieldSchema(feature, "double", "Feature " + feature));
        }

        Table tbl = Hive.get(conf).newTable(database + "." + tableName);
        tbl.setTableType(TableType.MANAGED_TABLE);
        tbl.getTTable().getSd().setCols(columns);
        // tbl.getTTable().getParameters().putAll(new HashMap<String, String>());
        tbl.setInputFormatClass(TextInputFormat.class);
        tbl.setSerdeParam(serdeConstants.LINE_DELIM, "\n");
        tbl.setSerdeParam(serdeConstants.FIELD_DELIM, " ");

        List<FieldSchema> partCols = new ArrayList<FieldSchema>(1);
        partCols.add(new FieldSchema("dummy_partition_col", "string", ""));
        tbl.setPartCols(partCols);

        Hive.get(conf).dropTable(database, tableName, false, true);
        Hive.get(conf).createTable(tbl, true);
        log.info("Created table {}", tableName);

        // Add partition for the data file
        AddPartitionDesc partitionDesc = new AddPartitionDesc(database, tableName, false);
        Map<String, String> partSpec = new HashMap<String, String>();
        partSpec.put("dummy_partition_col", "dummy_val");
        partitionDesc.addPartition(partSpec, partDir.toUri().toString());
        Hive.get(conf).createPartitions(partitionDesc);
        log.info("{}: Added partition {}", tableName, partDir.toUri().toString());
    }
From source file: org.apache.mahout.classifier.bayes.MultipleOutputFormat.java
License: Apache License

    /**
     * Generate the output file name based on a given name and the input file name. If
     * the map input file does not exist (i.e. this is not a map-only job), the given
     * name is returned unchanged. If the config value for
     * "mapred.outputformat.numOfTrailingLegs" is not set, or is set to 0 or negative,
     * the given name is returned unchanged. Otherwise, return a file name consisting
     * of the N trailing legs of the input file name, where N is the config value for
     * "mapred.outputformat.numOfTrailingLegs".
     *
     * @param job
     *          the job config
     * @param name
     *          the output file name
     * @return the output file name based on the given name and the input file name.
     */
    protected String getInputFileBasedOutputFileName(Configuration job, String name) {
        String infilepath = job.get("map.input.file");
        if (infilepath == null) {
            // if the map input file does not exist, then return the given name
            return name;
        }
        int numOfTrailingLegsToUse = job.getInt("mapred.outputformat.numOfTrailingLegs", 0);
        if (numOfTrailingLegsToUse <= 0) {
            return name;
        }
        Path infile = new Path(infilepath);
        Path parent = infile.getParent();
        String midName = infile.getName();
        Path outPath = new Path(midName);
        for (int i = 1; i < numOfTrailingLegsToUse; i++) {
            if (parent == null) {
                break;
            }
            midName = parent.getName();
            if (midName.length() == 0) {
                break;
            }
            parent = parent.getParent();
            outPath = new Path(midName, outPath);
        }
        return outPath.toString();
    }
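To make the leg-walking loop concrete, here is a minimal hedged sketch (the input path and config value are invented):

    import org.apache.hadoop.fs.Path;

    public class TrailingLegsDemo {
        public static void main(String[] args) {
            // Suppose map.input.file = /data/2013/05/part-00000 and
            // mapred.outputformat.numOfTrailingLegs = 2.
            Path infile = new Path("/data/2013/05/part-00000");
            Path parent = infile.getParent();              // /data/2013/05
            Path outPath = new Path(infile.getName());     // part-00000
            // The loop runs once (i = 1), prepending one directory leg:
            outPath = new Path(parent.getName(), outPath); // 05/part-00000
            System.out.println(outPath);                   // prints 05/part-00000
        }
    }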
From source file: org.apache.mahout.clustering.lda.cvb.CVB0Driver.java
License: Apache License

    private double calculatePerplexity(Configuration conf, Path corpusPath, Path modelPath, int iteration)
            throws IOException, ClassNotFoundException, InterruptedException {
        String jobName = "Calculating perplexity for " + modelPath;
        log.info("About to run: {}", jobName);
        // The perplexity output is placed under the model dir's parent, beside the model.
        Path outputPath = perplexityPath(modelPath.getParent(), iteration);
        Job job = prepareJob(corpusPath, outputPath, CachingCVB0PerplexityMapper.class, DoubleWritable.class,
                DoubleWritable.class, DualDoubleSumReducer.class, DoubleWritable.class, DoubleWritable.class);
        job.setJobName(jobName);
        job.setCombinerClass(DualDoubleSumReducer.class);
        job.setNumReduceTasks(1);
        setModelPaths(job, modelPath);
        HadoopUtil.delete(conf, outputPath);
        if (!job.waitForCompletion(true)) {
            throw new InterruptedException("Failed to calculate perplexity for: " + modelPath);
        }
        return readPerplexity(conf, modelPath.getParent(), iteration);
    }