List of usage examples for org.apache.hadoop.fs.Path.getParent()
public Path getParent()
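A minimal standalone sketch of the method's contract (illustration only, not taken from any of the source files below): getParent() strips the final component of the path and returns null once the root is reached.

import org.apache.hadoop.fs.Path;

public class PathGetParentExample {
    public static void main(String[] args) {
        Path file = new Path("/data/2020/01/part-0.parquet");
        System.out.println(file.getParent());              // /data/2020/01
        System.out.println(file.getParent().getParent());  // /data/2020
        System.out.println(new Path("/").getParent());     // null: the root has no parent
    }
}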
From source file:org.apache.drill.exec.planner.sql.handlers.AnalyzeTableHandler.java
License:Apache License
private boolean isStatsStale(DrillFileSystem fs, Path statsFilePath) throws IOException {
    long statsFileModifyTime = fs.getFileStatus(statsFilePath).getModificationTime();
    Path parentPath = statsFilePath.getParent();
    FileStatus directoryStatus = fs.getFileStatus(parentPath);
    // Parent directory modified after stats collection?
    return directoryStatus.getModificationTime() > statsFileModifyTime
        || tableModified(fs, parentPath, statsFileModifyTime);
}
From source file:org.apache.drill.exec.record.metadata.schema.PathSchemaProvider.java
License:Apache License
public PathSchemaProvider(FileSystem fs, Path path) throws IOException {
    this.fs = ImpersonationUtil.createFileSystem(ImpersonationUtil.getProcessUserName(), fs.getConf());

    if (!fs.exists(path.getParent())) {
        throw new IOException(
            String.format("Parent path for schema file [%s] does not exist", path.toUri().getPath()));
    }

    this.path = path;
}
From source file:org.apache.drill.exec.store.ColumnExplorer.java
License:Apache License
/**
 * Low-level parse of partitions, returned as a string array. Returns a
 * null array for invalid values.
 *
 * @param file file path
 * @param root root directory
 * @param hasDirsOnly whether it is file or directory
 * @return array of directory names, or null if the arguments are invalid
 */
public static String[] parsePartitions(Path file, Path root, boolean hasDirsOnly) {
    if (file == null || root == null) {
        return null;
    }

    if (!hasDirsOnly) {
        file = file.getParent();
    }

    int rootDepth = root.depth();
    int fileDepth = file.depth();
    int diffCount = fileDepth - rootDepth;
    if (diffCount < 0) {
        return null;
    }

    String[] diffDirectoryNames = new String[diffCount];
    // start filling in array from the end
    for (int i = rootDepth; fileDepth > i; i++) {
        // place in the end of array
        diffDirectoryNames[fileDepth - i - 1] = file.getName();
        file = file.getParent();
    }

    return diffDirectoryNames;
}
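For illustration, a hypothetical call to the method above (the paths are made up): with root /data and file /data/2020/01/0_0.parquet, the file name is dropped via getParent() first, then each remaining directory name is collected while walking up.

// Hypothetical usage; the paths below are illustrative only.
Path file = new Path("/data/2020/01/0_0.parquet");
Path root = new Path("/data");
String[] dirs = ColumnExplorer.parsePartitions(file, root, false);
// dirs == {"2020", "01"}: directory names ordered from the root downwards.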
From source file:org.apache.drill.exec.store.hive.HiveDrillNativeParquetRowGroupScan.java
License:Apache License
@Override
public Configuration getFsConf(RowGroupReadEntry rowGroupReadEntry) throws IOException {
    Path path = rowGroupReadEntry.getPath().getParent();
    return new ProjectionPusher().pushProjectionsAndFilters(
        new JobConf(HiveUtilities.generateHiveConf(hiveStoragePlugin.getHiveConf(), confProperties)),
        path.getParent());
}
From source file:org.apache.drill.exec.store.hive.HiveDrillNativeParquetScan.java
License:Apache License
@Override
protected void initInternal() throws IOException {
    Map<FileStatus, FileSystem> fileStatusConfMap = new LinkedHashMap<>();
    for (ReadEntryWithPath entry : entries) {
        Path path = entry.getPath();
        Configuration conf = new ProjectionPusher()
            .pushProjectionsAndFilters(new JobConf(hiveStoragePlugin.getHiveConf()), path.getParent());
        FileSystem fs = path.getFileSystem(conf);
        fileStatusConfMap.put(fs.getFileStatus(Path.getPathWithoutSchemeAndAuthority(path)), fs);
    }
    parquetTableMetadata = Metadata.getParquetTableMetadata(fileStatusConfMap, readerConfig);
}
From source file:org.apache.drill.exec.store.hive.HiveDrillNativeScanBatchCreator.java
License:Apache License
@Override
public ScanBatch getBatch(FragmentContext context, HiveDrillNativeParquetSubScan config,
        List<RecordBatch> children) throws ExecutionSetupException {
    final Table table = config.getTable();
    final List<InputSplit> splits = config.getInputSplits();
    final List<Partition> partitions = config.getPartitions();
    final List<SchemaPath> columns = config.getColumns();
    final String partitionDesignator = context.getOptions()
        .getOption(ExecConstants.FILESYSTEM_PARTITION_COLUMN_LABEL).string_val;
    List<Map<String, String>> implicitColumns = Lists.newLinkedList();
    boolean selectAllQuery = AbstractRecordReader.isStarQuery(columns);

    final boolean hasPartitions = (partitions != null && partitions.size() > 0);

    final List<String[]> partitionColumns = Lists.newArrayList();
    final List<Integer> selectedPartitionColumns = Lists.newArrayList();
    List<SchemaPath> newColumns = columns;
    if (!selectAllQuery) {
        // Separate out the partition and non-partition columns. Non-partition columns are passed directly to
        // the ParquetRecordReader. Partition columns are passed to ScanBatch.
        newColumns = Lists.newArrayList();
        Pattern pattern = Pattern.compile(String.format("%s[0-9]+", partitionDesignator));
        for (SchemaPath column : columns) {
            Matcher m = pattern.matcher(column.getAsUnescapedPath());
            if (m.matches()) {
                selectedPartitionColumns.add(
                    Integer.parseInt(column.getAsUnescapedPath().substring(partitionDesignator.length())));
            } else {
                newColumns.add(column);
            }
        }
    }

    final OperatorContext oContext = context.newOperatorContext(config);

    int currentPartitionIndex = 0;
    final List<RecordReader> readers = Lists.newArrayList();
    final HiveConf conf = config.getHiveConf();

    // TODO: In future we can get this cache from Metadata cached on filesystem.
    final Map<String, ParquetMetadata> footerCache = Maps.newHashMap();

    Map<String, String> mapWithMaxColumns = Maps.newLinkedHashMap();
    try {
        for (InputSplit split : splits) {
            final FileSplit fileSplit = (FileSplit) split;
            final Path finalPath = fileSplit.getPath();
            final JobConf cloneJob = new ProjectionPusher().pushProjectionsAndFilters(new JobConf(conf),
                finalPath.getParent());
            final FileSystem fs = finalPath.getFileSystem(cloneJob);

            ParquetMetadata parquetMetadata = footerCache.get(finalPath.toString());
            if (parquetMetadata == null) {
                parquetMetadata = ParquetFileReader.readFooter(cloneJob, finalPath);
                footerCache.put(finalPath.toString(), parquetMetadata);
            }
            final List<Integer> rowGroupNums = getRowGroupNumbersFromFileSplit(fileSplit, parquetMetadata);

            for (int rowGroupNum : rowGroupNums) {
                readers.add(new ParquetRecordReader(context,
                    Path.getPathWithoutSchemeAndAuthority(finalPath).toString(), rowGroupNum, fs,
                    CodecFactory.createDirectCodecFactory(fs.getConf(),
                        new ParquetDirectByteBufferAllocator(oContext.getAllocator()), 0),
                    parquetMetadata, newColumns));

                Map<String, String> implicitValues = Maps.newLinkedHashMap();

                if (hasPartitions) {
                    List<String> values = partitions.get(currentPartitionIndex).getValues();
                    for (int i = 0; i < values.size(); i++) {
                        if (selectAllQuery || selectedPartitionColumns.contains(i)) {
                            implicitValues.put(partitionDesignator + i, values.get(i));
                        }
                    }
                }
                implicitColumns.add(implicitValues);
                if (implicitValues.size() > mapWithMaxColumns.size()) {
                    mapWithMaxColumns = implicitValues;
                }
            }
            currentPartitionIndex++;
        }
    } catch (final IOException | RuntimeException e) {
        AutoCloseables.close(e, readers);
        throw new ExecutionSetupException("Failed to create RecordReaders. " + e.getMessage(), e);
    }

    // all readers should have the same number of implicit columns, add missing ones with value null
    mapWithMaxColumns = Maps.transformValues(mapWithMaxColumns, Functions.constant((String) null));
    for (Map<String, String> map : implicitColumns) {
        map.putAll(Maps.difference(map, mapWithMaxColumns).entriesOnlyOnRight());
    }

    // If there are no readers created (which is possible when the table is empty or no row groups are
    // matched), create an empty RecordReader to output the schema
    if (readers.size() == 0) {
        readers.add(new HiveRecordReader(table, null, null, columns, context, conf,
            ImpersonationUtil.createProxyUgi(config.getUserName(), context.getQueryUserName())));
    }

    return new ScanBatch(config, context, oContext, readers.iterator(), implicitColumns);
}
From source file:org.apache.drill.exec.store.parquet.metadata.Metadata.java
License:Apache License
/**
 * Read the parquet metadata from a file
 *
 * @param path path to the metadata file
 * @param dirsOnly true for {@link Metadata#METADATA_DIRECTORIES_FILENAME}
 *                 or false for {@link Metadata#METADATA_FILENAME} files reading
 * @param metaContext current metadata context
 */
private void readBlockMeta(Path path, boolean dirsOnly, MetadataContext metaContext, FileSystem fs) {
    Stopwatch timer = logger.isDebugEnabled() ? Stopwatch.createStarted() : null;
    Path metadataParentDir = Path.getPathWithoutSchemeAndAuthority(path.getParent());
    String metadataParentDirPath = metadataParentDir.toUri().getPath();
    ObjectMapper mapper = new ObjectMapper();

    final SimpleModule serialModule = new SimpleModule();
    serialModule.addDeserializer(SchemaPath.class, new SchemaPath.De());
    serialModule.addKeyDeserializer(Metadata_V2.ColumnTypeMetadata_v2.Key.class,
        new Metadata_V2.ColumnTypeMetadata_v2.Key.DeSerializer());
    serialModule.addKeyDeserializer(ColumnTypeMetadata_v3.Key.class,
        new ColumnTypeMetadata_v3.Key.DeSerializer());

    AfterburnerModule module = new AfterburnerModule();
    module.setUseOptimizedBeanDeserializer(true);

    mapper.registerModule(serialModule);
    mapper.registerModule(module);
    mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);

    try (InputStream is = fs.open(path)) {
        boolean alreadyCheckedModification;
        boolean newMetadata = false;
        alreadyCheckedModification = metaContext.getStatus(metadataParentDirPath);

        if (dirsOnly) {
            parquetTableMetadataDirs = mapper.readValue(is, ParquetTableMetadataDirs.class);
            if (timer != null) {
                logger.debug("Took {} ms to read directories from directory cache file",
                    timer.elapsed(TimeUnit.MILLISECONDS));
                timer.stop();
            }
            parquetTableMetadataDirs.updateRelativePaths(metadataParentDirPath);
            if (!alreadyCheckedModification && tableModified(parquetTableMetadataDirs.getDirectories(), path,
                    metadataParentDir, metaContext, fs)) {
                parquetTableMetadataDirs = (createMetaFilesRecursively(
                    Path.getPathWithoutSchemeAndAuthority(path.getParent()), fs, true, null)).getRight();
                newMetadata = true;
            }
        } else {
            parquetTableMetadata = mapper.readValue(is, ParquetTableMetadataBase.class);
            if (timer != null) {
                logger.debug("Took {} ms to read metadata from cache file",
                    timer.elapsed(TimeUnit.MILLISECONDS));
                timer.stop();
            }
            if (new MetadataVersion(parquetTableMetadata.getMetadataVersion())
                    .compareTo(new MetadataVersion(3, 0)) >= 0) {
                ((ParquetTableMetadata_v3) parquetTableMetadata).updateRelativePaths(metadataParentDirPath);
            }
            if (!alreadyCheckedModification && tableModified(parquetTableMetadata.getDirectories(), path,
                    metadataParentDir, metaContext, fs)) {
                // TODO change with current columns in existing metadata (auto refresh feature)
                parquetTableMetadata = (createMetaFilesRecursively(
                    Path.getPathWithoutSchemeAndAuthority(path.getParent()), fs, true, null)).getLeft();
                newMetadata = true;
            }

            // DRILL-5009: Remove the RowGroup if it is empty
            List<? extends ParquetFileMetadata> files = parquetTableMetadata.getFiles();
            for (ParquetFileMetadata file : files) {
                List<? extends RowGroupMetadata> rowGroups = file.getRowGroups();
                rowGroups.removeIf(r -> r.getRowCount() == 0);
            }
        }
        if (newMetadata) {
            // if new metadata files were created, invalidate the existing metadata context
            metaContext.clear();
        }
    } catch (IOException e) {
        logger.error("Failed to read '{}' metadata file", path, e);
        metaContext.setMetadataCacheCorrupted(true);
    }
}
From source file:org.apache.drill.exec.store.parquet.Metadata.java
License:Apache License
/**
 * Read the parquet metadata from a file
 *
 * @param path path to the metadata file
 * @param dirsOnly whether to read only directory metadata
 * @param metaContext current metadata context
 * @throws IOException in case of reading errors
 */
private void readBlockMeta(String path, boolean dirsOnly, MetadataContext metaContext) throws IOException {
    Stopwatch timer = Stopwatch.createStarted();
    Path p = new Path(path);
    Path parentDir = p.getParent(); // parent directory of the metadata file
    ObjectMapper mapper = new ObjectMapper();

    final SimpleModule serialModule = new SimpleModule();
    serialModule.addDeserializer(SchemaPath.class, new SchemaPath.De());
    serialModule.addKeyDeserializer(ColumnTypeMetadata_v2.Key.class,
        new ColumnTypeMetadata_v2.Key.DeSerializer());

    AfterburnerModule module = new AfterburnerModule();
    module.setUseOptimizedBeanDeserializer(true);

    mapper.registerModule(serialModule);
    mapper.registerModule(module);
    mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
    FSDataInputStream is = fs.open(p);

    boolean alreadyCheckedModification = false;
    boolean newMetadata = false;

    if (metaContext != null) {
        alreadyCheckedModification = metaContext.getStatus(parentDir.toString());
    }

    if (dirsOnly) {
        parquetTableMetadataDirs = mapper.readValue(is, ParquetTableMetadataDirs.class);
        logger.info("Took {} ms to read directories from directory cache file",
            timer.elapsed(TimeUnit.MILLISECONDS));
        timer.stop();
        if (!alreadyCheckedModification && tableModified(parquetTableMetadataDirs.getDirectories(), p,
                parentDir, metaContext)) {
            parquetTableMetadataDirs = (createMetaFilesRecursively(
                Path.getPathWithoutSchemeAndAuthority(p.getParent()).toString())).getRight();
            newMetadata = true;
        }
    } else {
        parquetTableMetadata = mapper.readValue(is, ParquetTableMetadataBase.class);
        logger.info("Took {} ms to read metadata from cache file", timer.elapsed(TimeUnit.MILLISECONDS));
        timer.stop();
        if (!alreadyCheckedModification && tableModified(parquetTableMetadata.getDirectories(), p,
                parentDir, metaContext)) {
            parquetTableMetadata = (createMetaFilesRecursively(
                Path.getPathWithoutSchemeAndAuthority(p.getParent()).toString())).getLeft();
            newMetadata = true;
        }
    }

    if (newMetadata && metaContext != null) {
        // if new metadata files were created, invalidate the existing metadata context
        metaContext.clear();
    }
}
From source file:org.apache.drill.exec.store.StorageStrategy.java
License:Apache License
/**
 * Creates passed file on appropriate file system.
 * Before creation checks which parent directories do not exist.
 * Applies storage strategy rules to all newly created directories and the file.
 * Will return the first created parent path, or the file if no new parent paths were created.
 *
 * Case 1: /a/b already exists, attempt to create /a/b/c/some_file.txt.
 * Will create the file and return /a/b/c.
 * Case 2: /a/b/c already exists, attempt to create /a/b/c/some_file.txt.
 * Will create the file and return /a/b/c/some_file.txt.
 * Case 3: /a/b/c/some_file.txt already exists, will fail.
 *
 * @param fs file system where file should be located
 * @param file file path
 * @return first created parent path or file
 * @throws IOException is thrown in case of problems while creating path, setting permission
 *                     or adding path to delete on exit list
 */
public Path createFileAndApply(FileSystem fs, Path file) throws IOException {
    List<Path> locations = getNonExistentLocations(fs, file.getParent());
    if (!fs.createNewFile(file)) {
        throw new IOException(String.format("File [%s] already exists on file system [%s].",
            file.toUri().getPath(), fs.getUri()));
    }
    applyToFile(fs, file);

    if (locations.isEmpty()) {
        return file;
    }

    for (Path location : locations) {
        applyStrategy(fs, location, folderPermission, deleteOnExit);
    }
    return locations.get(locations.size() - 1);
}
From source file:org.apache.drill.exec.store.StorageStrategy.java
License:Apache License
/**
 * Returns list of parent locations that do not exist, including the initial location.
 * First in the list will be the initial location,
 * last in the list will be the last parent location that does not exist.
 * If all locations exist, an empty list will be returned.
 *
 * Case 1: if /a/b exists and the passed location is /a/b/c/d,
 * will return a list with two elements: 0 -> /a/b/c/d, 1 -> /a/b/c
 * Case 2: if /a/b exists and the passed location is /a/b, will return an empty list.
 *
 * @param fs file system where locations should be located
 * @param path location path
 * @return list of locations that do not exist
 * @throws IOException in case of troubles accessing file system
 */
private List<Path> getNonExistentLocations(FileSystem fs, Path path) throws IOException {
    List<Path> locations = Lists.newArrayList();
    Path starting = path;
    while (starting != null && !fs.exists(starting)) {
        locations.add(starting);
        starting = starting.getParent();
    }
    return locations;
}
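The same walk-up pattern in isolation, as a self-contained sketch (class and path names are assumptions for illustration, not Drill code): follow getParent() until an existing ancestor is found, collecting every missing location along the way.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MissingParents {
    // Collect the path and every missing ancestor, nearest first.
    static List<Path> missingAncestors(FileSystem fs, Path path) throws IOException {
        List<Path> missing = new ArrayList<>();
        for (Path p = path; p != null && !fs.exists(p); p = p.getParent()) {
            missing.add(p);
        }
        return missing;
    }

    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.getLocal(new Configuration());
        // If /tmp/a does not exist, prints [/tmp/a/b/c, /tmp/a/b, /tmp/a]
        System.out.println(missingAncestors(fs, new Path("/tmp/a/b/c")));
    }
}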