List of usage examples for org.apache.hadoop.fs.Path.getParent()
public Path getParent()
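A minimal standalone sketch of the method's contract (illustration only, not taken from any of the source files below): getParent() strips the final component of the path and returns null once the root is reached.

import org.apache.hadoop.fs.Path;

public class PathGetParentExample {
    public static void main(String[] args) {
        Path file = new Path("/data/2020/01/part-0.parquet");
        System.out.println(file.getParent());              // /data/2020/01
        System.out.println(file.getParent().getParent());  // /data/2020
        System.out.println(new Path("/").getParent());     // null: the root has no parent
    }
}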
From source file:org.apache.drill.exec.planner.sql.handlers.AnalyzeTableHandler.java
License:Apache License
private boolean isStatsStale(DrillFileSystem fs, Path statsFilePath) throws IOException {
    long statsFileModifyTime = fs.getFileStatus(statsFilePath).getModificationTime();
    Path parentPath = statsFilePath.getParent();
    FileStatus directoryStatus = fs.getFileStatus(parentPath);
    // Parent directory modified after stats collection?
    return directoryStatus.getModificationTime() > statsFileModifyTime
        || tableModified(fs, parentPath, statsFileModifyTime);
}
From source file:org.apache.drill.exec.record.metadata.schema.PathSchemaProvider.java
License:Apache License
public PathSchemaProvider(FileSystem fs, Path path) throws IOException {
    this.fs = ImpersonationUtil.createFileSystem(ImpersonationUtil.getProcessUserName(), fs.getConf());

    if (!fs.exists(path.getParent())) {
        throw new IOException(
            String.format("Parent path for schema file [%s] does not exist", path.toUri().getPath()));
    }

    this.path = path;
}
From source file:org.apache.drill.exec.store.ColumnExplorer.java
License:Apache License
/**
 * Low-level parse of partitions, returned as a string array. Returns a
 * null array for invalid values.
 *
 * @param file file path
 * @param root root directory
 * @param hasDirsOnly whether it is file or directory
 * @return array of directory names, or null if the arguments are invalid
 */
public static String[] parsePartitions(Path file, Path root, boolean hasDirsOnly) {
    if (file == null || root == null) {
        return null;
    }

    if (!hasDirsOnly) {
        file = file.getParent();
    }

    int rootDepth = root.depth();
    int fileDepth = file.depth();
    int diffCount = fileDepth - rootDepth;
    if (diffCount < 0) {
        return null;
    }

    String[] diffDirectoryNames = new String[diffCount];
    // start filling in array from the end
    for (int i = rootDepth; fileDepth > i; i++) {
        // place in the end of array
        diffDirectoryNames[fileDepth - i - 1] = file.getName();
        file = file.getParent();
    }

    return diffDirectoryNames;
}
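For illustration, a hypothetical call to the method above (the paths are made up): with root /data and file /data/2020/01/0_0.parquet, the file name is dropped via getParent() first, then each remaining directory name is collected while walking up.

// Hypothetical usage; the paths below are illustrative only.
Path file = new Path("/data/2020/01/0_0.parquet");
Path root = new Path("/data");
String[] dirs = ColumnExplorer.parsePartitions(file, root, false);
// dirs == {"2020", "01"}: directory names ordered from the root downwards.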
From source file:org.apache.drill.exec.store.hive.HiveDrillNativeParquetRowGroupScan.java
License:Apache License
@Override
public Configuration getFsConf(RowGroupReadEntry rowGroupReadEntry) throws IOException {
    Path path = rowGroupReadEntry.getPath().getParent();
    return new ProjectionPusher().pushProjectionsAndFilters(
        new JobConf(HiveUtilities.generateHiveConf(hiveStoragePlugin.getHiveConf(), confProperties)),
        path.getParent());
}
From source file:org.apache.drill.exec.store.hive.HiveDrillNativeParquetScan.java
License:Apache License
@Override
protected void initInternal() throws IOException {
    Map<FileStatus, FileSystem> fileStatusConfMap = new LinkedHashMap<>();
    for (ReadEntryWithPath entry : entries) {
        Path path = entry.getPath();
        Configuration conf = new ProjectionPusher()
            .pushProjectionsAndFilters(new JobConf(hiveStoragePlugin.getHiveConf()), path.getParent());
        FileSystem fs = path.getFileSystem(conf);
        fileStatusConfMap.put(fs.getFileStatus(Path.getPathWithoutSchemeAndAuthority(path)), fs);
    }
    parquetTableMetadata = Metadata.getParquetTableMetadata(fileStatusConfMap, readerConfig);
}
From source file:org.apache.drill.exec.store.hive.HiveDrillNativeScanBatchCreator.java
License:Apache License
@Override
public ScanBatch getBatch(FragmentContext context, HiveDrillNativeParquetSubScan config,
        List<RecordBatch> children) throws ExecutionSetupException {
    final Table table = config.getTable();
    final List<InputSplit> splits = config.getInputSplits();
    final List<Partition> partitions = config.getPartitions();
    final List<SchemaPath> columns = config.getColumns();
    final String partitionDesignator = context.getOptions()
        .getOption(ExecConstants.FILESYSTEM_PARTITION_COLUMN_LABEL).string_val;
    List<Map<String, String>> implicitColumns = Lists.newLinkedList();
    boolean selectAllQuery = AbstractRecordReader.isStarQuery(columns);

    final boolean hasPartitions = (partitions != null && partitions.size() > 0);

    final List<String[]> partitionColumns = Lists.newArrayList();
    final List<Integer> selectedPartitionColumns = Lists.newArrayList();
    List<SchemaPath> newColumns = columns;
    if (!selectAllQuery) {
        // Separate out the partition and non-partition columns. Non-partition columns are passed directly to
        // the ParquetRecordReader. Partition columns are passed to ScanBatch.
        newColumns = Lists.newArrayList();
        Pattern pattern = Pattern.compile(String.format("%s[0-9]+", partitionDesignator));
        for (SchemaPath column : columns) {
            Matcher m = pattern.matcher(column.getAsUnescapedPath());
            if (m.matches()) {
                selectedPartitionColumns.add(
                    Integer.parseInt(column.getAsUnescapedPath().substring(partitionDesignator.length())));
            } else {
                newColumns.add(column);
            }
        }
    }

    final OperatorContext oContext = context.newOperatorContext(config);

    int currentPartitionIndex = 0;
    final List<RecordReader> readers = Lists.newArrayList();
    final HiveConf conf = config.getHiveConf();

    // TODO: In future we can get this cache from Metadata cached on filesystem.
    final Map<String, ParquetMetadata> footerCache = Maps.newHashMap();

    Map<String, String> mapWithMaxColumns = Maps.newLinkedHashMap();
    try {
        for (InputSplit split : splits) {
            final FileSplit fileSplit = (FileSplit) split;
            final Path finalPath = fileSplit.getPath();
            final JobConf cloneJob = new ProjectionPusher().pushProjectionsAndFilters(new JobConf(conf),
                finalPath.getParent());
            final FileSystem fs = finalPath.getFileSystem(cloneJob);

            ParquetMetadata parquetMetadata = footerCache.get(finalPath.toString());
            if (parquetMetadata == null) {
                parquetMetadata = ParquetFileReader.readFooter(cloneJob, finalPath);
                footerCache.put(finalPath.toString(), parquetMetadata);
            }
            final List<Integer> rowGroupNums = getRowGroupNumbersFromFileSplit(fileSplit, parquetMetadata);

            for (int rowGroupNum : rowGroupNums) {
                readers.add(new ParquetRecordReader(context,
                    Path.getPathWithoutSchemeAndAuthority(finalPath).toString(), rowGroupNum, fs,
                    CodecFactory.createDirectCodecFactory(fs.getConf(),
                        new ParquetDirectByteBufferAllocator(oContext.getAllocator()), 0),
                    parquetMetadata, newColumns));

                Map<String, String> implicitValues = Maps.newLinkedHashMap();

                if (hasPartitions) {
                    List<String> values = partitions.get(currentPartitionIndex).getValues();
                    for (int i = 0; i < values.size(); i++) {
                        if (selectAllQuery || selectedPartitionColumns.contains(i)) {
                            implicitValues.put(partitionDesignator + i, values.get(i));
                        }
                    }
                }
                implicitColumns.add(implicitValues);
                if (implicitValues.size() > mapWithMaxColumns.size()) {
                    mapWithMaxColumns = implicitValues;
                }
            }
            currentPartitionIndex++;
        }
    } catch (final IOException | RuntimeException e) {
        AutoCloseables.close(e, readers);
        throw new ExecutionSetupException("Failed to create RecordReaders. " + e.getMessage(), e);
    }

    // all readers should have the same number of implicit columns, add missing ones with value null
    mapWithMaxColumns = Maps.transformValues(mapWithMaxColumns, Functions.constant((String) null));
    for (Map<String, String> map : implicitColumns) {
        map.putAll(Maps.difference(map, mapWithMaxColumns).entriesOnlyOnRight());
    }

    // If there are no readers created (which is possible when the table is empty or no row groups are
    // matched), create an empty RecordReader to output the schema
    if (readers.size() == 0) {
        readers.add(new HiveRecordReader(table, null, null, columns, context, conf,
            ImpersonationUtil.createProxyUgi(config.getUserName(), context.getQueryUserName())));
    }

    return new ScanBatch(config, context, oContext, readers.iterator(), implicitColumns);
}
From source file:org.apache.drill.exec.store.parquet.metadata.Metadata.java
License:Apache License
/**
 * Read the parquet metadata from a file
 *
 * @param path path to the metadata file
 * @param dirsOnly true for {@link Metadata#METADATA_DIRECTORIES_FILENAME}
 *                 or false for {@link Metadata#METADATA_FILENAME} files reading
 * @param metaContext current metadata context
 */
private void readBlockMeta(Path path, boolean dirsOnly, MetadataContext metaContext, FileSystem fs) {
    Stopwatch timer = logger.isDebugEnabled() ? Stopwatch.createStarted() : null;
    Path metadataParentDir = Path.getPathWithoutSchemeAndAuthority(path.getParent());
    String metadataParentDirPath = metadataParentDir.toUri().getPath();
    ObjectMapper mapper = new ObjectMapper();

    final SimpleModule serialModule = new SimpleModule();
    serialModule.addDeserializer(SchemaPath.class, new SchemaPath.De());
    serialModule.addKeyDeserializer(Metadata_V2.ColumnTypeMetadata_v2.Key.class,
        new Metadata_V2.ColumnTypeMetadata_v2.Key.DeSerializer());
    serialModule.addKeyDeserializer(ColumnTypeMetadata_v3.Key.class,
        new ColumnTypeMetadata_v3.Key.DeSerializer());

    AfterburnerModule module = new AfterburnerModule();
    module.setUseOptimizedBeanDeserializer(true);

    mapper.registerModule(serialModule);
    mapper.registerModule(module);
    mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);

    try (InputStream is = fs.open(path)) {
        boolean alreadyCheckedModification;
        boolean newMetadata = false;
        alreadyCheckedModification = metaContext.getStatus(metadataParentDirPath);

        if (dirsOnly) {
            parquetTableMetadataDirs = mapper.readValue(is, ParquetTableMetadataDirs.class);
            if (timer != null) {
                logger.debug("Took {} ms to read directories from directory cache file",
                    timer.elapsed(TimeUnit.MILLISECONDS));
                timer.stop();
            }
            parquetTableMetadataDirs.updateRelativePaths(metadataParentDirPath);
            if (!alreadyCheckedModification && tableModified(parquetTableMetadataDirs.getDirectories(), path,
                    metadataParentDir, metaContext, fs)) {
                parquetTableMetadataDirs = (createMetaFilesRecursively(
                    Path.getPathWithoutSchemeAndAuthority(path.getParent()), fs, true, null)).getRight();
                newMetadata = true;
            }
        } else {
            parquetTableMetadata = mapper.readValue(is, ParquetTableMetadataBase.class);
            if (timer != null) {
                logger.debug("Took {} ms to read metadata from cache file",
                    timer.elapsed(TimeUnit.MILLISECONDS));
                timer.stop();
            }
            if (new MetadataVersion(parquetTableMetadata.getMetadataVersion())
                    .compareTo(new MetadataVersion(3, 0)) >= 0) {
                ((ParquetTableMetadata_v3) parquetTableMetadata).updateRelativePaths(metadataParentDirPath);
            }
            if (!alreadyCheckedModification && tableModified(parquetTableMetadata.getDirectories(), path,
                    metadataParentDir, metaContext, fs)) {
                // TODO change with current columns in existing metadata (auto refresh feature)
                parquetTableMetadata = (createMetaFilesRecursively(
                    Path.getPathWithoutSchemeAndAuthority(path.getParent()), fs, true, null)).getLeft();
                newMetadata = true;
            }

            // DRILL-5009: Remove the RowGroup if it is empty
            List<? extends ParquetFileMetadata> files = parquetTableMetadata.getFiles();
            for (ParquetFileMetadata file : files) {
                List<? extends RowGroupMetadata> rowGroups = file.getRowGroups();
                rowGroups.removeIf(r -> r.getRowCount() == 0);
            }
        }
        if (newMetadata) {
            // if new metadata files were created, invalidate the existing metadata context
            metaContext.clear();
        }
    } catch (IOException e) {
        logger.error("Failed to read '{}' metadata file", path, e);
        metaContext.setMetadataCacheCorrupted(true);
    }
}
From source file:org.apache.drill.exec.store.parquet.Metadata.java
License:Apache License
/**
 * Read the parquet metadata from a file
 *
 * @param path path to the metadata file
 * @param dirsOnly whether to read only directory metadata
 * @param metaContext current metadata context
 * @throws IOException in case of reading errors
 */
private void readBlockMeta(String path, boolean dirsOnly, MetadataContext metaContext) throws IOException {
    Stopwatch timer = Stopwatch.createStarted();
    Path p = new Path(path);
    Path parentDir = p.getParent(); // parent directory of the metadata file
    ObjectMapper mapper = new ObjectMapper();

    final SimpleModule serialModule = new SimpleModule();
    serialModule.addDeserializer(SchemaPath.class, new SchemaPath.De());
    serialModule.addKeyDeserializer(ColumnTypeMetadata_v2.Key.class,
        new ColumnTypeMetadata_v2.Key.DeSerializer());

    AfterburnerModule module = new AfterburnerModule();
    module.setUseOptimizedBeanDeserializer(true);

    mapper.registerModule(serialModule);
    mapper.registerModule(module);
    mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
    FSDataInputStream is = fs.open(p);

    boolean alreadyCheckedModification = false;
    boolean newMetadata = false;

    if (metaContext != null) {
        alreadyCheckedModification = metaContext.getStatus(parentDir.toString());
    }

    if (dirsOnly) {
        parquetTableMetadataDirs = mapper.readValue(is, ParquetTableMetadataDirs.class);
        logger.info("Took {} ms to read directories from directory cache file",
            timer.elapsed(TimeUnit.MILLISECONDS));
        timer.stop();
        if (!alreadyCheckedModification && tableModified(parquetTableMetadataDirs.getDirectories(), p,
                parentDir, metaContext)) {
            parquetTableMetadataDirs = (createMetaFilesRecursively(
                Path.getPathWithoutSchemeAndAuthority(p.getParent()).toString())).getRight();
            newMetadata = true;
        }
    } else {
        parquetTableMetadata = mapper.readValue(is, ParquetTableMetadataBase.class);
        logger.info("Took {} ms to read metadata from cache file", timer.elapsed(TimeUnit.MILLISECONDS));
        timer.stop();
        if (!alreadyCheckedModification && tableModified(parquetTableMetadata.getDirectories(), p,
                parentDir, metaContext)) {
            parquetTableMetadata = (createMetaFilesRecursively(
                Path.getPathWithoutSchemeAndAuthority(p.getParent()).toString())).getLeft();
            newMetadata = true;
        }
    }

    if (newMetadata && metaContext != null) {
        // if new metadata files were created, invalidate the existing metadata context
        metaContext.clear();
    }
}
From source file:org.apache.drill.exec.store.StorageStrategy.java
License:Apache License
/**
 * Creates passed file on appropriate file system.
 * Before creation checks which parent directories do not exist.
 * Applies storage strategy rules to all newly created directories and the file.
 * Will return the first created parent path, or the file if no new parent paths were created.
 *
 * Case 1: /a/b already exists, attempt to create /a/b/c/some_file.txt.
 * Will create the file and return /a/b/c.
 * Case 2: /a/b/c already exists, attempt to create /a/b/c/some_file.txt.
 * Will create the file and return /a/b/c/some_file.txt.
 * Case 3: /a/b/c/some_file.txt already exists, will fail.
 *
 * @param fs file system where file should be located
 * @param file file path
 * @return first created parent path or file
 * @throws IOException is thrown in case of problems while creating path, setting permission
 *                     or adding path to delete on exit list
 */
public Path createFileAndApply(FileSystem fs, Path file) throws IOException {
    List<Path> locations = getNonExistentLocations(fs, file.getParent());
    if (!fs.createNewFile(file)) {
        throw new IOException(String.format("File [%s] already exists on file system [%s].",
            file.toUri().getPath(), fs.getUri()));
    }
    applyToFile(fs, file);

    if (locations.isEmpty()) {
        return file;
    }

    for (Path location : locations) {
        applyStrategy(fs, location, folderPermission, deleteOnExit);
    }
    return locations.get(locations.size() - 1);
}
From source file:org.apache.drill.exec.store.StorageStrategy.java
License:Apache License
/**
 * Returns list of parent locations that do not exist, including the initial location.
 * First in the list will be the initial location,
 * last in the list will be the last parent location that does not exist.
 * If all locations exist, an empty list will be returned.
 *
 * Case 1: if /a/b exists and the passed location is /a/b/c/d,
 * will return a list with two elements: 0 -> /a/b/c/d, 1 -> /a/b/c
 * Case 2: if /a/b exists and the passed location is /a/b, will return an empty list.
 *
 * @param fs file system where locations should be located
 * @param path location path
 * @return list of locations that do not exist
 * @throws IOException in case of troubles accessing file system
 */
private List<Path> getNonExistentLocations(FileSystem fs, Path path) throws IOException {
    List<Path> locations = Lists.newArrayList();
    Path starting = path;
    while (starting != null && !fs.exists(starting)) {
        locations.add(starting);
        starting = starting.getParent();
    }
    return locations;
}
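The same walk-up pattern in isolation, as a self-contained sketch (class and path names are assumptions for illustration, not Drill code): follow getParent() until an existing ancestor is found, collecting every missing location along the way.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MissingParents {
    // Collect the path and every missing ancestor, nearest first.
    static List<Path> missingAncestors(FileSystem fs, Path path) throws IOException {
        List<Path> missing = new ArrayList<>();
        for (Path p = path; p != null && !fs.exists(p); p = p.getParent()) {
            missing.add(p);
        }
        return missing;
    }

    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.getLocal(new Configuration());
        // If /tmp/a does not exist, prints [/tmp/a/b/c, /tmp/a/b, /tmp/a]
        System.out.println(missingAncestors(fs, new Path("/tmp/a/b/c")));
    }
}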