List of usage examples for org.apache.hadoop.fs Path getPathWithoutSchemeAndAuthority
public static Path getPathWithoutSchemeAndAuthority(Path path)
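The method strips the scheme and authority components from a fully qualified Hadoop Path and returns only the path portion. A minimal sketch of the behavior follows (the hdfs URI, class name, and printed output are illustrative assumptions, not taken from the examples below):

import org.apache.hadoop.fs.Path;

public class PathWithoutSchemeExample {
  public static void main(String[] args) {
    // Fully qualified path: scheme "hdfs", authority "namenode:8020".
    Path qualified = new Path("hdfs://namenode:8020/user/data/part-00000.parquet");

    // Drop the scheme and authority, keeping only the path component.
    Path bare = Path.getPathWithoutSchemeAndAuthority(qualified);

    System.out.println(bare); // expected: /user/data/part-00000.parquet
  }
}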
From source file:org.apache.carbondata.core.datastore.impl.FileFactory.java
License:Apache License
/**
 * Below method will be used to update the file path for local type.
 * It removes the file:/ from the path.
 *
 * @param filePath
 * @param fileType
 * @return updated file path without url for local
 */
private static String getUpdatedFilePath(String filePath, FileType fileType) {
  switch (fileType) {
    case HDFS:
    case ALLUXIO:
    case VIEWFS:
      return filePath;
    case LOCAL:
    default:
      if (filePath != null && !filePath.isEmpty()) {
        Path pathWithoutSchemeAndAuthority = Path.getPathWithoutSchemeAndAuthority(new Path(filePath));
        return pathWithoutSchemeAndAuthority.toString();
      } else {
        return filePath;
      }
  }
}
From source file:org.apache.drill.exec.physical.impl.scan.file.FileMetadata.java
License:Apache License
public FileMetadata(Path filePath, Path selectionRoot) {
  this.filePath = filePath;

  // If the data source is not a file, no file metadata is available.
  if (selectionRoot == null || filePath == null) {
    dirPath = null;
    return;
  }

  // If the query is against a single file, selection root and file path
  // will be identical, oddly.
  Path rootPath = Path.getPathWithoutSchemeAndAuthority(selectionRoot);
  Path bareFilePath = Path.getPathWithoutSchemeAndAuthority(filePath);
  if (rootPath.equals(bareFilePath)) {
    dirPath = null;
    return;
  }

  dirPath = ColumnExplorer.parsePartitions(filePath, rootPath, false);
  if (dirPath == null) {
    throw new IllegalArgumentException(
        String.format("Selection root of \"%s\" is not a leading path of \"%s\"",
            selectionRoot.toString(), filePath.toString()));
  }
}
From source file:org.apache.drill.exec.planner.DFSFilePartitionLocation.java
License:Apache License
public DFSFilePartitionLocation(int max, String selectionRoot, String file, boolean hasDirsOnly) {
  this.file = file;
  this.dirs = new String[max];

  // strip the scheme and authority if they exist
  selectionRoot = Path.getPathWithoutSchemeAndAuthority(new Path(selectionRoot)).toString();

  int start = file.indexOf(selectionRoot) + selectionRoot.length();
  String postPath = file.substring(start);
  if (postPath.length() == 0) {
    return;
  }

  if (postPath.charAt(0) == '/') {
    postPath = postPath.substring(1);
  }

  String[] mostDirs = postPath.split("/");
  int maxLoop = Math.min(max, hasDirsOnly ? mostDirs.length : mostDirs.length - 1);
  for (int i = 0; i < maxLoop; i++) {
    this.dirs[i] = mostDirs[i];
  }
}
From source file:org.apache.drill.exec.planner.DFSPartitionLocation.java
License:Apache License
public DFSPartitionLocation(int max, String selectionRoot, String file) {
  this.file = file;
  this.dirs = new String[max];

  // strip the scheme and authority if they exist
  selectionRoot = Path.getPathWithoutSchemeAndAuthority(new Path(selectionRoot)).toString();

  int start = file.indexOf(selectionRoot) + selectionRoot.length();
  String postPath = file.substring(start);
  if (postPath.length() == 0) {
    return;
  }

  if (postPath.charAt(0) == '/') {
    postPath = postPath.substring(1);
  }

  String[] mostDirs = postPath.split("/");
  int maxLoop = Math.min(max, mostDirs.length - 1);
  for (int i = 0; i < maxLoop; i++) {
    this.dirs[i] = mostDirs[i];
  }
}
From source file:org.apache.drill.exec.planner.ParquetPartitionDescriptor.java
License:Apache License
private void populatePruningVector(ValueVector v, int index, SchemaPath column, Path file) {
  Path path = Path.getPathWithoutSchemeAndAuthority(file);
  TypeProtos.MajorType majorType = getVectorType(column, null);
  TypeProtos.MinorType type = majorType.getMinorType();
  switch (type) {
    case BIT: {
      NullableBitVector bitVector = (NullableBitVector) v;
      Boolean value = groupScan.getPartitionValue(path, column, Boolean.class);
      if (value == null) {
        bitVector.getMutator().setNull(index);
      } else {
        bitVector.getMutator().setSafe(index, value ? 1 : 0);
      }
      return;
    }
    case INT: {
      NullableIntVector intVector = (NullableIntVector) v;
      Integer value = groupScan.getPartitionValue(path, column, Integer.class);
      if (value == null) {
        intVector.getMutator().setNull(index);
      } else {
        intVector.getMutator().setSafe(index, value);
      }
      return;
    }
    case SMALLINT: {
      NullableSmallIntVector smallIntVector = (NullableSmallIntVector) v;
      Integer value = groupScan.getPartitionValue(path, column, Integer.class);
      if (value == null) {
        smallIntVector.getMutator().setNull(index);
      } else {
        smallIntVector.getMutator().setSafe(index, value.shortValue());
      }
      return;
    }
    case TINYINT: {
      NullableTinyIntVector tinyIntVector = (NullableTinyIntVector) v;
      Integer value = groupScan.getPartitionValue(path, column, Integer.class);
      if (value == null) {
        tinyIntVector.getMutator().setNull(index);
      } else {
        tinyIntVector.getMutator().setSafe(index, value.byteValue());
      }
      return;
    }
    case UINT1: {
      NullableUInt1Vector intVector = (NullableUInt1Vector) v;
      Integer value = groupScan.getPartitionValue(path, column, Integer.class);
      if (value == null) {
        intVector.getMutator().setNull(index);
      } else {
        intVector.getMutator().setSafe(index, value.byteValue());
      }
      return;
    }
    case UINT2: {
      NullableUInt2Vector intVector = (NullableUInt2Vector) v;
      Integer value = groupScan.getPartitionValue(path, column, Integer.class);
      if (value == null) {
        intVector.getMutator().setNull(index);
      } else {
        intVector.getMutator().setSafe(index, (char) value.shortValue());
      }
      return;
    }
    case UINT4: {
      NullableUInt4Vector intVector = (NullableUInt4Vector) v;
      Integer value = groupScan.getPartitionValue(path, column, Integer.class);
      if (value == null) {
        intVector.getMutator().setNull(index);
      } else {
        intVector.getMutator().setSafe(index, value);
      }
      return;
    }
    case BIGINT: {
      NullableBigIntVector bigIntVector = (NullableBigIntVector) v;
      Long value = groupScan.getPartitionValue(path, column, Long.class);
      if (value == null) {
        bigIntVector.getMutator().setNull(index);
      } else {
        bigIntVector.getMutator().setSafe(index, value);
      }
      return;
    }
    case FLOAT4: {
      NullableFloat4Vector float4Vector = (NullableFloat4Vector) v;
      Float value = groupScan.getPartitionValue(path, column, Float.class);
      if (value == null) {
        float4Vector.getMutator().setNull(index);
      } else {
        float4Vector.getMutator().setSafe(index, value);
      }
      return;
    }
    case FLOAT8: {
      NullableFloat8Vector float8Vector = (NullableFloat8Vector) v;
      Double value = groupScan.getPartitionValue(path, column, Double.class);
      if (value == null) {
        float8Vector.getMutator().setNull(index);
      } else {
        float8Vector.getMutator().setSafe(index, value);
      }
      return;
    }
    case VARBINARY: {
      NullableVarBinaryVector varBinaryVector = (NullableVarBinaryVector) v;
      Object s = groupScan.getPartitionValue(path, column, Object.class);
      byte[] bytes;
      if (s == null) {
        varBinaryVector.getMutator().setNull(index);
        return;
      } else {
        bytes = getBytes(type, s);
      }
      varBinaryVector.getMutator().setSafe(index, bytes, 0, bytes.length);
      return;
    }
    case VARDECIMAL: {
      NullableVarDecimalVector decimalVector = (NullableVarDecimalVector) v;
      Object s = groupScan.getPartitionValue(path, column, Object.class);
      byte[] bytes;
      if (s == null) {
        decimalVector.getMutator().setNull(index);
        return;
      } else if (s instanceof Integer) {
        bytes = Ints.toByteArray((int) s);
      } else if (s instanceof Long) {
        bytes = Longs.toByteArray((long) s);
      } else {
        bytes = getBytes(type, s);
      }
      decimalVector.getMutator().setSafe(index, bytes, 0, bytes.length);
      return;
    }
    case DATE: {
      NullableDateVector dateVector = (NullableDateVector) v;
      Integer value = groupScan.getPartitionValue(path, column, Integer.class);
      if (value == null) {
        dateVector.getMutator().setNull(index);
      } else {
        dateVector.getMutator().setSafe(index, value * (long) DateTimeConstants.MILLIS_PER_DAY);
      }
      return;
    }
    case TIME: {
      NullableTimeVector timeVector = (NullableTimeVector) v;
      Integer value = groupScan.getPartitionValue(path, column, Integer.class);
      if (value == null) {
        timeVector.getMutator().setNull(index);
      } else {
        timeVector.getMutator().setSafe(index, value);
      }
      return;
    }
    case TIMESTAMP: {
      NullableTimeStampVector timeStampVector = (NullableTimeStampVector) v;
      Long value = groupScan.getPartitionValue(path, column, Long.class);
      if (value == null) {
        timeStampVector.getMutator().setNull(index);
      } else {
        timeStampVector.getMutator().setSafe(index, value);
      }
      return;
    }
    case VARCHAR: {
      NullableVarCharVector varCharVector = (NullableVarCharVector) v;
      Object s = groupScan.getPartitionValue(path, column, Object.class);
      byte[] bytes;
      if (s == null) {
        varCharVector.getMutator().setNull(index);
        return;
      } else {
        bytes = getBytes(type, s);
      }
      varCharVector.getMutator().setSafe(index, bytes, 0, bytes.length);
      return;
    }
    case INTERVAL: {
      NullableIntervalVector intervalVector = (NullableIntervalVector) v;
      Object s = groupScan.getPartitionValue(path, column, Object.class);
      byte[] bytes;
      if (s == null) {
        intervalVector.getMutator().setNull(index);
        return;
      } else {
        bytes = getBytes(type, s);
      }
      intervalVector.getMutator().setSafe(index, 1,
          ParquetReaderUtility.getIntFromLEBytes(bytes, 0),
          ParquetReaderUtility.getIntFromLEBytes(bytes, 4),
          ParquetReaderUtility.getIntFromLEBytes(bytes, 8));
      return;
    }
    default:
      throw new UnsupportedOperationException("Unsupported type: " + type);
  }
}
From source file:org.apache.drill.exec.store.ColumnExplorer.java
License:Apache License
/**
 * Creates a map with implicit columns where the key is the column name and the value is the column's actual value.
 * This map contains partition and implicit file columns (if requested).
 * Partition column names are formed based on the partition designator and value index.
 *
 * @param filePath file path, used to populate file implicit columns
 * @param partitionValues list of partition values
 * @param includeFileImplicitColumns if file implicit columns should be included into the result
 * @return implicit columns map
 */
public Map<String, String> populateImplicitColumns(Path filePath,
                                                   List<String> partitionValues,
                                                   boolean includeFileImplicitColumns) {
  Map<String, String> implicitValues = new LinkedHashMap<>();

  for (int i = 0; i < partitionValues.size(); i++) {
    if (isStarQuery || selectedPartitionColumns.contains(i)) {
      implicitValues.put(partitionDesignator + i, partitionValues.get(i));
    }
  }

  if (includeFileImplicitColumns) {
    Path path = Path.getPathWithoutSchemeAndAuthority(filePath);
    for (Map.Entry<String, ImplicitFileColumns> entry : selectedImplicitColumns.entrySet()) {
      implicitValues.put(entry.getKey(), entry.getValue().getValue(path));
    }
  }

  return implicitValues;
}
From source file:org.apache.drill.exec.store.dfs.FileSelection.java
License:Apache License
/**
 * Returns longest common path for the given list of files.
 *
 * @param files list of files
 * @return longest common path
 */
private static String commonPathForFiles(final List<String> files) {
  if (files == null || files.isEmpty()) {
    return "";
  }

  final int total = files.size();
  final String[][] folders = new String[total][];
  int shortest = Integer.MAX_VALUE;
  for (int i = 0; i < total; i++) {
    final Path path = new Path(files.get(i));
    folders[i] = Path.getPathWithoutSchemeAndAuthority(path).toString().split(PATH_SEPARATOR);
    shortest = Math.min(shortest, folders[i].length);
  }

  int latest;
  out:
  for (latest = 0; latest < shortest; latest++) {
    final String current = folders[0][latest];
    for (int i = 1; i < folders.length; i++) {
      if (!current.equals(folders[i][latest])) {
        break out;
      }
    }
  }

  final Path path = new Path(files.get(0));
  final URI uri = path.toUri();
  final String pathString = buildPath(folders[0], latest);
  return new Path(uri.getScheme(), uri.getAuthority(), pathString).toString();
}
From source file:org.apache.drill.exec.store.hive.HiveDrillNativeParquetScan.java
License:Apache License
public HiveDrillNativeParquetScan(String userName,
                                  List<SchemaPath> columns,
                                  HiveStoragePlugin hiveStoragePlugin,
                                  List<LogicalInputSplit> logicalInputSplits,
                                  Map<String, String> confProperties,
                                  ParquetReaderConfig readerConfig,
                                  LogicalExpression filter) throws IOException {
  super(userName, columns, new ArrayList<>(), readerConfig, filter);
  this.hiveStoragePlugin = hiveStoragePlugin;
  this.hivePartitionHolder = new HivePartitionHolder();
  this.confProperties = confProperties;

  for (LogicalInputSplit logicalInputSplit : logicalInputSplits) {
    Iterator<InputSplit> iterator = logicalInputSplit.getInputSplits().iterator();
    // logical input split contains list of splits by files
    // we need to read path of only one to get file path
    assert iterator.hasNext();
    InputSplit split = iterator.next();
    assert split instanceof FileSplit;
    FileSplit fileSplit = (FileSplit) split;
    Path finalPath = fileSplit.getPath();
    Path pathString = Path.getPathWithoutSchemeAndAuthority(finalPath);
    entries.add(new ReadEntryWithPath(pathString));

    // store partition values per path
    Partition partition = logicalInputSplit.getPartition();
    if (partition != null) {
      hivePartitionHolder.add(pathString, partition.getValues());
    }
  }

  init();
}
From source file:org.apache.drill.exec.store.hive.HiveDrillNativeParquetScan.java
License:Apache License
@Override
protected void initInternal() throws IOException {
  Map<FileStatus, FileSystem> fileStatusConfMap = new LinkedHashMap<>();
  for (ReadEntryWithPath entry : entries) {
    Path path = entry.getPath();
    Configuration conf = new ProjectionPusher()
        .pushProjectionsAndFilters(new JobConf(hiveStoragePlugin.getHiveConf()), path.getParent());
    FileSystem fs = path.getFileSystem(conf);
    fileStatusConfMap.put(fs.getFileStatus(Path.getPathWithoutSchemeAndAuthority(path)), fs);
  }
  parquetTableMetadata = Metadata.getParquetTableMetadata(fileStatusConfMap, readerConfig);
}
From source file:org.apache.drill.exec.store.hive.HiveDrillNativeScanBatchCreator.java
License:Apache License
@Override
public ScanBatch getBatch(FragmentContext context, HiveDrillNativeParquetSubScan config,
    List<RecordBatch> children) throws ExecutionSetupException {
  final Table table = config.getTable();
  final List<InputSplit> splits = config.getInputSplits();
  final List<Partition> partitions = config.getPartitions();
  final List<SchemaPath> columns = config.getColumns();
  final String partitionDesignator = context.getOptions()
      .getOption(ExecConstants.FILESYSTEM_PARTITION_COLUMN_LABEL).string_val;
  List<Map<String, String>> implicitColumns = Lists.newLinkedList();
  boolean selectAllQuery = AbstractRecordReader.isStarQuery(columns);

  final boolean hasPartitions = (partitions != null && partitions.size() > 0);

  final List<String[]> partitionColumns = Lists.newArrayList();
  final List<Integer> selectedPartitionColumns = Lists.newArrayList();
  List<SchemaPath> newColumns = columns;
  if (!selectAllQuery) {
    // Separate out the partition and non-partition columns. Non-partition columns are passed directly to the
    // ParquetRecordReader. Partition columns are passed to ScanBatch.
    newColumns = Lists.newArrayList();
    Pattern pattern = Pattern.compile(String.format("%s[0-9]+", partitionDesignator));
    for (SchemaPath column : columns) {
      Matcher m = pattern.matcher(column.getAsUnescapedPath());
      if (m.matches()) {
        selectedPartitionColumns.add(
            Integer.parseInt(column.getAsUnescapedPath().substring(partitionDesignator.length())));
      } else {
        newColumns.add(column);
      }
    }
  }

  final OperatorContext oContext = context.newOperatorContext(config);

  int currentPartitionIndex = 0;
  final List<RecordReader> readers = Lists.newArrayList();

  final HiveConf conf = config.getHiveConf();

  // TODO: In future we can get this cache from Metadata cached on filesystem.
  final Map<String, ParquetMetadata> footerCache = Maps.newHashMap();

  Map<String, String> mapWithMaxColumns = Maps.newLinkedHashMap();
  try {
    for (InputSplit split : splits) {
      final FileSplit fileSplit = (FileSplit) split;
      final Path finalPath = fileSplit.getPath();
      final JobConf cloneJob =
          new ProjectionPusher().pushProjectionsAndFilters(new JobConf(conf), finalPath.getParent());
      final FileSystem fs = finalPath.getFileSystem(cloneJob);

      ParquetMetadata parquetMetadata = footerCache.get(finalPath.toString());
      if (parquetMetadata == null) {
        parquetMetadata = ParquetFileReader.readFooter(cloneJob, finalPath);
        footerCache.put(finalPath.toString(), parquetMetadata);
      }
      final List<Integer> rowGroupNums = getRowGroupNumbersFromFileSplit(fileSplit, parquetMetadata);

      for (int rowGroupNum : rowGroupNums) {
        readers.add(new ParquetRecordReader(context,
            Path.getPathWithoutSchemeAndAuthority(finalPath).toString(),
            rowGroupNum, fs,
            CodecFactory.createDirectCodecFactory(fs.getConf(),
                new ParquetDirectByteBufferAllocator(oContext.getAllocator()), 0),
            parquetMetadata, newColumns));

        Map<String, String> implicitValues = Maps.newLinkedHashMap();

        if (hasPartitions) {
          List<String> values = partitions.get(currentPartitionIndex).getValues();
          for (int i = 0; i < values.size(); i++) {
            if (selectAllQuery || selectedPartitionColumns.contains(i)) {
              implicitValues.put(partitionDesignator + i, values.get(i));
            }
          }
        }
        implicitColumns.add(implicitValues);
        if (implicitValues.size() > mapWithMaxColumns.size()) {
          mapWithMaxColumns = implicitValues;
        }
      }
      currentPartitionIndex++;
    }
  } catch (final IOException | RuntimeException e) {
    AutoCloseables.close(e, readers);
    throw new ExecutionSetupException("Failed to create RecordReaders. " + e.getMessage(), e);
  }

  // all readers should have the same number of implicit columns, add missing ones with value null
  mapWithMaxColumns = Maps.transformValues(mapWithMaxColumns, Functions.constant((String) null));
  for (Map<String, String> map : implicitColumns) {
    map.putAll(Maps.difference(map, mapWithMaxColumns).entriesOnlyOnRight());
  }

  // If there are no readers created (which is possible when the table is empty or no row groups are matched),
  // create an empty RecordReader to output the schema
  if (readers.size() == 0) {
    readers.add(new HiveRecordReader(table, null, null, columns, context, conf,
        ImpersonationUtil.createProxyUgi(config.getUserName(), context.getQueryUserName())));
  }

  return new ScanBatch(config, context, oContext, readers.iterator(), implicitColumns);
}