List of usage examples for org.apache.hadoop.fs Path getPathWithoutSchemeAndAuthority
public static Path getPathWithoutSchemeAndAuthority(Path path)
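The method strips the scheme and authority components from a fully qualified Hadoop Path and returns only the path portion. A minimal sketch of the behavior follows (the hdfs URI, class name, and printed output are illustrative assumptions, not taken from the examples below):

import org.apache.hadoop.fs.Path;

public class PathWithoutSchemeExample {
  public static void main(String[] args) {
    // Fully qualified path: scheme "hdfs", authority "namenode:8020".
    Path qualified = new Path("hdfs://namenode:8020/user/data/part-00000.parquet");

    // Drop the scheme and authority, keeping only the path component.
    Path bare = Path.getPathWithoutSchemeAndAuthority(qualified);

    System.out.println(bare); // expected: /user/data/part-00000.parquet
  }
}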
From source file:org.apache.carbondata.core.datastore.impl.FileFactory.java
License:Apache License
/**
 * Below method will be used to update the file path for local type.
 * It removes the file:/ from the path.
 *
 * @param filePath
 * @param fileType
 * @return updated file path without url for local
 */
private static String getUpdatedFilePath(String filePath, FileType fileType) {
  switch (fileType) {
    case HDFS:
    case ALLUXIO:
    case VIEWFS:
      return filePath;
    case LOCAL:
    default:
      if (filePath != null && !filePath.isEmpty()) {
        Path pathWithoutSchemeAndAuthority = Path.getPathWithoutSchemeAndAuthority(new Path(filePath));
        return pathWithoutSchemeAndAuthority.toString();
      } else {
        return filePath;
      }
  }
}
From source file:org.apache.drill.exec.physical.impl.scan.file.FileMetadata.java
License:Apache License
public FileMetadata(Path filePath, Path selectionRoot) {
  this.filePath = filePath;

  // If the data source is not a file, no file metadata is available.
  if (selectionRoot == null || filePath == null) {
    dirPath = null;
    return;
  }

  // If the query is against a single file, selection root and file path
  // will be identical, oddly.
  Path rootPath = Path.getPathWithoutSchemeAndAuthority(selectionRoot);
  Path bareFilePath = Path.getPathWithoutSchemeAndAuthority(filePath);
  if (rootPath.equals(bareFilePath)) {
    dirPath = null;
    return;
  }

  dirPath = ColumnExplorer.parsePartitions(filePath, rootPath, false);
  if (dirPath == null) {
    throw new IllegalArgumentException(
        String.format("Selection root of \"%s\" is not a leading path of \"%s\"",
            selectionRoot.toString(), filePath.toString()));
  }
}
From source file:org.apache.drill.exec.planner.DFSFilePartitionLocation.java
License:Apache License
public DFSFilePartitionLocation(int max, String selectionRoot, String file, boolean hasDirsOnly) {
  this.file = file;
  this.dirs = new String[max];

  // strip the scheme and authority if they exist
  selectionRoot = Path.getPathWithoutSchemeAndAuthority(new Path(selectionRoot)).toString();

  int start = file.indexOf(selectionRoot) + selectionRoot.length();
  String postPath = file.substring(start);
  if (postPath.length() == 0) {
    return;
  }

  if (postPath.charAt(0) == '/') {
    postPath = postPath.substring(1);
  }

  String[] mostDirs = postPath.split("/");
  int maxLoop = Math.min(max, hasDirsOnly ? mostDirs.length : mostDirs.length - 1);
  for (int i = 0; i < maxLoop; i++) {
    this.dirs[i] = mostDirs[i];
  }
}
From source file:org.apache.drill.exec.planner.DFSPartitionLocation.java
License:Apache License
public DFSPartitionLocation(int max, String selectionRoot, String file) {
  this.file = file;
  this.dirs = new String[max];

  // strip the scheme and authority if they exist
  selectionRoot = Path.getPathWithoutSchemeAndAuthority(new Path(selectionRoot)).toString();

  int start = file.indexOf(selectionRoot) + selectionRoot.length();
  String postPath = file.substring(start);
  if (postPath.length() == 0) {
    return;
  }

  if (postPath.charAt(0) == '/') {
    postPath = postPath.substring(1);
  }

  String[] mostDirs = postPath.split("/");
  int maxLoop = Math.min(max, mostDirs.length - 1);
  for (int i = 0; i < maxLoop; i++) {
    this.dirs[i] = mostDirs[i];
  }
}
From source file:org.apache.drill.exec.planner.ParquetPartitionDescriptor.java
License:Apache License
private void populatePruningVector(ValueVector v, int index, SchemaPath column, Path file) {
  Path path = Path.getPathWithoutSchemeAndAuthority(file);
  TypeProtos.MajorType majorType = getVectorType(column, null);
  TypeProtos.MinorType type = majorType.getMinorType();
  switch (type) {
    case BIT: {
      NullableBitVector bitVector = (NullableBitVector) v;
      Boolean value = groupScan.getPartitionValue(path, column, Boolean.class);
      if (value == null) {
        bitVector.getMutator().setNull(index);
      } else {
        bitVector.getMutator().setSafe(index, value ? 1 : 0);
      }
      return;
    }
    case INT: {
      NullableIntVector intVector = (NullableIntVector) v;
      Integer value = groupScan.getPartitionValue(path, column, Integer.class);
      if (value == null) {
        intVector.getMutator().setNull(index);
      } else {
        intVector.getMutator().setSafe(index, value);
      }
      return;
    }
    case SMALLINT: {
      NullableSmallIntVector smallIntVector = (NullableSmallIntVector) v;
      Integer value = groupScan.getPartitionValue(path, column, Integer.class);
      if (value == null) {
        smallIntVector.getMutator().setNull(index);
      } else {
        smallIntVector.getMutator().setSafe(index, value.shortValue());
      }
      return;
    }
    case TINYINT: {
      NullableTinyIntVector tinyIntVector = (NullableTinyIntVector) v;
      Integer value = groupScan.getPartitionValue(path, column, Integer.class);
      if (value == null) {
        tinyIntVector.getMutator().setNull(index);
      } else {
        tinyIntVector.getMutator().setSafe(index, value.byteValue());
      }
      return;
    }
    case UINT1: {
      NullableUInt1Vector intVector = (NullableUInt1Vector) v;
      Integer value = groupScan.getPartitionValue(path, column, Integer.class);
      if (value == null) {
        intVector.getMutator().setNull(index);
      } else {
        intVector.getMutator().setSafe(index, value.byteValue());
      }
      return;
    }
    case UINT2: {
      NullableUInt2Vector intVector = (NullableUInt2Vector) v;
      Integer value = groupScan.getPartitionValue(path, column, Integer.class);
      if (value == null) {
        intVector.getMutator().setNull(index);
      } else {
        intVector.getMutator().setSafe(index, (char) value.shortValue());
      }
      return;
    }
    case UINT4: {
      NullableUInt4Vector intVector = (NullableUInt4Vector) v;
      Integer value = groupScan.getPartitionValue(path, column, Integer.class);
      if (value == null) {
        intVector.getMutator().setNull(index);
      } else {
        intVector.getMutator().setSafe(index, value);
      }
      return;
    }
    case BIGINT: {
      NullableBigIntVector bigIntVector = (NullableBigIntVector) v;
      Long value = groupScan.getPartitionValue(path, column, Long.class);
      if (value == null) {
        bigIntVector.getMutator().setNull(index);
      } else {
        bigIntVector.getMutator().setSafe(index, value);
      }
      return;
    }
    case FLOAT4: {
      NullableFloat4Vector float4Vector = (NullableFloat4Vector) v;
      Float value = groupScan.getPartitionValue(path, column, Float.class);
      if (value == null) {
        float4Vector.getMutator().setNull(index);
      } else {
        float4Vector.getMutator().setSafe(index, value);
      }
      return;
    }
    case FLOAT8: {
      NullableFloat8Vector float8Vector = (NullableFloat8Vector) v;
      Double value = groupScan.getPartitionValue(path, column, Double.class);
      if (value == null) {
        float8Vector.getMutator().setNull(index);
      } else {
        float8Vector.getMutator().setSafe(index, value);
      }
      return;
    }
    case VARBINARY: {
      NullableVarBinaryVector varBinaryVector = (NullableVarBinaryVector) v;
      Object s = groupScan.getPartitionValue(path, column, Object.class);
      byte[] bytes;
      if (s == null) {
        varBinaryVector.getMutator().setNull(index);
        return;
      } else {
        bytes = getBytes(type, s);
      }
      varBinaryVector.getMutator().setSafe(index, bytes, 0, bytes.length);
      return;
    }
    case VARDECIMAL: {
      NullableVarDecimalVector decimalVector = (NullableVarDecimalVector) v;
      Object s = groupScan.getPartitionValue(path, column, Object.class);
      byte[] bytes;
      if (s == null) {
        decimalVector.getMutator().setNull(index);
        return;
      } else if (s instanceof Integer) {
        bytes = Ints.toByteArray((int) s);
      } else if (s instanceof Long) {
        bytes = Longs.toByteArray((long) s);
      } else {
        bytes = getBytes(type, s);
      }
      decimalVector.getMutator().setSafe(index, bytes, 0, bytes.length);
      return;
    }
    case DATE: {
      NullableDateVector dateVector = (NullableDateVector) v;
      Integer value = groupScan.getPartitionValue(path, column, Integer.class);
      if (value == null) {
        dateVector.getMutator().setNull(index);
      } else {
        dateVector.getMutator().setSafe(index, value * (long) DateTimeConstants.MILLIS_PER_DAY);
      }
      return;
    }
    case TIME: {
      NullableTimeVector timeVector = (NullableTimeVector) v;
      Integer value = groupScan.getPartitionValue(path, column, Integer.class);
      if (value == null) {
        timeVector.getMutator().setNull(index);
      } else {
        timeVector.getMutator().setSafe(index, value);
      }
      return;
    }
    case TIMESTAMP: {
      NullableTimeStampVector timeStampVector = (NullableTimeStampVector) v;
      Long value = groupScan.getPartitionValue(path, column, Long.class);
      if (value == null) {
        timeStampVector.getMutator().setNull(index);
      } else {
        timeStampVector.getMutator().setSafe(index, value);
      }
      return;
    }
    case VARCHAR: {
      NullableVarCharVector varCharVector = (NullableVarCharVector) v;
      Object s = groupScan.getPartitionValue(path, column, Object.class);
      byte[] bytes;
      if (s == null) {
        varCharVector.getMutator().setNull(index);
        return;
      } else {
        bytes = getBytes(type, s);
      }
      varCharVector.getMutator().setSafe(index, bytes, 0, bytes.length);
      return;
    }
    case INTERVAL: {
      NullableIntervalVector intervalVector = (NullableIntervalVector) v;
      Object s = groupScan.getPartitionValue(path, column, Object.class);
      byte[] bytes;
      if (s == null) {
        intervalVector.getMutator().setNull(index);
        return;
      } else {
        bytes = getBytes(type, s);
      }
      intervalVector.getMutator().setSafe(index, 1,
          ParquetReaderUtility.getIntFromLEBytes(bytes, 0),
          ParquetReaderUtility.getIntFromLEBytes(bytes, 4),
          ParquetReaderUtility.getIntFromLEBytes(bytes, 8));
      return;
    }
    default:
      throw new UnsupportedOperationException("Unsupported type: " + type);
  }
}
From source file:org.apache.drill.exec.store.ColumnExplorer.java
License:Apache License
/**
 * Creates a map with implicit columns where the key is the column name and the value is the column's actual value.
 * This map contains partition and implicit file columns (if requested).
 * Partition column names are formed based on the partition designator and value index.
 *
 * @param filePath file path, used to populate file implicit columns
 * @param partitionValues list of partition values
 * @param includeFileImplicitColumns if file implicit columns should be included into the result
 * @return implicit columns map
 */
public Map<String, String> populateImplicitColumns(Path filePath,
                                                   List<String> partitionValues,
                                                   boolean includeFileImplicitColumns) {
  Map<String, String> implicitValues = new LinkedHashMap<>();

  for (int i = 0; i < partitionValues.size(); i++) {
    if (isStarQuery || selectedPartitionColumns.contains(i)) {
      implicitValues.put(partitionDesignator + i, partitionValues.get(i));
    }
  }

  if (includeFileImplicitColumns) {
    Path path = Path.getPathWithoutSchemeAndAuthority(filePath);
    for (Map.Entry<String, ImplicitFileColumns> entry : selectedImplicitColumns.entrySet()) {
      implicitValues.put(entry.getKey(), entry.getValue().getValue(path));
    }
  }

  return implicitValues;
}
From source file:org.apache.drill.exec.store.dfs.FileSelection.java
License:Apache License
/**
 * Returns longest common path for the given list of files.
 *
 * @param files list of files
 * @return longest common path
 */
private static String commonPathForFiles(final List<String> files) {
  if (files == null || files.isEmpty()) {
    return "";
  }

  final int total = files.size();
  final String[][] folders = new String[total][];
  int shortest = Integer.MAX_VALUE;
  for (int i = 0; i < total; i++) {
    final Path path = new Path(files.get(i));
    folders[i] = Path.getPathWithoutSchemeAndAuthority(path).toString().split(PATH_SEPARATOR);
    shortest = Math.min(shortest, folders[i].length);
  }

  int latest;
  out:
  for (latest = 0; latest < shortest; latest++) {
    final String current = folders[0][latest];
    for (int i = 1; i < folders.length; i++) {
      if (!current.equals(folders[i][latest])) {
        break out;
      }
    }
  }

  final Path path = new Path(files.get(0));
  final URI uri = path.toUri();
  final String pathString = buildPath(folders[0], latest);
  return new Path(uri.getScheme(), uri.getAuthority(), pathString).toString();
}
From source file:org.apache.drill.exec.store.hive.HiveDrillNativeParquetScan.java
License:Apache License
public HiveDrillNativeParquetScan(String userName,
                                  List<SchemaPath> columns,
                                  HiveStoragePlugin hiveStoragePlugin,
                                  List<LogicalInputSplit> logicalInputSplits,
                                  Map<String, String> confProperties,
                                  ParquetReaderConfig readerConfig,
                                  LogicalExpression filter) throws IOException {
  super(userName, columns, new ArrayList<>(), readerConfig, filter);
  this.hiveStoragePlugin = hiveStoragePlugin;
  this.hivePartitionHolder = new HivePartitionHolder();
  this.confProperties = confProperties;

  for (LogicalInputSplit logicalInputSplit : logicalInputSplits) {
    Iterator<InputSplit> iterator = logicalInputSplit.getInputSplits().iterator();
    // logical input split contains list of splits by files
    // we need to read path of only one to get file path
    assert iterator.hasNext();
    InputSplit split = iterator.next();
    assert split instanceof FileSplit;
    FileSplit fileSplit = (FileSplit) split;
    Path finalPath = fileSplit.getPath();
    Path pathString = Path.getPathWithoutSchemeAndAuthority(finalPath);
    entries.add(new ReadEntryWithPath(pathString));

    // store partition values per path
    Partition partition = logicalInputSplit.getPartition();
    if (partition != null) {
      hivePartitionHolder.add(pathString, partition.getValues());
    }
  }

  init();
}
From source file:org.apache.drill.exec.store.hive.HiveDrillNativeParquetScan.java
License:Apache License
@Override
protected void initInternal() throws IOException {
  Map<FileStatus, FileSystem> fileStatusConfMap = new LinkedHashMap<>();
  for (ReadEntryWithPath entry : entries) {
    Path path = entry.getPath();
    Configuration conf = new ProjectionPusher()
        .pushProjectionsAndFilters(new JobConf(hiveStoragePlugin.getHiveConf()), path.getParent());
    FileSystem fs = path.getFileSystem(conf);
    fileStatusConfMap.put(fs.getFileStatus(Path.getPathWithoutSchemeAndAuthority(path)), fs);
  }
  parquetTableMetadata = Metadata.getParquetTableMetadata(fileStatusConfMap, readerConfig);
}
From source file:org.apache.drill.exec.store.hive.HiveDrillNativeScanBatchCreator.java
License:Apache License
@Override
public ScanBatch getBatch(FragmentContext context, HiveDrillNativeParquetSubScan config,
    List<RecordBatch> children) throws ExecutionSetupException {
  final Table table = config.getTable();
  final List<InputSplit> splits = config.getInputSplits();
  final List<Partition> partitions = config.getPartitions();
  final List<SchemaPath> columns = config.getColumns();
  final String partitionDesignator = context.getOptions()
      .getOption(ExecConstants.FILESYSTEM_PARTITION_COLUMN_LABEL).string_val;
  List<Map<String, String>> implicitColumns = Lists.newLinkedList();
  boolean selectAllQuery = AbstractRecordReader.isStarQuery(columns);

  final boolean hasPartitions = (partitions != null && partitions.size() > 0);

  final List<String[]> partitionColumns = Lists.newArrayList();
  final List<Integer> selectedPartitionColumns = Lists.newArrayList();
  List<SchemaPath> newColumns = columns;
  if (!selectAllQuery) {
    // Separate out the partition and non-partition columns. Non-partition columns are passed directly to the
    // ParquetRecordReader. Partition columns are passed to ScanBatch.
    newColumns = Lists.newArrayList();
    Pattern pattern = Pattern.compile(String.format("%s[0-9]+", partitionDesignator));
    for (SchemaPath column : columns) {
      Matcher m = pattern.matcher(column.getAsUnescapedPath());
      if (m.matches()) {
        selectedPartitionColumns.add(
            Integer.parseInt(column.getAsUnescapedPath().substring(partitionDesignator.length())));
      } else {
        newColumns.add(column);
      }
    }
  }

  final OperatorContext oContext = context.newOperatorContext(config);

  int currentPartitionIndex = 0;
  final List<RecordReader> readers = Lists.newArrayList();

  final HiveConf conf = config.getHiveConf();

  // TODO: In future we can get this cache from Metadata cached on filesystem.
  final Map<String, ParquetMetadata> footerCache = Maps.newHashMap();

  Map<String, String> mapWithMaxColumns = Maps.newLinkedHashMap();
  try {
    for (InputSplit split : splits) {
      final FileSplit fileSplit = (FileSplit) split;
      final Path finalPath = fileSplit.getPath();
      final JobConf cloneJob =
          new ProjectionPusher().pushProjectionsAndFilters(new JobConf(conf), finalPath.getParent());
      final FileSystem fs = finalPath.getFileSystem(cloneJob);

      ParquetMetadata parquetMetadata = footerCache.get(finalPath.toString());
      if (parquetMetadata == null) {
        parquetMetadata = ParquetFileReader.readFooter(cloneJob, finalPath);
        footerCache.put(finalPath.toString(), parquetMetadata);
      }
      final List<Integer> rowGroupNums = getRowGroupNumbersFromFileSplit(fileSplit, parquetMetadata);

      for (int rowGroupNum : rowGroupNums) {
        readers.add(new ParquetRecordReader(context,
            Path.getPathWithoutSchemeAndAuthority(finalPath).toString(),
            rowGroupNum, fs,
            CodecFactory.createDirectCodecFactory(fs.getConf(),
                new ParquetDirectByteBufferAllocator(oContext.getAllocator()), 0),
            parquetMetadata, newColumns));

        Map<String, String> implicitValues = Maps.newLinkedHashMap();

        if (hasPartitions) {
          List<String> values = partitions.get(currentPartitionIndex).getValues();
          for (int i = 0; i < values.size(); i++) {
            if (selectAllQuery || selectedPartitionColumns.contains(i)) {
              implicitValues.put(partitionDesignator + i, values.get(i));
            }
          }
        }
        implicitColumns.add(implicitValues);
        if (implicitValues.size() > mapWithMaxColumns.size()) {
          mapWithMaxColumns = implicitValues;
        }
      }
      currentPartitionIndex++;
    }
  } catch (final IOException | RuntimeException e) {
    AutoCloseables.close(e, readers);
    throw new ExecutionSetupException("Failed to create RecordReaders. " + e.getMessage(), e);
  }

  // all readers should have the same number of implicit columns, add missing ones with value null
  mapWithMaxColumns = Maps.transformValues(mapWithMaxColumns, Functions.constant((String) null));
  for (Map<String, String> map : implicitColumns) {
    map.putAll(Maps.difference(map, mapWithMaxColumns).entriesOnlyOnRight());
  }

  // If there are no readers created (which is possible when the table is empty or no row groups are matched),
  // create an empty RecordReader to output the schema
  if (readers.size() == 0) {
    readers.add(new HiveRecordReader(table, null, null, columns, context, conf,
        ImpersonationUtil.createProxyUgi(config.getUserName(), context.getQueryUserName())));
  }

  return new ScanBatch(config, context, oContext, readers.iterator(), implicitColumns);
}