Example usage for org.apache.hadoop.fs Path getPathWithoutSchemeAndAuthority


Introduction

On this page you can find example usage for org.apache.hadoop.fs Path getPathWithoutSchemeAndAuthority.

Prototype

public static Path getPathWithoutSchemeAndAuthority(Path path) 

Document

Return a version of the given Path without the scheme and authority information.
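
For example, calling the method on a fully qualified HDFS path strips both the scheme and the authority and keeps only the path component. A minimal sketch, assuming a hypothetical namenode address and file path:

import org.apache.hadoop.fs.Path;

public class PathWithoutSchemeExample {
    public static void main(String[] args) {
        // Fully qualified path: scheme "hdfs", authority "namenode:8020" (both made up for illustration).
        Path qualified = new Path("hdfs://namenode:8020/user/data/part-00000.parquet");

        // Strips scheme and authority, leaving only the path component.
        Path bare = Path.getPathWithoutSchemeAndAuthority(qualified);

        System.out.println(bare); // prints /user/data/part-00000.parquet
    }
}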

Usage

From source file:org.apache.carbondata.core.datastore.impl.FileFactory.java

License:Apache License

/**
 * Below method will be used to update the file path
 * for the local file type: it removes the file:/ prefix from the path.
 *
 * @param filePath file path to update
 * @param fileType file type of the path (HDFS, ALLUXIO, VIEWFS or LOCAL)
 * @return updated file path without the URL prefix for local paths
 */
private static String getUpdatedFilePath(String filePath, FileType fileType) {
    switch (fileType) {
    case HDFS:
    case ALLUXIO:
    case VIEWFS:
        return filePath;
    case LOCAL:
    default:
        if (filePath != null && !filePath.isEmpty()) {
            Path pathWithoutSchemeAndAuthority = Path.getPathWithoutSchemeAndAuthority(new Path(filePath));
            return pathWithoutSchemeAndAuthority.toString();
        } else {
            return filePath;
        }
    }
}
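
For a LOCAL path the same call simply drops the file:/ prefix, which is the effect getUpdatedFilePath relies on. A small sketch, assuming a hypothetical local store path:

import org.apache.hadoop.fs.Path;

public class LocalPathExample {
    public static void main(String[] args) {
        // Hypothetical local path: "file" is the scheme and there is no authority.
        Path local = new Path("file:/tmp/carbondata/store/table1");
        String updated = Path.getPathWithoutSchemeAndAuthority(local).toString();
        System.out.println(updated); // prints /tmp/carbondata/store/table1
    }
}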

From source file:org.apache.drill.exec.physical.impl.scan.file.FileMetadata.java

License:Apache License

public FileMetadata(Path filePath, Path selectionRoot) {
    this.filePath = filePath;

    // If the data source is not a file, no file metadata is available.

    if (selectionRoot == null || filePath == null) {
        dirPath = null;
        return;
    }

    // If the query is against a single file, selection root and file path
    // will be identical, oddly.

    Path rootPath = Path.getPathWithoutSchemeAndAuthority(selectionRoot);
    Path bareFilePath = Path.getPathWithoutSchemeAndAuthority(filePath);
    if (rootPath.equals(bareFilePath)) {
        dirPath = null;
        return;
    }

    dirPath = ColumnExplorer.parsePartitions(filePath, rootPath, false);
    if (dirPath == null) {
        throw new IllegalArgumentException(
                String.format("Selection root of \"%s\" is not a leading path of \"%s\"",
                        selectionRoot.toString(), filePath.toString()));
    }
}

From source file:org.apache.drill.exec.planner.DFSFilePartitionLocation.java

License:Apache License

public DFSFilePartitionLocation(int max, String selectionRoot, String file, boolean hasDirsOnly) {
    this.file = file;
    this.dirs = new String[max];

    // strip the scheme and authority if they exist
    selectionRoot = Path.getPathWithoutSchemeAndAuthority(new Path(selectionRoot)).toString();

    int start = file.indexOf(selectionRoot) + selectionRoot.length();
    String postPath = file.substring(start);
    if (postPath.length() == 0) {
        return;
    }
    if (postPath.charAt(0) == '/') {
        postPath = postPath.substring(1);
    }
    String[] mostDirs = postPath.split("/");
    int maxLoop = Math.min(max, hasDirsOnly ? mostDirs.length : mostDirs.length - 1);
    for (int i = 0; i < maxLoop; i++) {
        this.dirs[i] = mostDirs[i];
    }
}

From source file:org.apache.drill.exec.planner.DFSPartitionLocation.java

License:Apache License

public DFSPartitionLocation(int max, String selectionRoot, String file) {
    this.file = file;
    this.dirs = new String[max];

    // strip the scheme and authority if they exist
    selectionRoot = Path.getPathWithoutSchemeAndAuthority(new Path(selectionRoot)).toString();

    int start = file.indexOf(selectionRoot) + selectionRoot.length();
    String postPath = file.substring(start);
    if (postPath.length() == 0) {
        return;
    }
    if (postPath.charAt(0) == '/') {
        postPath = postPath.substring(1);
    }
    String[] mostDirs = postPath.split("/");
    int maxLoop = Math.min(max, mostDirs.length - 1);
    for (int i = 0; i < maxLoop; i++) {
        this.dirs[i] = mostDirs[i];
    }
}
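
Both constructors derive the partition directories from the portion of the file path below the selection root. A hedged sketch of that parsing, using made-up root and file paths:

import java.util.Arrays;
import org.apache.hadoop.fs.Path;

public class PartitionDirsExample {
    public static void main(String[] args) {
        // Hypothetical selection root and a file two directory levels below it.
        String selectionRoot = Path.getPathWithoutSchemeAndAuthority(
                new Path("hdfs://namenode:8020/data/orders")).toString(); // "/data/orders"
        String file = "hdfs://namenode:8020/data/orders/2019/01/part-0.parquet";

        // Same substring logic as the constructors above.
        int start = file.indexOf(selectionRoot) + selectionRoot.length();
        String postPath = file.substring(start); // "/2019/01/part-0.parquet"
        if (postPath.charAt(0) == '/') {
            postPath = postPath.substring(1);
        }
        String[] parts = postPath.split("/"); // ["2019", "01", "part-0.parquet"]

        // The last element is the file name, so only the leading entries become dirs.
        String[] dirs = Arrays.copyOf(parts, parts.length - 1);
        System.out.println(Arrays.toString(dirs)); // [2019, 01]
    }
}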

From source file:org.apache.drill.exec.planner.ParquetPartitionDescriptor.java

License:Apache License

private void populatePruningVector(ValueVector v, int index, SchemaPath column, Path file) {
    Path path = Path.getPathWithoutSchemeAndAuthority(file);
    TypeProtos.MajorType majorType = getVectorType(column, null);
    TypeProtos.MinorType type = majorType.getMinorType();
    switch (type) {
    case BIT: {
        NullableBitVector bitVector = (NullableBitVector) v;
        Boolean value = groupScan.getPartitionValue(path, column, Boolean.class);
        if (value == null) {
            bitVector.getMutator().setNull(index);
        } else {
            bitVector.getMutator().setSafe(index, value ? 1 : 0);
        }
        return;
    }
    case INT: {
        NullableIntVector intVector = (NullableIntVector) v;
        Integer value = groupScan.getPartitionValue(path, column, Integer.class);
        if (value == null) {
            intVector.getMutator().setNull(index);
        } else {
            intVector.getMutator().setSafe(index, value);
        }
        return;
    }
    case SMALLINT: {
        NullableSmallIntVector smallIntVector = (NullableSmallIntVector) v;
        Integer value = groupScan.getPartitionValue(path, column, Integer.class);
        if (value == null) {
            smallIntVector.getMutator().setNull(index);
        } else {
            smallIntVector.getMutator().setSafe(index, value.shortValue());
        }
        return;
    }
    case TINYINT: {
        NullableTinyIntVector tinyIntVector = (NullableTinyIntVector) v;
        Integer value = groupScan.getPartitionValue(path, column, Integer.class);
        if (value == null) {
            tinyIntVector.getMutator().setNull(index);
        } else {
            tinyIntVector.getMutator().setSafe(index, value.byteValue());
        }
        return;
    }
    case UINT1: {
        NullableUInt1Vector intVector = (NullableUInt1Vector) v;
        Integer value = groupScan.getPartitionValue(path, column, Integer.class);
        if (value == null) {
            intVector.getMutator().setNull(index);
        } else {
            intVector.getMutator().setSafe(index, value.byteValue());
        }
        return;
    }
    case UINT2: {
        NullableUInt2Vector intVector = (NullableUInt2Vector) v;
        Integer value = groupScan.getPartitionValue(path, column, Integer.class);
        if (value == null) {
            intVector.getMutator().setNull(index);
        } else {
            intVector.getMutator().setSafe(index, (char) value.shortValue());
        }
        return;
    }
    case UINT4: {
        NullableUInt4Vector intVector = (NullableUInt4Vector) v;
        Integer value = groupScan.getPartitionValue(path, column, Integer.class);
        if (value == null) {
            intVector.getMutator().setNull(index);
        } else {
            intVector.getMutator().setSafe(index, value);
        }
        return;
    }
    case BIGINT: {
        NullableBigIntVector bigIntVector = (NullableBigIntVector) v;
        Long value = groupScan.getPartitionValue(path, column, Long.class);
        if (value == null) {
            bigIntVector.getMutator().setNull(index);
        } else {
            bigIntVector.getMutator().setSafe(index, value);
        }
        return;
    }
    case FLOAT4: {
        NullableFloat4Vector float4Vector = (NullableFloat4Vector) v;
        Float value = groupScan.getPartitionValue(path, column, Float.class);
        if (value == null) {
            float4Vector.getMutator().setNull(index);
        } else {
            float4Vector.getMutator().setSafe(index, value);
        }
        return;
    }
    case FLOAT8: {
        NullableFloat8Vector float8Vector = (NullableFloat8Vector) v;
        Double value = groupScan.getPartitionValue(path, column, Double.class);
        if (value == null) {
            float8Vector.getMutator().setNull(index);
        } else {
            float8Vector.getMutator().setSafe(index, value);
        }
        return;
    }
    case VARBINARY: {
        NullableVarBinaryVector varBinaryVector = (NullableVarBinaryVector) v;
        Object s = groupScan.getPartitionValue(path, column, Object.class);
        byte[] bytes;
        if (s == null) {
            varBinaryVector.getMutator().setNull(index);
            return;
        } else {
            bytes = getBytes(type, s);
        }
        varBinaryVector.getMutator().setSafe(index, bytes, 0, bytes.length);
        return;
    }
    case VARDECIMAL: {
        NullableVarDecimalVector decimalVector = (NullableVarDecimalVector) v;
        Object s = groupScan.getPartitionValue(path, column, Object.class);
        byte[] bytes;
        if (s == null) {
            decimalVector.getMutator().setNull(index);
            return;
        } else if (s instanceof Integer) {
            bytes = Ints.toByteArray((int) s);
        } else if (s instanceof Long) {
            bytes = Longs.toByteArray((long) s);
        } else {
            bytes = getBytes(type, s);
        }
        decimalVector.getMutator().setSafe(index, bytes, 0, bytes.length);
        return;
    }
    case DATE: {
        NullableDateVector dateVector = (NullableDateVector) v;
        Integer value = groupScan.getPartitionValue(path, column, Integer.class);
        if (value == null) {
            dateVector.getMutator().setNull(index);
        } else {
            dateVector.getMutator().setSafe(index, value * (long) DateTimeConstants.MILLIS_PER_DAY);
        }
        return;
    }
    case TIME: {
        NullableTimeVector timeVector = (NullableTimeVector) v;
        Integer value = groupScan.getPartitionValue(path, column, Integer.class);
        if (value == null) {
            timeVector.getMutator().setNull(index);
        } else {
            timeVector.getMutator().setSafe(index, value);
        }
        return;
    }
    case TIMESTAMP: {
        NullableTimeStampVector timeStampVector = (NullableTimeStampVector) v;
        Long value = groupScan.getPartitionValue(path, column, Long.class);
        if (value == null) {
            timeStampVector.getMutator().setNull(index);
        } else {
            timeStampVector.getMutator().setSafe(index, value);
        }
        return;
    }
    case VARCHAR: {
        NullableVarCharVector varCharVector = (NullableVarCharVector) v;
        Object s = groupScan.getPartitionValue(path, column, Object.class);
        byte[] bytes;
        if (s == null) {
            varCharVector.getMutator().setNull(index);
            return;
        } else {
            bytes = getBytes(type, s);
        }
        varCharVector.getMutator().setSafe(index, bytes, 0, bytes.length);
        return;
    }
    case INTERVAL: {
        NullableIntervalVector intervalVector = (NullableIntervalVector) v;
        Object s = groupScan.getPartitionValue(path, column, Object.class);
        byte[] bytes;
        if (s == null) {
            intervalVector.getMutator().setNull(index);
            return;
        } else {
            bytes = getBytes(type, s);
        }
        intervalVector.getMutator().setSafe(index, 1, ParquetReaderUtility.getIntFromLEBytes(bytes, 0),
                ParquetReaderUtility.getIntFromLEBytes(bytes, 4),
                ParquetReaderUtility.getIntFromLEBytes(bytes, 8));
        return;
    }
    default:
        throw new UnsupportedOperationException("Unsupported type: " + type);
    }
}

From source file:org.apache.drill.exec.store.ColumnExplorer.java

License:Apache License

/**
 * Creates a map of implicit columns where the key is the column name and the value is the column's actual value.
 * This map contains partition and implicit file columns (if requested).
 * Partition column names are formed from the partition designator and the value index.
 *
 * @param filePath file path, used to populate file implicit columns
 * @param partitionValues list of partition values
 * @param includeFileImplicitColumns whether file implicit columns should be included in the result
 * @return implicit columns map
 */
public Map<String, String> populateImplicitColumns(Path filePath, List<String> partitionValues,
        boolean includeFileImplicitColumns) {
    Map<String, String> implicitValues = new LinkedHashMap<>();

    for (int i = 0; i < partitionValues.size(); i++) {
        if (isStarQuery || selectedPartitionColumns.contains(i)) {
            implicitValues.put(partitionDesignator + i, partitionValues.get(i));
        }
    }

    if (includeFileImplicitColumns) {
        Path path = Path.getPathWithoutSchemeAndAuthority(filePath);
        for (Map.Entry<String, ImplicitFileColumns> entry : selectedImplicitColumns.entrySet()) {
            implicitValues.put(entry.getKey(), entry.getValue().getValue(path));
        }
    }

    return implicitValues;
}
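
As the loop above shows, partition values are keyed by the partition designator concatenated with the value index. A short hedged illustration, assuming the designator "dir" and two made-up partition values:

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class ImplicitColumnsExample {
    public static void main(String[] args) {
        // Hypothetical partition designator and partition values.
        String partitionDesignator = "dir";
        List<String> partitionValues = Arrays.asList("2019", "01");

        // Same keying scheme as populateImplicitColumns: designator + index -> value.
        Map<String, String> implicitValues = new LinkedHashMap<>();
        for (int i = 0; i < partitionValues.size(); i++) {
            implicitValues.put(partitionDesignator + i, partitionValues.get(i));
        }
        System.out.println(implicitValues); // {dir0=2019, dir1=01}
    }
}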

From source file:org.apache.drill.exec.store.dfs.FileSelection.java

License:Apache License

/**
 * Returns the longest common path for the given list of files.
 *
 * @param files list of files
 * @return longest common path
 */
private static String commonPathForFiles(final List<String> files) {
    if (files == null || files.isEmpty()) {
        return "";
    }

    final int total = files.size();
    final String[][] folders = new String[total][];
    int shortest = Integer.MAX_VALUE;
    for (int i = 0; i < total; i++) {
        final Path path = new Path(files.get(i));
        folders[i] = Path.getPathWithoutSchemeAndAuthority(path).toString().split(PATH_SEPARATOR);
        shortest = Math.min(shortest, folders[i].length);
    }

    int latest;
    out: for (latest = 0; latest < shortest; latest++) {
        final String current = folders[0][latest];
        for (int i = 1; i < folders.length; i++) {
            if (!current.equals(folders[i][latest])) {
                break out;
            }
        }
    }
    final Path path = new Path(files.get(0));
    final URI uri = path.toUri();
    final String pathString = buildPath(folders[0], latest);
    return new Path(uri.getScheme(), uri.getAuthority(), pathString).toString();
}
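
The method strips the scheme and authority from each file, compares the folder arrays position by position, and rebuilds a path from the shared prefix. A hedged standalone sketch of that idea on two made-up files:

import java.util.Arrays;
import org.apache.hadoop.fs.Path;

public class CommonPathExample {
    public static void main(String[] args) {
        // Hypothetical input files under a shared parent directory.
        String[] files = { "hdfs://namenode:8020/data/orders/2019/01/part-0.parquet",
                "hdfs://namenode:8020/data/orders/2019/02/part-1.parquet" };

        // Split the scheme/authority-free form of each path into folder names.
        String[][] folders = new String[files.length][];
        int shortest = Integer.MAX_VALUE;
        for (int i = 0; i < files.length; i++) {
            folders[i] = Path.getPathWithoutSchemeAndAuthority(new Path(files[i])).toString().split("/");
            shortest = Math.min(shortest, folders[i].length);
        }

        // Count how many leading folder names all files share.
        int latest;
        out: for (latest = 0; latest < shortest; latest++) {
            for (int i = 1; i < folders.length; i++) {
                if (!folders[0][latest].equals(folders[i][latest])) {
                    break out;
                }
            }
        }

        // Rebuild the shared prefix; for these inputs it is /data/orders/2019.
        System.out.println(String.join("/", Arrays.copyOf(folders[0], latest)));
    }
}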

From source file:org.apache.drill.exec.store.hive.HiveDrillNativeParquetScan.java

License:Apache License

public HiveDrillNativeParquetScan(String userName, List<SchemaPath> columns,
        HiveStoragePlugin hiveStoragePlugin, List<LogicalInputSplit> logicalInputSplits,
        Map<String, String> confProperties, ParquetReaderConfig readerConfig, LogicalExpression filter)
        throws IOException {
    super(userName, columns, new ArrayList<>(), readerConfig, filter);

    this.hiveStoragePlugin = hiveStoragePlugin;
    this.hivePartitionHolder = new HivePartitionHolder();
    this.confProperties = confProperties;

    for (LogicalInputSplit logicalInputSplit : logicalInputSplits) {
        Iterator<InputSplit> iterator = logicalInputSplit.getInputSplits().iterator();
        // a logical input split contains a list of splits grouped by file,
        // so reading the path of only one split is enough to get the file path
        assert iterator.hasNext();
        InputSplit split = iterator.next();
        assert split instanceof FileSplit;
        FileSplit fileSplit = (FileSplit) split;
        Path finalPath = fileSplit.getPath();
        Path pathString = Path.getPathWithoutSchemeAndAuthority(finalPath);
        entries.add(new ReadEntryWithPath(pathString));

        // store partition values per path
        Partition partition = logicalInputSplit.getPartition();
        if (partition != null) {
            hivePartitionHolder.add(pathString, partition.getValues());
        }
    }

    init();
}

From source file:org.apache.drill.exec.store.hive.HiveDrillNativeParquetScan.java

License:Apache License

@Override
protected void initInternal() throws IOException {
    Map<FileStatus, FileSystem> fileStatusConfMap = new LinkedHashMap<>();
    for (ReadEntryWithPath entry : entries) {
        Path path = entry.getPath();
        Configuration conf = new ProjectionPusher()
                .pushProjectionsAndFilters(new JobConf(hiveStoragePlugin.getHiveConf()), path.getParent());
        FileSystem fs = path.getFileSystem(conf);
        fileStatusConfMap.put(fs.getFileStatus(Path.getPathWithoutSchemeAndAuthority(path)), fs);
    }
    parquetTableMetadata = Metadata.getParquetTableMetadata(fileStatusConfMap, readerConfig);
}

From source file:org.apache.drill.exec.store.hive.HiveDrillNativeScanBatchCreator.java

License:Apache License

@Override
public ScanBatch getBatch(FragmentContext context, HiveDrillNativeParquetSubScan config,
        List<RecordBatch> children) throws ExecutionSetupException {
    final Table table = config.getTable();
    final List<InputSplit> splits = config.getInputSplits();
    final List<Partition> partitions = config.getPartitions();
    final List<SchemaPath> columns = config.getColumns();
    final String partitionDesignator = context.getOptions()
            .getOption(ExecConstants.FILESYSTEM_PARTITION_COLUMN_LABEL).string_val;
    List<Map<String, String>> implicitColumns = Lists.newLinkedList();
    boolean selectAllQuery = AbstractRecordReader.isStarQuery(columns);

    final boolean hasPartitions = (partitions != null && partitions.size() > 0);

    final List<String[]> partitionColumns = Lists.newArrayList();
    final List<Integer> selectedPartitionColumns = Lists.newArrayList();
    List<SchemaPath> newColumns = columns;
    if (!selectAllQuery) {
        // Separate out the partition and non-partition columns. Non-partition columns are passed directly to the
        // ParquetRecordReader. Partition columns are passed to ScanBatch.
        newColumns = Lists.newArrayList();
        Pattern pattern = Pattern.compile(String.format("%s[0-9]+", partitionDesignator));
        for (SchemaPath column : columns) {
            Matcher m = pattern.matcher(column.getAsUnescapedPath());
            if (m.matches()) {
                selectedPartitionColumns.add(
                        Integer.parseInt(column.getAsUnescapedPath().substring(partitionDesignator.length())));
            } else {
                newColumns.add(column);
            }
        }
    }

    final OperatorContext oContext = context.newOperatorContext(config);

    int currentPartitionIndex = 0;
    final List<RecordReader> readers = Lists.newArrayList();

    final HiveConf conf = config.getHiveConf();

    // TODO: In future we can get this cache from Metadata cached on filesystem.
    final Map<String, ParquetMetadata> footerCache = Maps.newHashMap();

    Map<String, String> mapWithMaxColumns = Maps.newLinkedHashMap();
    try {
        for (InputSplit split : splits) {
            final FileSplit fileSplit = (FileSplit) split;
            final Path finalPath = fileSplit.getPath();
            final JobConf cloneJob = new ProjectionPusher().pushProjectionsAndFilters(new JobConf(conf),
                    finalPath.getParent());
            final FileSystem fs = finalPath.getFileSystem(cloneJob);

            ParquetMetadata parquetMetadata = footerCache.get(finalPath.toString());
            if (parquetMetadata == null) {
                parquetMetadata = ParquetFileReader.readFooter(cloneJob, finalPath);
                footerCache.put(finalPath.toString(), parquetMetadata);
            }
            final List<Integer> rowGroupNums = getRowGroupNumbersFromFileSplit(fileSplit, parquetMetadata);

            for (int rowGroupNum : rowGroupNums) {
                readers.add(new ParquetRecordReader(context,
                        Path.getPathWithoutSchemeAndAuthority(finalPath).toString(), rowGroupNum, fs,
                        CodecFactory.createDirectCodecFactory(fs.getConf(),
                                new ParquetDirectByteBufferAllocator(oContext.getAllocator()), 0),
                        parquetMetadata, newColumns));
                Map<String, String> implicitValues = Maps.newLinkedHashMap();

                if (hasPartitions) {
                    List<String> values = partitions.get(currentPartitionIndex).getValues();
                    for (int i = 0; i < values.size(); i++) {
                        if (selectAllQuery || selectedPartitionColumns.contains(i)) {
                            implicitValues.put(partitionDesignator + i, values.get(i));
                        }
                    }
                }
                implicitColumns.add(implicitValues);
                if (implicitValues.size() > mapWithMaxColumns.size()) {
                    mapWithMaxColumns = implicitValues;
                }
            }
            currentPartitionIndex++;
        }
    } catch (final IOException | RuntimeException e) {
        AutoCloseables.close(e, readers);
        throw new ExecutionSetupException("Failed to create RecordReaders. " + e.getMessage(), e);
    }

    // all readers should have the same number of implicit columns, add missing ones with value null
    mapWithMaxColumns = Maps.transformValues(mapWithMaxColumns, Functions.constant((String) null));
    for (Map<String, String> map : implicitColumns) {
        map.putAll(Maps.difference(map, mapWithMaxColumns).entriesOnlyOnRight());
    }

    // If there are no readers created (which is possible when the table is empty or no row groups are matched),
    // create an empty RecordReader to output the schema
    if (readers.size() == 0) {
        readers.add(new HiveRecordReader(table, null, null, columns, context, conf,
                ImpersonationUtil.createProxyUgi(config.getUserName(), context.getQueryUserName())));
    }

    return new ScanBatch(config, context, oContext, readers.iterator(), implicitColumns);
}