Example usage for org.apache.hadoop.fs FileSystem getConf

List of usage examples for org.apache.hadoop.fs FileSystem getConf

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem getConf().

Prototype

@Override
public Configuration getConf()
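
Before looking at the examples, here is a minimal, self-contained sketch of the typical pattern: resolve a FileSystem for a path and hand the Configuration returned by getConf() to other Hadoop APIs. The path and property key below are illustrative placeholders, not taken from the examples on this page.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;

public class GetConfExample {
    public static void main(String[] args) throws IOException {
        // Hypothetical input path; replace with a real sequence file.
        Path path = new Path("hdfs:///tmp/example.seq");

        // Resolve the FileSystem serving this path from a base Configuration.
        Configuration base = new Configuration();
        FileSystem fs = path.getFileSystem(base);

        // getConf() exposes the Configuration the FileSystem was initialized with.
        // It can be inspected directly...
        System.out.println(fs.getConf().get("fs.defaultFS"));

        // ...or passed on to other Hadoop components, as the examples below do
        // with SequenceFile.Reader, CacheConfig, HadoopInput, and FsInput.
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, fs.getConf());
        reader.close();
    }
}

When the configuration needs to be modified, several of the examples below copy it first (new Configuration(fs.getConf())) rather than mutating the FileSystem's own Configuration.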

Source Link

Usage

From source file: org.apache.crunch.io.avro.trevni.TrevniFileReaderFactory.java

License: Apache License

@Override
public Iterator<T> read(FileSystem fs, final Path path) {
    this.mapFn.initialize();
    try {
        HadoopInput input = new HadoopInput(path, fs.getConf());
        final AvroColumnReader<T> reader = getReader(input, aType, schema);
        return new AutoClosingIterator<T>(reader, new UnmodifiableIterator<T>() {
            @Override
            public boolean hasNext() {
                return reader.hasNext();
            }

            @Override
            public T next() {
                return mapFn.map(reader.next());
            }
        });
    } catch (IOException e) {
        LOG.info("Could not read avro file at path: " + path, e);
        return Iterators.emptyIterator();
    }
}

From source file: org.apache.crunch.io.hbase.HFileReaderFactory.java

License: Apache License

@Override
public Iterator<KeyValue> read(FileSystem fs, Path path) {
    Configuration conf = fs.getConf();
    CacheConfig cacheConfig = new CacheConfig(conf);
    try {
        HFile.Reader hfr = HFile.createReader(fs, path, cacheConfig);
        HFileScanner scanner = hfr.getScanner(conf.getBoolean(HFILE_SCANNER_CACHE_BLOCKS, false),
                conf.getBoolean(HFILE_SCANNER_PREAD, false));
        scanner.seekTo();
        return new HFileIterator(scanner);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file: org.apache.crunch.io.hbase.HFileTargetIT.java

License: Apache License

/** Reads the first value on a given row from a bunch of hfiles. */
private static KeyValue readFromHFiles(FileSystem fs, Path mrOutputPath, String row) throws IOException {
    List<KeyValueScanner> scanners = Lists.newArrayList();
    KeyValue fakeKV = KeyValue.createFirstOnRow(Bytes.toBytes(row));
    for (FileStatus e : fs.listStatus(mrOutputPath)) {
        Path f = e.getPath();
        if (!f.getName().startsWith("part-")) { // filter out "_SUCCESS"
            continue;
        }
        StoreFile.Reader reader = new StoreFile.Reader(fs, f, new CacheConfig(fs.getConf()),
                DataBlockEncoding.NONE);
        StoreFileScanner scanner = reader.getStoreFileScanner(false, false);
        scanner.seek(fakeKV); // have to call seek of each underlying scanner, otherwise KeyValueHeap won't work
        scanners.add(scanner);
    }
    assertTrue(!scanners.isEmpty());
    KeyValueScanner kvh = new KeyValueHeap(scanners, KeyValue.COMPARATOR);
    boolean seekOk = kvh.seek(fakeKV);
    assertTrue(seekOk);
    KeyValue kv = kvh.next();
    kvh.close();
    return kv;
}

From source file: org.apache.crunch.io.impl.DefaultFileReaderFactory.java

License: Apache License

@Override
public Iterator<T> read(FileSystem fs, Path path) {
    final Configuration conf = new Configuration(fs.getConf());
    bundle.configure(conf);
    ptype.initialize(conf);

    final InputFormat fmt = ReflectionUtils.newInstance(bundle.getFormatClass(), conf);
    final TaskAttemptContext ctxt = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    try {
        Job job = new Job(conf);
        FileInputFormat.addInputPath(job, path);
        return Iterators.concat(Lists.transform(fmt.getSplits(job), new Function<InputSplit, Iterator<T>>() {
            @Override
            public Iterator<T> apply(InputSplit split) {
                try {
                    RecordReader reader = fmt.createRecordReader(split, ctxt);
                    reader.initialize(split, ctxt);
                    return new RecordReaderIterator<T>(reader, ptype);
                } catch (Exception e) {
                    LOG.error("Error reading split: " + split, e);
                    throw new CrunchRuntimeException(e);
                }
            }
        }).iterator());
    } catch (Exception e) {
        LOG.error("Error reading path: " + path, e);
        throw new CrunchRuntimeException(e);
    }
}

From source file: org.apache.crunch.io.seq.SeqFileReaderFactory.java

License: Apache License

@Override
public Iterator<T> read(FileSystem fs, final Path path) {
    mapFn.initialize();
    try {
        final SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, fs.getConf());
        return new AutoClosingIterator<T>(reader, new UnmodifiableIterator<T>() {
            boolean nextChecked = false;
            boolean hasNext = false;

            @Override
            public boolean hasNext() {
                if (nextChecked) {
                    return hasNext;
                }
                try {
                    hasNext = reader.next(key, value);
                    nextChecked = true;
                    return hasNext;
                } catch (IOException e) {
                    LOG.info("Error reading from path: " + path, e);
                    return false;
                }
            }

            @Override
            public T next() {
                if (!nextChecked && !hasNext()) {
                    return null;
                }
                nextChecked = false;
                return mapFn.map(converter.convertInput(key, value));
            }
        });
    } catch (IOException e) {
        LOG.info("Could not read seqfile at path: " + path, e);
        return Iterators.emptyIterator();
    }
}

From source file: org.apache.crunch.SparkHFileTargetIT.java

License: Apache License

/** Reads the first value on a given row from a bunch of hfiles. */
private static KeyValue readFromHFiles(FileSystem fs, Path mrOutputPath, String row) throws IOException {
    List<KeyValueScanner> scanners = Lists.newArrayList();
    KeyValue fakeKV = KeyValue.createFirstOnRow(Bytes.toBytes(row));
    for (FileStatus e : fs.listStatus(mrOutputPath)) {
        Path f = e.getPath();
        if (!f.getName().startsWith("part-")) { // filter out "_SUCCESS"
            continue;
        }
        StoreFile.Reader reader = new StoreFile.Reader(fs, f, new CacheConfig(fs.getConf()), fs.getConf());
        StoreFileScanner scanner = reader.getStoreFileScanner(false, false);
        scanner.seek(fakeKV); // have to call seek of each underlying scanner, otherwise KeyValueHeap won't work
        scanners.add(scanner);
    }
    assertTrue(!scanners.isEmpty());
    KeyValueScanner kvh = new KeyValueHeap(scanners, KeyValue.COMPARATOR);
    boolean seekOk = kvh.seek(fakeKV);
    assertTrue(seekOk);
    Cell kv = kvh.next();
    kvh.close();
    return KeyValue.cloneAndAddTags(kv, ImmutableList.<Tag>of());
}

From source file: org.apache.drill.exec.planner.common.DrillStatsTable.java

License: Apache License

public DrillStatsTable(String schemaName, String tableName, Path tablePath, FileSystem fs) {
    this.schemaName = schemaName;
    this.tableName = tableName;
    this.tablePath = tablePath;
    this.fs = ImpersonationUtil.createFileSystem(ImpersonationUtil.getProcessUserName(), fs.getConf());
}

From source file: org.apache.drill.exec.record.metadata.schema.PathSchemaProvider.java

License: Apache License

public PathSchemaProvider(FileSystem fs, Path path) throws IOException {
    this.fs = ImpersonationUtil.createFileSystem(ImpersonationUtil.getProcessUserName(), fs.getConf());

    if (!fs.exists(path.getParent())) {
        throw new IOException(
                String.format("Parent path for schema file [%s] does not exist", path.toUri().getPath()));
    }

    this.path = path;
}

From source file: org.apache.drill.exec.store.avro.AvroRecordReader.java

License: Apache License

private DataFileReader<GenericContainer> getReader(final Path hadoop, final FileSystem fs)
        throws ExecutionSetupException {
    try {
        final UserGroupInformation ugi = ImpersonationUtil.createProxyUgi(this.opUserName, this.queryUserName);
        return ugi.doAs(new PrivilegedExceptionAction<DataFileReader<GenericContainer>>() {
            @Override
            public DataFileReader<GenericContainer> run() throws Exception {
                return new DataFileReader<>(new FsInput(hadoop, fs.getConf()),
                        new GenericDatumReader<GenericContainer>());
            }
        });
    } catch (IOException | InterruptedException e) {
        throw new ExecutionSetupException(String.format("Error in creating avro reader for file: %s", hadoop),
                e);
    }
}

From source file: org.apache.drill.exec.store.hive.HiveDrillNativeScanBatchCreator.java

License: Apache License

@Override
public ScanBatch getBatch(FragmentContext context, HiveDrillNativeParquetSubScan config,
        List<RecordBatch> children) throws ExecutionSetupException {
    final Table table = config.getTable();
    final List<InputSplit> splits = config.getInputSplits();
    final List<Partition> partitions = config.getPartitions();
    final List<SchemaPath> columns = config.getColumns();
    final String partitionDesignator = context.getOptions()
            .getOption(ExecConstants.FILESYSTEM_PARTITION_COLUMN_LABEL).string_val;
    List<Map<String, String>> implicitColumns = Lists.newLinkedList();
    boolean selectAllQuery = AbstractRecordReader.isStarQuery(columns);

    final boolean hasPartitions = (partitions != null && partitions.size() > 0);

    final List<String[]> partitionColumns = Lists.newArrayList();
    final List<Integer> selectedPartitionColumns = Lists.newArrayList();
    List<SchemaPath> newColumns = columns;
    if (!selectAllQuery) {
        // Separate out the partition and non-partition columns. Non-partition columns are passed directly to the
        // ParquetRecordReader. Partition columns are passed to ScanBatch.
        newColumns = Lists.newArrayList();
        Pattern pattern = Pattern.compile(String.format("%s[0-9]+", partitionDesignator));
        for (SchemaPath column : columns) {
            Matcher m = pattern.matcher(column.getAsUnescapedPath());
            if (m.matches()) {
                selectedPartitionColumns.add(
                        Integer.parseInt(column.getAsUnescapedPath().substring(partitionDesignator.length())));
            } else {
                newColumns.add(column);
            }
        }
    }

    final OperatorContext oContext = context.newOperatorContext(config);

    int currentPartitionIndex = 0;
    final List<RecordReader> readers = Lists.newArrayList();

    final HiveConf conf = config.getHiveConf();

    // TODO: In future we can get this cache from Metadata cached on filesystem.
    final Map<String, ParquetMetadata> footerCache = Maps.newHashMap();

    Map<String, String> mapWithMaxColumns = Maps.newLinkedHashMap();
    try {
        for (InputSplit split : splits) {
            final FileSplit fileSplit = (FileSplit) split;
            final Path finalPath = fileSplit.getPath();
            final JobConf cloneJob = new ProjectionPusher().pushProjectionsAndFilters(new JobConf(conf),
                    finalPath.getParent());
            final FileSystem fs = finalPath.getFileSystem(cloneJob);

            ParquetMetadata parquetMetadata = footerCache.get(finalPath.toString());
            if (parquetMetadata == null) {
                parquetMetadata = ParquetFileReader.readFooter(cloneJob, finalPath);
                footerCache.put(finalPath.toString(), parquetMetadata);
            }
            final List<Integer> rowGroupNums = getRowGroupNumbersFromFileSplit(fileSplit, parquetMetadata);

            for (int rowGroupNum : rowGroupNums) {
                readers.add(new ParquetRecordReader(context,
                        Path.getPathWithoutSchemeAndAuthority(finalPath).toString(), rowGroupNum, fs,
                        CodecFactory.createDirectCodecFactory(fs.getConf(),
                                new ParquetDirectByteBufferAllocator(oContext.getAllocator()), 0),
                        parquetMetadata, newColumns));
                Map<String, String> implicitValues = Maps.newLinkedHashMap();

                if (hasPartitions) {
                    List<String> values = partitions.get(currentPartitionIndex).getValues();
                    for (int i = 0; i < values.size(); i++) {
                        if (selectAllQuery || selectedPartitionColumns.contains(i)) {
                            implicitValues.put(partitionDesignator + i, values.get(i));
                        }
                    }
                }
                implicitColumns.add(implicitValues);
                if (implicitValues.size() > mapWithMaxColumns.size()) {
                    mapWithMaxColumns = implicitValues;
                }
            }
            currentPartitionIndex++;
        }
    } catch (final IOException | RuntimeException e) {
        AutoCloseables.close(e, readers);
        throw new ExecutionSetupException("Failed to create RecordReaders. " + e.getMessage(), e);
    }

    // all readers should have the same number of implicit columns, add missing ones with value null
    mapWithMaxColumns = Maps.transformValues(mapWithMaxColumns, Functions.constant((String) null));
    for (Map<String, String> map : implicitColumns) {
        map.putAll(Maps.difference(map, mapWithMaxColumns).entriesOnlyOnRight());
    }

    // If there are no readers created (which is possible when the table is empty or no row groups are matched),
    // create an empty RecordReader to output the schema
    if (readers.size() == 0) {
        readers.add(new HiveRecordReader(table, null, null, columns, context, conf,
                ImpersonationUtil.createProxyUgi(config.getUserName(), context.getQueryUserName())));
    }

    return new ScanBatch(config, context, oContext, readers.iterator(), implicitColumns);
}