List of usage examples for org.apache.hadoop.fs.FileSystem.getConf()
@Override
public Configuration getConf()
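FileSystem.getConf() returns the Hadoop Configuration that the FileSystem instance was initialized with. The examples below all follow the same pattern: the Configuration obtained from an already-open FileSystem is handed to another object (a reader, a cache, a codec factory, or a second FileSystem) so that it shares the file system's settings. A minimal sketch of that pattern, assuming a default local or cluster configuration; the path and the printed property are only illustrative:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetConfExample {
  public static void main(String[] args) throws Exception {
    // Build a FileSystem from a fresh Configuration (picks up core-site.xml, etc.)
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // getConf() hands back the Configuration this FileSystem was initialized with,
    // so downstream components can reuse the same settings.
    Configuration fsConf = fs.getConf();
    System.out.println("fs.defaultFS = " + fsConf.get("fs.defaultFS"));

    // Hypothetical path, used only to show the FileSystem is usable alongside its conf.
    Path path = new Path("/tmp/example.txt");
    System.out.println("exists? " + fs.exists(path));
  }
}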
From source file: org.apache.crunch.io.avro.trevni.TrevniFileReaderFactory.java
License: Apache License

@Override
public Iterator<T> read(FileSystem fs, final Path path) {
  this.mapFn.initialize();
  try {
    HadoopInput input = new HadoopInput(path, fs.getConf());
    final AvroColumnReader<T> reader = getReader(input, aType, schema);
    return new AutoClosingIterator<T>(reader, new UnmodifiableIterator<T>() {
      @Override
      public boolean hasNext() {
        return reader.hasNext();
      }

      @Override
      public T next() {
        return mapFn.map(reader.next());
      }
    });
  } catch (IOException e) {
    LOG.info("Could not read avro file at path: " + path, e);
    return Iterators.emptyIterator();
  }
}
From source file: org.apache.crunch.io.hbase.HFileReaderFactory.java
License: Apache License

@Override
public Iterator<KeyValue> read(FileSystem fs, Path path) {
  Configuration conf = fs.getConf();
  CacheConfig cacheConfig = new CacheConfig(conf);
  try {
    HFile.Reader hfr = HFile.createReader(fs, path, cacheConfig);
    HFileScanner scanner = hfr.getScanner(conf.getBoolean(HFILE_SCANNER_CACHE_BLOCKS, false),
        conf.getBoolean(HFILE_SCANNER_PREAD, false));
    scanner.seekTo();
    return new HFileIterator(scanner);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
From source file: org.apache.crunch.io.hbase.HFileTargetIT.java
License: Apache License

/** Reads the first value on a given row from a bunch of hfiles. */
private static KeyValue readFromHFiles(FileSystem fs, Path mrOutputPath, String row) throws IOException {
  List<KeyValueScanner> scanners = Lists.newArrayList();
  KeyValue fakeKV = KeyValue.createFirstOnRow(Bytes.toBytes(row));
  for (FileStatus e : fs.listStatus(mrOutputPath)) {
    Path f = e.getPath();
    if (!f.getName().startsWith("part-")) { // filter out "_SUCCESS"
      continue;
    }
    StoreFile.Reader reader = new StoreFile.Reader(fs, f, new CacheConfig(fs.getConf()),
        DataBlockEncoding.NONE);
    StoreFileScanner scanner = reader.getStoreFileScanner(false, false);
    // have to call seek on each underlying scanner, otherwise KeyValueHeap won't work
    scanner.seek(fakeKV);
    scanners.add(scanner);
  }
  assertTrue(!scanners.isEmpty());
  KeyValueScanner kvh = new KeyValueHeap(scanners, KeyValue.COMPARATOR);
  boolean seekOk = kvh.seek(fakeKV);
  assertTrue(seekOk);
  KeyValue kv = kvh.next();
  kvh.close();
  return kv;
}
From source file: org.apache.crunch.io.impl.DefaultFileReaderFactory.java
License: Apache License

@Override
public Iterator<T> read(FileSystem fs, Path path) {
  final Configuration conf = new Configuration(fs.getConf());
  bundle.configure(conf);
  ptype.initialize(conf);
  final InputFormat fmt = ReflectionUtils.newInstance(bundle.getFormatClass(), conf);
  final TaskAttemptContext ctxt = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  try {
    Job job = new Job(conf);
    FileInputFormat.addInputPath(job, path);
    return Iterators.concat(Lists.transform(fmt.getSplits(job), new Function<InputSplit, Iterator<T>>() {
      @Override
      public Iterator<T> apply(InputSplit split) {
        try {
          RecordReader reader = fmt.createRecordReader(split, ctxt);
          reader.initialize(split, ctxt);
          return new RecordReaderIterator<T>(reader, ptype);
        } catch (Exception e) {
          LOG.error("Error reading split: " + split, e);
          throw new CrunchRuntimeException(e);
        }
      }
    }).iterator());
  } catch (Exception e) {
    LOG.error("Error reading path: " + path, e);
    throw new CrunchRuntimeException(e);
  }
}
From source file: org.apache.crunch.io.seq.SeqFileReaderFactory.java
License: Apache License

@Override
public Iterator<T> read(FileSystem fs, final Path path) {
  mapFn.initialize();
  try {
    final SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, fs.getConf());
    return new AutoClosingIterator<T>(reader, new UnmodifiableIterator<T>() {
      boolean nextChecked = false;
      boolean hasNext = false;

      @Override
      public boolean hasNext() {
        if (nextChecked) {
          return hasNext;
        }
        try {
          hasNext = reader.next(key, value);
          nextChecked = true;
          return hasNext;
        } catch (IOException e) {
          LOG.info("Error reading from path: " + path, e);
          return false;
        }
      }

      @Override
      public T next() {
        if (!nextChecked && !hasNext()) {
          return null;
        }
        nextChecked = false;
        return mapFn.map(converter.convertInput(key, value));
      }
    });
  } catch (IOException e) {
    LOG.info("Could not read seqfile at path: " + path, e);
    return Iterators.emptyIterator();
  }
}
From source file: org.apache.crunch.SparkHFileTargetIT.java
License: Apache License

/** Reads the first value on a given row from a bunch of hfiles. */
private static KeyValue readFromHFiles(FileSystem fs, Path mrOutputPath, String row) throws IOException {
  List<KeyValueScanner> scanners = Lists.newArrayList();
  KeyValue fakeKV = KeyValue.createFirstOnRow(Bytes.toBytes(row));
  for (FileStatus e : fs.listStatus(mrOutputPath)) {
    Path f = e.getPath();
    if (!f.getName().startsWith("part-")) { // filter out "_SUCCESS"
      continue;
    }
    StoreFile.Reader reader = new StoreFile.Reader(fs, f, new CacheConfig(fs.getConf()), fs.getConf());
    StoreFileScanner scanner = reader.getStoreFileScanner(false, false);
    // have to call seek on each underlying scanner, otherwise KeyValueHeap won't work
    scanner.seek(fakeKV);
    scanners.add(scanner);
  }
  assertTrue(!scanners.isEmpty());
  KeyValueScanner kvh = new KeyValueHeap(scanners, KeyValue.COMPARATOR);
  boolean seekOk = kvh.seek(fakeKV);
  assertTrue(seekOk);
  Cell kv = kvh.next();
  kvh.close();
  return KeyValue.cloneAndAddTags(kv, ImmutableList.<Tag>of());
}
From source file: org.apache.drill.exec.planner.common.DrillStatsTable.java
License: Apache License

public DrillStatsTable(String schemaName, String tableName, Path tablePath, FileSystem fs) {
  this.schemaName = schemaName;
  this.tableName = tableName;
  this.tablePath = tablePath;
  this.fs = ImpersonationUtil.createFileSystem(ImpersonationUtil.getProcessUserName(), fs.getConf());
}
From source file: org.apache.drill.exec.record.metadata.schema.PathSchemaProvider.java
License: Apache License

public PathSchemaProvider(FileSystem fs, Path path) throws IOException {
  this.fs = ImpersonationUtil.createFileSystem(ImpersonationUtil.getProcessUserName(), fs.getConf());

  if (!fs.exists(path.getParent())) {
    throw new IOException(
        String.format("Parent path for schema file [%s] does not exist", path.toUri().getPath()));
  }

  this.path = path;
}
From source file: org.apache.drill.exec.store.avro.AvroRecordReader.java
License: Apache License

private DataFileReader<GenericContainer> getReader(final Path hadoop, final FileSystem fs)
    throws ExecutionSetupException {
  try {
    final UserGroupInformation ugi = ImpersonationUtil.createProxyUgi(this.opUserName, this.queryUserName);
    return ugi.doAs(new PrivilegedExceptionAction<DataFileReader<GenericContainer>>() {
      @Override
      public DataFileReader<GenericContainer> run() throws Exception {
        return new DataFileReader<>(new FsInput(hadoop, fs.getConf()),
            new GenericDatumReader<GenericContainer>());
      }
    });
  } catch (IOException | InterruptedException e) {
    throw new ExecutionSetupException(String.format("Error in creating avro reader for file: %s", hadoop), e);
  }
}
From source file: org.apache.drill.exec.store.hive.HiveDrillNativeScanBatchCreator.java
License: Apache License

@Override
public ScanBatch getBatch(FragmentContext context, HiveDrillNativeParquetSubScan config,
    List<RecordBatch> children) throws ExecutionSetupException {
  final Table table = config.getTable();
  final List<InputSplit> splits = config.getInputSplits();
  final List<Partition> partitions = config.getPartitions();
  final List<SchemaPath> columns = config.getColumns();
  final String partitionDesignator = context.getOptions()
      .getOption(ExecConstants.FILESYSTEM_PARTITION_COLUMN_LABEL).string_val;
  List<Map<String, String>> implicitColumns = Lists.newLinkedList();
  boolean selectAllQuery = AbstractRecordReader.isStarQuery(columns);

  final boolean hasPartitions = (partitions != null && partitions.size() > 0);

  final List<String[]> partitionColumns = Lists.newArrayList();
  final List<Integer> selectedPartitionColumns = Lists.newArrayList();
  List<SchemaPath> newColumns = columns;
  if (!selectAllQuery) {
    // Separate out the partition and non-partition columns. Non-partition columns are passed directly to
    // the ParquetRecordReader. Partition columns are passed to ScanBatch.
    newColumns = Lists.newArrayList();
    Pattern pattern = Pattern.compile(String.format("%s[0-9]+", partitionDesignator));
    for (SchemaPath column : columns) {
      Matcher m = pattern.matcher(column.getAsUnescapedPath());
      if (m.matches()) {
        selectedPartitionColumns.add(
            Integer.parseInt(column.getAsUnescapedPath().substring(partitionDesignator.length())));
      } else {
        newColumns.add(column);
      }
    }
  }

  final OperatorContext oContext = context.newOperatorContext(config);

  int currentPartitionIndex = 0;
  final List<RecordReader> readers = Lists.newArrayList();
  final HiveConf conf = config.getHiveConf();

  // TODO: In future we can get this cache from Metadata cached on filesystem.
  final Map<String, ParquetMetadata> footerCache = Maps.newHashMap();

  Map<String, String> mapWithMaxColumns = Maps.newLinkedHashMap();
  try {
    for (InputSplit split : splits) {
      final FileSplit fileSplit = (FileSplit) split;
      final Path finalPath = fileSplit.getPath();
      final JobConf cloneJob = new ProjectionPusher().pushProjectionsAndFilters(new JobConf(conf),
          finalPath.getParent());
      final FileSystem fs = finalPath.getFileSystem(cloneJob);

      ParquetMetadata parquetMetadata = footerCache.get(finalPath.toString());
      if (parquetMetadata == null) {
        parquetMetadata = ParquetFileReader.readFooter(cloneJob, finalPath);
        footerCache.put(finalPath.toString(), parquetMetadata);
      }
      final List<Integer> rowGroupNums = getRowGroupNumbersFromFileSplit(fileSplit, parquetMetadata);

      for (int rowGroupNum : rowGroupNums) {
        readers.add(new ParquetRecordReader(context,
            Path.getPathWithoutSchemeAndAuthority(finalPath).toString(), rowGroupNum, fs,
            CodecFactory.createDirectCodecFactory(fs.getConf(),
                new ParquetDirectByteBufferAllocator(oContext.getAllocator()), 0),
            parquetMetadata, newColumns));
        Map<String, String> implicitValues = Maps.newLinkedHashMap();

        if (hasPartitions) {
          List<String> values = partitions.get(currentPartitionIndex).getValues();
          for (int i = 0; i < values.size(); i++) {
            if (selectAllQuery || selectedPartitionColumns.contains(i)) {
              implicitValues.put(partitionDesignator + i, values.get(i));
            }
          }
        }
        implicitColumns.add(implicitValues);
        if (implicitValues.size() > mapWithMaxColumns.size()) {
          mapWithMaxColumns = implicitValues;
        }
      }
      currentPartitionIndex++;
    }
  } catch (final IOException | RuntimeException e) {
    AutoCloseables.close(e, readers);
    throw new ExecutionSetupException("Failed to create RecordReaders. " + e.getMessage(), e);
  }

  // all readers should have the same number of implicit columns, add missing ones with value null
  mapWithMaxColumns = Maps.transformValues(mapWithMaxColumns, Functions.constant((String) null));
  for (Map<String, String> map : implicitColumns) {
    map.putAll(Maps.difference(map, mapWithMaxColumns).entriesOnlyOnRight());
  }

  // If there are no readers created (which is possible when the table is empty or no row groups are matched),
  // create an empty RecordReader to output the schema
  if (readers.size() == 0) {
    readers.add(new HiveRecordReader(table, null, null, columns, context, conf,
        ImpersonationUtil.createProxyUgi(config.getUserName(), context.getQueryUserName())));
  }

  return new ScanBatch(config, context, oContext, readers.iterator(), implicitColumns);
}