List of usage examples for org.apache.hadoop.fs.FileSystem.open
public FSDataInputStream open(Path f) throws IOException
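Before the real-world examples below, here is a minimal sketch of the basic pattern: open a file, read from the returned FSDataInputStream (which supports positioned reads via seek/readFully, as the ORC examples below rely on), and close it. The path and the 16-byte read are placeholder assumptions, not taken from any example on this page.

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class OpenExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/example.txt"); // placeholder path, assumed for illustration
        FileSystem fs = FileSystem.get(path.toUri(), conf);
        // open(Path) returns an FSDataInputStream; try-with-resources closes it.
        try (FSDataInputStream in = fs.open(path)) {
            // Positioned read: fill the buffer starting at file offset 0
            // (assumes the file has at least 16 bytes).
            byte[] header = new byte[16];
            in.readFully(0, header);
            System.out.println(new String(header, StandardCharsets.UTF_8));
        }
    }
}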
From source file:com.facebook.hive.orc.ReaderImpl.java
License:Open Source License
public ReaderImpl(FileSystem fs, Path path, Configuration conf) throws IOException {
    try {
        this.fileSystem = fs;
        this.path = path;
        this.conf = conf;
        FSDataInputStream file = fs.open(path);
        long size = fs.getFileStatus(path).getLen();
        int readSize = (int) Math.min(size, DIRECTORY_SIZE_GUESS);
        ByteBuffer buffer = ByteBuffer.allocate(readSize);
        InStream.read(file, size - readSize, buffer.array(), buffer.arrayOffset() + buffer.position(),
                buffer.remaining());
        int psLen = buffer.get(readSize - 1);
        int psOffset = readSize - 1 - psLen;
        CodedInputStream in = CodedInputStream.newInstance(buffer.array(), buffer.arrayOffset() + psOffset,
                psLen);
        OrcProto.PostScript ps = OrcProto.PostScript.parseFrom(in);
        int footerSize = (int) ps.getFooterLength();
        bufferSize = (int) ps.getCompressionBlockSize();
        switch (ps.getCompression()) {
        case NONE:
            compressionKind = CompressionKind.NONE;
            break;
        case ZLIB:
            compressionKind = CompressionKind.ZLIB;
            break;
        case SNAPPY:
            compressionKind = CompressionKind.SNAPPY;
            break;
        case LZO:
            compressionKind = CompressionKind.LZO;
            break;
        default:
            throw new IllegalArgumentException("Unknown compression");
        }
        codec = WriterImpl.createCodec(compressionKind);
        InputStream instream = InStream.create("footer", file, size - 1 - psLen - footerSize, footerSize,
                codec, bufferSize);
        footer = OrcProto.Footer.parseFrom(instream);
        inspector = new OrcLazyRowObjectInspector(0, footer.getTypesList());
        file.close();
    } catch (IndexOutOfBoundsException e) {
        /*
         * When a non-ORC file is read by the ORC reader, an IndexOutOfBoundsException is thrown
         * while creating the reader. Catch that exception and check the file header to see whether
         * the input file is ORC. If it is not, throw a NotAnORCFileException naming the file that
         * was being read (which helps identify the table partition involved).
         */
        checkIfORC(fs, path);
        throw new IOException("Failed to create record reader for file " + path, e);
    } catch (IOException e) {
        throw new IOException("Failed to create record reader for file " + path, e);
    }
}
From source file:com.facebook.hive.orc.ReaderImpl.java
License:Open Source License
/**
 * Reads the file header (first 40 bytes) and checks whether the first three characters are 'ORC'.
 */
public static void checkIfORC(FileSystem fs, Path path) throws IOException {
    // Hardcoded to 40 because the RCFile header, "SEQ-org.apache.hadoop.hive.ql.io.RCFile",
    // is 40 characters long
    final int buffLen = 40;
    final byte[] header = new byte[buffLen];
    final FSDataInputStream file = fs.open(path);
    final long fileLength = fs.getFileStatus(path).getLen();
    int sizeToBeRead = buffLen;
    if (buffLen > fileLength) {
        sizeToBeRead = (int) fileLength;
    }
    IOUtils.readFully(file, header, 0, sizeToBeRead);
    file.close();
    final String headerString = new String(header);
    if (headerString.startsWith("ORC")) {
        LOG.error("Error while parsing the footer of the file : " + path);
    } else {
        throw new NotAnORCFileException("Input file = " + path + " , header = " + headerString);
    }
}
From source file:com.facebook.hive.orc.RecordReaderImpl.java
License:Open Source License
RecordReaderImpl(Iterable<StripeInformation> stripes, FileSystem fileSystem, Path path, long offset,
        long length, List<OrcProto.Type> types, CompressionCodec codec, int bufferSize, boolean[] included,
        long strideRate, Configuration conf) throws IOException {
    this.file = fileSystem.open(path);
    this.codec = codec;
    this.bufferSize = bufferSize;
    this.included = included;
    this.readStrides = OrcConf.getIntVar(conf, OrcConf.ConfVars.HIVE_ORC_READ_COMPRESSION_STRIDES);
    this.readEagerlyFromHdfs = OrcConf.getBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_EAGER_HDFS_READ);
    this.readEagerlyFromHdfsBytes = OrcConf.getLongVar(conf, OrcConf.ConfVars.HIVE_ORC_EAGER_HDFS_READ_BYTES);
    long rows = 0;
    long skippedRows = 0;
    for (StripeInformation stripe : stripes) {
        long stripeStart = stripe.getOffset();
        if (offset > stripeStart) {
            skippedRows += stripe.getNumberOfRows();
        } else if (stripeStart < offset + length) {
            this.stripes.add(stripe);
            rows += stripe.getNumberOfRows();
        }
    }
    firstRow = skippedRows;
    totalRowCount = rows;
    indexes = new OrcProto.RowIndex[types.size()];
    rowIndexStride = strideRate;
    reader = createLazyRow(types, included);
    if (this.stripes.size() > 0) {
        readStripe();
    }
}
From source file:com.facebook.hive.orc.StripeReader.java
License:Open Source License
StripeReader(Iterable<StripeInformation> stripes, FileSystem fileSystem, Path path, long offset, long length)
        throws IOException {
    this.file = fileSystem.open(path);
    for (StripeInformation stripe : stripes) {
        long stripeStart = stripe.getOffset();
        if (stripeStart >= offset && stripeStart < offset + length) {
            this.stripes.add(stripe);
        }
    }
}
From source file:com.facebook.presto.accumulo.examples.TpcHBatchWriter.java
License:Apache License
@Override
public int run(AccumuloConfig config, CommandLine cmd) throws Exception {
    Path orders = new Path(cmd.getOptionValue(ORDERS_OPT));
    final FileSystem fs = FileSystem.get(new Configuration());
    if (!fs.exists(orders)) {
        throw new FileNotFoundException(format("File %s does not exist or is a directory", orders));
    }

    ZooKeeperInstance inst = new ZooKeeperInstance(config.getInstance(), config.getZooKeepers());
    Connector conn = inst.getConnector(config.getUsername(), new PasswordToken(config.getPassword()));
    validateTable(conn, DATA_TABLE);
    validateTable(conn, INDEX_TABLE);

    BatchWriterConfig bwc = new BatchWriterConfig();
    MultiTableBatchWriter mtbw = conn.createMultiTableBatchWriter(bwc);
    BatchWriter mainWrtr = mtbw.getBatchWriter(DATA_TABLE);
    BatchWriter indexWrtr = mtbw.getBatchWriter(INDEX_TABLE);

    long numOrders = 0;
    long numIndex = 0;

    System.out.println(format("Reading from file: %s", orders));
    BufferedReader rdr = new BufferedReader(new InputStreamReader(fs.open(orders)));

    // For each record in the file
    String line;
    while ((line = rdr.readLine()) != null) {
        // Split the line into fields
        String[] fields = line.split("\\|");
        if (fields.length < 9) {
            System.err.println(format("Record does not contain at least nine fields:\n%s", line));
            continue;
        }

        // Parse out the fields from strings
        Long orderkey = Long.parseLong(fields[0]);
        Long custkey = Long.parseLong(fields[1]);
        String orderstatus = fields[2];
        Double totalprice = Double.parseDouble(fields[3]);
        Date orderdate = sdformat.parse(fields[4]);
        String orderpriority = fields[5];
        String clerk = fields[6];
        Long shippriority = Long.parseLong(fields[7]);
        String comment = fields[8];

        // Create mutation for the row
        Mutation mutation = new Mutation(encode(orderkey));
        mutation.put(CF, CUSTKEY, encode(custkey));
        mutation.put(CF, ORDERSTATUS, encode(orderstatus));
        mutation.put(CF, TOTALPRICE, encode(totalprice));
        mutation.put(CF, ORDERDATE, encode(orderdate));
        mutation.put(CF, ORDERPRIORITY, encode(orderpriority));
        mutation.put(CF, CLERK, encode(clerk));
        mutation.put(CF, SHIPPRIORITY, encode(shippriority));
        mutation.put(CF, COMMENT, encode(comment));
        mainWrtr.addMutation(mutation);
        ++numOrders;

        // Create index mutation for the clerk
        Mutation idxClerk = new Mutation(encode(clerk));
        idxClerk.put(CF, encode(orderkey), EMPTY_BYTES);
        indexWrtr.addMutation(idxClerk);
        ++numIndex;
    }
    rdr.close();

    // Send the mutations to Accumulo and release resources
    mtbw.close();

    // Display how many mutations were inserted into Accumulo
    System.out.println(format("%d order Mutations inserted", numOrders));
    System.out.println(format("%d index Mutations inserted", numIndex));
    return 0;
}
From source file:com.facebook.presto.example.ExampleClient.java
License:Apache License
private static Map<String, Map<String, ExampleTable>> lookupSchemas(URI metadataUri,
        JsonCodec<Map<String, List<ExampleTable>>> catalogCodec) throws IOException {
    String json = null;
    if (metadataUri.getScheme().equalsIgnoreCase("hdfs")) {
        // Schema file on HDFS
        String hdfsSiteLocation = "/etc/hadoop/conf/hdfs-site.xml";
        String coreSiteLocation = "/etc/hadoop/conf/core-site.xml";

        Configuration conf = new Configuration();
        final Path hdfsConf = new Path(hdfsSiteLocation);
        final Path coreConf = new Path(coreSiteLocation);
        conf.addResource(hdfsConf);
        conf.addResource(coreConf);

        Path schemaPath = new Path(metadataUri);
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(schemaPath)) {
            byte[] schemaBytes = ByteStreams.toByteArray(fs.open(schemaPath));
            json = new String(schemaBytes, UTF_8);
        }
    } else {
        URL result = metadataUri.toURL();
        json = Resources.toString(result, UTF_8);
    }

    Map<String, List<ExampleTable>> catalog = catalogCodec.fromJson(json);
    return ImmutableMap.copyOf(transformValues(catalog, resolveAndIndexTables(metadataUri)));
}
From source file:com.facebook.presto.hdfs.HDFSPageSourceProvider.java
License:Apache License
private HdfsParquetDataSource buildHdfsParquetDataSource(FileSystem fileSystem, Path path, long start,
        long length) {
    try {
        long size = fileSystem.getFileStatus(path).getLen();
        FSDataInputStream inputStream = fileSystem.open(path);
        return new HdfsParquetDataSource(path, size, inputStream);
    } catch (IOException e) {
        throw new HdfsSplitNotOpenException(path);
    }
}
From source file:com.facebook.presto.hive.BackgroundHiveSplitLoader.java
License:Apache License
private static List<Path> getTargetPathsFromSymlink(FileSystem fileSystem, Path symlinkDir) {
    try {
        FileStatus[] symlinks = fileSystem.listStatus(symlinkDir, HIDDEN_FILES_PATH_FILTER);
        List<Path> targets = new ArrayList<>();
        for (FileStatus symlink : symlinks) {
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(fileSystem.open(symlink.getPath()), StandardCharsets.UTF_8))) {
                CharStreams.readLines(reader).stream().map(Path::new).forEach(targets::add);
            }
        }
        return targets;
    } catch (IOException e) {
        throw new PrestoException(HIVE_BAD_DATA, "Error parsing symlinks from: " + symlinkDir, e);
    }
}
From source file:com.facebook.presto.hive.orc.OrcPageSourceFactory.java
License:Apache License
public static OrcPageSource createOrcPageSource(MetadataReader metadataReader, HdfsEnvironment hdfsEnvironment,
        String sessionUser, Configuration configuration, Path path, long start, long length,
        List<HiveColumnHandle> columns, boolean useOrcColumnNames,
        TupleDomain<HiveColumnHandle> effectivePredicate, DateTimeZone hiveStorageTimeZone,
        TypeManager typeManager, DataSize maxMergeDistance, DataSize maxBufferSize, DataSize streamBufferSize,
        boolean orcBloomFiltersEnabled) {
    OrcDataSource orcDataSource;
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(sessionUser, path, configuration);
        long size = fileSystem.getFileStatus(path).getLen();
        FSDataInputStream inputStream = fileSystem.open(path);
        orcDataSource = new HdfsOrcDataSource(path.toString(), size, maxMergeDistance, maxBufferSize,
                streamBufferSize, inputStream);
    } catch (Exception e) {
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed")
                || e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }

    AggregatedMemoryContext systemMemoryUsage = new AggregatedMemoryContext();
    try {
        OrcReader reader = new OrcReader(orcDataSource, metadataReader, maxMergeDistance, maxBufferSize);

        List<HiveColumnHandle> physicalColumns = getPhysicalHiveColumnHandles(columns, useOrcColumnNames,
                reader, path);
        ImmutableMap.Builder<Integer, Type> includedColumns = ImmutableMap.builder();
        ImmutableList.Builder<ColumnReference<HiveColumnHandle>> columnReferences = ImmutableList.builder();
        for (HiveColumnHandle column : physicalColumns) {
            if (column.getColumnType() == REGULAR) {
                Type type = typeManager.getType(column.getTypeSignature());
                includedColumns.put(column.getHiveColumnIndex(), type);
                columnReferences.add(new ColumnReference<>(column, column.getHiveColumnIndex(), type));
            }
        }

        OrcPredicate predicate = new TupleDomainOrcPredicate<>(effectivePredicate, columnReferences.build(),
                orcBloomFiltersEnabled);

        OrcRecordReader recordReader = reader.createRecordReader(includedColumns.build(), predicate, start,
                length, hiveStorageTimeZone, systemMemoryUsage);

        return new OrcPageSource(recordReader, orcDataSource, physicalColumns, typeManager, systemMemoryUsage);
    } catch (Exception e) {
        try {
            orcDataSource.close();
        } catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = splitError(e, path, start, length);
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
From source file:com.facebook.presto.hive.OrcFileWriterFactory.java
License:Apache License
@Override
public Optional<HiveFileWriter> createFileWriter(Path path, List<String> inputColumnNames,
        StorageFormat storageFormat, Properties schema, JobConf configuration, ConnectorSession session) {
    if (!HiveSessionProperties.isOrcOptimizedWriterEnabled(session)) {
        return Optional.empty();
    }

    boolean isDwrf;
    if (OrcOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        isDwrf = false;
    } else if (com.facebook.hive.orc.OrcOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        isDwrf = true;
    } else {
        return Optional.empty();
    }

    CompressionKind compression = getCompression(schema, configuration);

    // existing tables and partitions may have columns in a different order than the writer is
    // providing, so build an index to rearrange columns in the proper order
    List<String> fileColumnNames = Splitter.on(',').trimResults().omitEmptyStrings()
            .splitToList(schema.getProperty(META_TABLE_COLUMNS, ""));
    List<Type> fileColumnTypes = toHiveTypes(schema.getProperty(META_TABLE_COLUMN_TYPES, "")).stream()
            .map(hiveType -> hiveType.getType(typeManager)).collect(toList());

    int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();

    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
        OutputStream outputStream = fileSystem.create(path);

        Optional<Supplier<OrcDataSource>> validationInputFactory = Optional.empty();
        if (HiveSessionProperties.isOrcOptimizedWriterValidate(session)) {
            validationInputFactory = Optional.of(() -> {
                try {
                    return new HdfsOrcDataSource(new OrcDataSourceId(path.toString()),
                            fileSystem.getFileStatus(path).getLen(), getOrcMaxMergeDistance(session),
                            getOrcMaxBufferSize(session), getOrcStreamBufferSize(session), false,
                            fileSystem.open(path), stats);
                } catch (IOException e) {
                    throw new PrestoException(HIVE_WRITE_VALIDATION_FAILED, e);
                }
            });
        }

        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };

        return Optional.of(new OrcFileWriter(outputStream, rollbackAction, isDwrf, fileColumnNames,
                fileColumnTypes, compression, fileInputColumnIndexes,
                ImmutableMap.<String, String>builder()
                        .put(HiveMetadata.PRESTO_VERSION_NAME, nodeVersion.toString())
                        .put(HiveMetadata.PRESTO_QUERY_ID_NAME, session.getQueryId()).build(),
                hiveStorageTimeZone, validationInputFactory));
    } catch (IOException e) {
        throw new PrestoException(HIVE_WRITER_OPEN_ERROR, "Error creating ORC file", e);
    }
}