Example usage for org.apache.hadoop.fs FileSystem open

Introduction

On this page you can find example usage for org.apache.hadoop.fs.FileSystem open.

Prototype

public FSDataInputStream open(Path f) throws IOException

Source Link

Document

Opens an FSDataInputStream at the indicated Path.
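Before the real-world examples below, here is a minimal sketch of the common pattern: open the path, wrap the returned FSDataInputStream in a reader, and close everything with try-with-resources. The path, configuration, and class name here are placeholders for illustration, not taken from the examples on this page.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemOpenSketch {
    public static void main(String[] args) throws IOException {
        // Placeholder path; point this at a file that exists on your cluster.
        Path path = new Path("/tmp/example.txt");
        FileSystem fs = FileSystem.get(new Configuration());

        // open(Path) returns an FSDataInputStream positioned at the start of the file.
        try (FSDataInputStream in = fs.open(path);
                BufferedReader reader = new BufferedReader(
                        new InputStreamReader(in, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        }
    }
}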

Usage

From source file: com.facebook.hive.orc.ReaderImpl.java

License: Open Source License

public ReaderImpl(FileSystem fs, Path path, Configuration conf) throws IOException {
    try {
        this.fileSystem = fs;
        this.path = path;
        this.conf = conf;
        FSDataInputStream file = fs.open(path);
        long size = fs.getFileStatus(path).getLen();
        int readSize = (int) Math.min(size, DIRECTORY_SIZE_GUESS);
        ByteBuffer buffer = ByteBuffer.allocate(readSize);
        InStream.read(file, size - readSize, buffer.array(), buffer.arrayOffset() + buffer.position(),
                buffer.remaining());
        int psLen = buffer.get(readSize - 1);
        int psOffset = readSize - 1 - psLen;
        CodedInputStream in = CodedInputStream.newInstance(buffer.array(), buffer.arrayOffset() + psOffset,
                psLen);
        OrcProto.PostScript ps = OrcProto.PostScript.parseFrom(in);
        int footerSize = (int) ps.getFooterLength();
        bufferSize = (int) ps.getCompressionBlockSize();
        switch (ps.getCompression()) {
        case NONE:
            compressionKind = CompressionKind.NONE;
            break;
        case ZLIB:
            compressionKind = CompressionKind.ZLIB;
            break;
        case SNAPPY:
            compressionKind = CompressionKind.SNAPPY;
            break;
        case LZO:
            compressionKind = CompressionKind.LZO;
            break;
        default:
            throw new IllegalArgumentException("Unknown compression");
        }
        codec = WriterImpl.createCodec(compressionKind);

        InputStream instream = InStream.create("footer", file, size - 1 - psLen - footerSize, footerSize, codec,
                bufferSize);
        footer = OrcProto.Footer.parseFrom(instream);
        inspector = new OrcLazyRowObjectInspector(0, footer.getTypesList());
        file.close();
    } catch (IndexOutOfBoundsException e) {
        /**
         * When a non-ORC file is read by the ORC reader, an IndexOutOfBoundsException is thrown while
         * creating the reader. Catch that exception and check the file header to see whether the input
         * file is ORC. If it is not, throw a NotAnORCFileException naming the file that was being read
         * (which helps to figure out which table partition was being read).
         */
        checkIfORC(fs, path);
        throw new IOException("Failed to create record reader for file " + path, e);
    } catch (IOException e) {
        throw new IOException("Failed to create record reader for file " + path, e);
    }
}

From source file: com.facebook.hive.orc.ReaderImpl.java

License: Open Source License

/**
 * Reads the file header (first 40 bytes) and checks if the first three characters are 'ORC'.
 */
public static void checkIfORC(FileSystem fs, Path path) throws IOException {
    // hardcoded to 40 because the header "SEQ-org.apache.hadoop.hive.ql.io.RCFile" is 40 characters long
    final int buffLen = 40;
    final byte header[] = new byte[buffLen];
    final FSDataInputStream file = fs.open(path);
    final long fileLength = fs.getFileStatus(path).getLen();
    int sizeToBeRead = buffLen;
    if (buffLen > fileLength) {
        sizeToBeRead = (int) fileLength;
    }

    IOUtils.readFully(file, header, 0, sizeToBeRead);
    file.close();

    final String headerString = new String(header);
    if (headerString.startsWith("ORC")) {
        LOG.error("Error while parsing the footer of the file : " + path);
    } else {
        throw new NotAnORCFileException("Input file = " + path + " , header = " + headerString);
    }
}

From source file: com.facebook.hive.orc.RecordReaderImpl.java

License: Open Source License

RecordReaderImpl(Iterable<StripeInformation> stripes, FileSystem fileSystem, Path path, long offset,
        long length, List<OrcProto.Type> types, CompressionCodec codec, int bufferSize, boolean[] included,
        long strideRate, Configuration conf) throws IOException {
    this.file = fileSystem.open(path);
    this.codec = codec;
    this.bufferSize = bufferSize;
    this.included = included;
    this.readStrides = OrcConf.getIntVar(conf, OrcConf.ConfVars.HIVE_ORC_READ_COMPRESSION_STRIDES);
    this.readEagerlyFromHdfs = OrcConf.getBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_EAGER_HDFS_READ);
    this.readEagerlyFromHdfsBytes = OrcConf.getLongVar(conf, OrcConf.ConfVars.HIVE_ORC_EAGER_HDFS_READ_BYTES);
    long rows = 0;
    long skippedRows = 0;
    for (StripeInformation stripe : stripes) {
        long stripeStart = stripe.getOffset();
        if (offset > stripeStart) {
            skippedRows += stripe.getNumberOfRows();
        } else if (stripeStart < offset + length) {
            this.stripes.add(stripe);
            rows += stripe.getNumberOfRows();
        }
    }
    firstRow = skippedRows;
    totalRowCount = rows;
    indexes = new OrcProto.RowIndex[types.size()];
    rowIndexStride = strideRate;
    reader = createLazyRow(types, included);
    if (this.stripes.size() > 0) {
        readStripe();
    }
}

From source file: com.facebook.hive.orc.StripeReader.java

License: Open Source License

StripeReader(Iterable<StripeInformation> stripes, FileSystem fileSystem, Path path, long offset, long length)
        throws IOException {
    this.file = fileSystem.open(path);
    for (StripeInformation stripe : stripes) {
        long stripeStart = stripe.getOffset();
        if (stripeStart >= offset && stripeStart < offset + length) {
            this.stripes.add(stripe);
        }
    }
}

From source file: com.facebook.presto.accumulo.examples.TpcHBatchWriter.java

License: Apache License

@Override
public int run(AccumuloConfig config, CommandLine cmd) throws Exception {
    Path orders = new Path(cmd.getOptionValue(ORDERS_OPT));
    final FileSystem fs = FileSystem.get(new Configuration());
    if (!fs.exists(orders)) {
        throw new FileNotFoundException(format("File %s does not exist or is a directory", orders));
    }

    ZooKeeperInstance inst = new ZooKeeperInstance(config.getInstance(), config.getZooKeepers());
    Connector conn = inst.getConnector(config.getUsername(), new PasswordToken(config.getPassword()));

    validateTable(conn, DATA_TABLE);
    validateTable(conn, INDEX_TABLE);

    BatchWriterConfig bwc = new BatchWriterConfig();
    MultiTableBatchWriter mtbw = conn.createMultiTableBatchWriter(bwc);
    BatchWriter mainWrtr = mtbw.getBatchWriter(DATA_TABLE);
    BatchWriter indexWrtr = mtbw.getBatchWriter(INDEX_TABLE);

    long numTweets = 0;
    long numIndex = 0;

    System.out.println(format("Reading from file: %s", orders));
    BufferedReader rdr = new BufferedReader(new InputStreamReader(fs.open(orders)));

    // For each record in the file
    String line;
    while ((line = rdr.readLine()) != null) {
        // Split the line into fields
        String[] fields = line.split("\\|");
        if (fields.length < 9) {
            System.err.println(format("Record does not contain at least nine fields:\n%s", line));
            continue;
        }

        // Parse out the fields from strings
        Long orderkey = Long.parseLong(fields[0]);
        Long custkey = Long.parseLong(fields[1]);
        String orderstatus = fields[2];
        Double totalprice = Double.parseDouble(fields[3]);
        Date orderdate = sdformat.parse(fields[4]);
        String orderpriority = fields[5];
        String clerk = fields[6];
        Long shippriority = Long.parseLong(fields[7]);
        String comment = fields[8];

        // Create mutation for the row
        Mutation mutation = new Mutation(encode(orderkey));
        mutation.put(CF, CUSTKEY, encode(custkey));
        mutation.put(CF, ORDERSTATUS, encode(orderstatus));
        mutation.put(CF, TOTALPRICE, encode(totalprice));
        mutation.put(CF, ORDERDATE, encode(orderdate));
        mutation.put(CF, ORDERPRIORITY, encode(orderpriority));
        mutation.put(CF, CLERK, encode(clerk));
        mutation.put(CF, SHIPPRIORITY, encode(shippriority));
        mutation.put(CF, COMMENT, encode(comment));
        mainWrtr.addMutation(mutation);
        ++numTweets;

        // Create index mutation for the clerk
        Mutation idxClerk = new Mutation(encode(clerk));
        idxClerk.put(CF, encode(orderkey), EMPTY_BYTES);
        indexWrtr.addMutation(idxClerk);
        ++numIndex;
    }
    rdr.close();

    // Send the mutations to Accumulo and release resources
    mtbw.close();

    // Display how many tweets were inserted into Accumulo
    System.out.println(format("%d tweets Mutations inserted", numTweets));
    System.out.println(format("%d index Mutations inserted", numIndex));
    return 0;
}

From source file: com.facebook.presto.example.ExampleClient.java

License: Apache License

private static Map<String, Map<String, ExampleTable>> lookupSchemas(URI metadataUri,
        JsonCodec<Map<String, List<ExampleTable>>> catalogCodec) throws IOException {
    String json = null;
    if (metadataUri.getScheme().equalsIgnoreCase("hdfs")) {
        // schema file on hdfs
        String hdfsSiteLocation = "/etc/hadoop/conf/hdfs-site.xml";
        String coreSiteLocation = "/etc/hadoop/conf/core-site.xml";

        Configuration conf = new Configuration();
        final Path hdfsConf = new Path(hdfsSiteLocation);
        final Path coreConf = new Path(coreSiteLocation);
        conf.addResource(hdfsConf);
        conf.addResource(coreConf);

        Path schemaPath = new Path(metadataUri);

        FileSystem fs = FileSystem.get(conf);

        if (fs.exists(schemaPath)) {
            byte[] schemaBytes = ByteStreams.toByteArray(fs.open(schemaPath));
            json = new String(schemaBytes, UTF_8);
        }
    } else {
        URL result = metadataUri.toURL();
        json = Resources.toString(result, UTF_8);
    }
    Map<String, List<ExampleTable>> catalog = catalogCodec.fromJson(json);

    return ImmutableMap.copyOf(transformValues(catalog, resolveAndIndexTables(metadataUri)));
}

From source file: com.facebook.presto.hdfs.HDFSPageSourceProvider.java

License: Apache License

private HdfsParquetDataSource buildHdfsParquetDataSource(FileSystem fileSystem, Path path, long start,
        long length) {
    try {
        long size = fileSystem.getFileStatus(path).getLen();
        FSDataInputStream inputStream = fileSystem.open(path);
        return new HdfsParquetDataSource(path, size, inputStream);
    } catch (IOException e) {
        throw new HdfsSplitNotOpenException(path);
    }
}

From source file: com.facebook.presto.hive.BackgroundHiveSplitLoader.java

License: Apache License

private static List<Path> getTargetPathsFromSymlink(FileSystem fileSystem, Path symlinkDir) {
    try {
        FileStatus[] symlinks = fileSystem.listStatus(symlinkDir, HIDDEN_FILES_PATH_FILTER);
        List<Path> targets = new ArrayList<>();

        for (FileStatus symlink : symlinks) {
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(fileSystem.open(symlink.getPath()), StandardCharsets.UTF_8))) {
                CharStreams.readLines(reader).stream().map(Path::new).forEach(targets::add);
            }
        }
        return targets;
    } catch (IOException e) {
        throw new PrestoException(HIVE_BAD_DATA, "Error parsing symlinks from: " + symlinkDir, e);
    }
}

From source file: com.facebook.presto.hive.orc.OrcPageSourceFactory.java

License: Apache License

public static OrcPageSource createOrcPageSource(MetadataReader metadataReader, HdfsEnvironment hdfsEnvironment,
        String sessionUser, Configuration configuration, Path path, long start, long length,
        List<HiveColumnHandle> columns, boolean useOrcColumnNames,
        TupleDomain<HiveColumnHandle> effectivePredicate, DateTimeZone hiveStorageTimeZone,
        TypeManager typeManager, DataSize maxMergeDistance, DataSize maxBufferSize, DataSize streamBufferSize,
        boolean orcBloomFiltersEnabled) {
    OrcDataSource orcDataSource;
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(sessionUser, path, configuration);
        long size = fileSystem.getFileStatus(path).getLen();
        FSDataInputStream inputStream = fileSystem.open(path);
        orcDataSource = new HdfsOrcDataSource(path.toString(), size, maxMergeDistance, maxBufferSize,
                streamBufferSize, inputStream);
    } catch (Exception e) {
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed")
                || e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }

    AggregatedMemoryContext systemMemoryUsage = new AggregatedMemoryContext();
    try {
        OrcReader reader = new OrcReader(orcDataSource, metadataReader, maxMergeDistance, maxBufferSize);

        List<HiveColumnHandle> physicalColumns = getPhysicalHiveColumnHandles(columns, useOrcColumnNames,
                reader, path);
        ImmutableMap.Builder<Integer, Type> includedColumns = ImmutableMap.builder();
        ImmutableList.Builder<ColumnReference<HiveColumnHandle>> columnReferences = ImmutableList.builder();
        for (HiveColumnHandle column : physicalColumns) {
            if (column.getColumnType() == REGULAR) {
                Type type = typeManager.getType(column.getTypeSignature());
                includedColumns.put(column.getHiveColumnIndex(), type);
                columnReferences.add(new ColumnReference<>(column, column.getHiveColumnIndex(), type));
            }
        }

        OrcPredicate predicate = new TupleDomainOrcPredicate<>(effectivePredicate, columnReferences.build(),
                orcBloomFiltersEnabled);

        OrcRecordReader recordReader = reader.createRecordReader(includedColumns.build(), predicate, start,
                length, hiveStorageTimeZone, systemMemoryUsage);

        return new OrcPageSource(recordReader, orcDataSource, physicalColumns, typeManager, systemMemoryUsage);
    } catch (Exception e) {
        try {
            orcDataSource.close();
        } catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = splitError(e, path, start, length);
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}

From source file: com.facebook.presto.hive.OrcFileWriterFactory.java

License: Apache License

@Override
public Optional<HiveFileWriter> createFileWriter(Path path, List<String> inputColumnNames,
        StorageFormat storageFormat, Properties schema, JobConf configuration, ConnectorSession session) {
    if (!HiveSessionProperties.isOrcOptimizedWriterEnabled(session)) {
        return Optional.empty();
    }

    boolean isDwrf;
    if (OrcOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        isDwrf = false;
    } else if (com.facebook.hive.orc.OrcOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        isDwrf = true;
    } else {
        return Optional.empty();
    }

    CompressionKind compression = getCompression(schema, configuration);

    // existing tables and partitions may have columns in a different order than the writer is providing, so build
    // an index to rearrange columns in the proper order
    List<String> fileColumnNames = Splitter.on(',').trimResults().omitEmptyStrings()
            .splitToList(schema.getProperty(META_TABLE_COLUMNS, ""));
    List<Type> fileColumnTypes = toHiveTypes(schema.getProperty(META_TABLE_COLUMN_TYPES, "")).stream()
            .map(hiveType -> hiveType.getType(typeManager)).collect(toList());

    int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();

    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
        OutputStream outputStream = fileSystem.create(path);

        Optional<Supplier<OrcDataSource>> validationInputFactory = Optional.empty();
        if (HiveSessionProperties.isOrcOptimizedWriterValidate(session)) {
            validationInputFactory = Optional.of(() -> {
                try {
                    return new HdfsOrcDataSource(new OrcDataSourceId(path.toString()),
                            fileSystem.getFileStatus(path).getLen(), getOrcMaxMergeDistance(session),
                            getOrcMaxBufferSize(session), getOrcStreamBufferSize(session), false,
                            fileSystem.open(path), stats);
                } catch (IOException e) {
                    throw new PrestoException(HIVE_WRITE_VALIDATION_FAILED, e);
                }
            });
        }

        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };

        return Optional.of(new OrcFileWriter(outputStream, rollbackAction, isDwrf, fileColumnNames,
                fileColumnTypes, compression, fileInputColumnIndexes,
                ImmutableMap.<String, String>builder()
                        .put(HiveMetadata.PRESTO_VERSION_NAME, nodeVersion.toString())
                        .put(HiveMetadata.PRESTO_QUERY_ID_NAME, session.getQueryId()).build(),
                hiveStorageTimeZone, validationInputFactory));
    } catch (IOException e) {
        throw new PrestoException(HIVE_WRITER_OPEN_ERROR, "Error creating ORC file", e);
    }
}