Example usage for org.apache.hadoop.mapred JobConf JobConf

List of usage examples for org.apache.hadoop.mapred JobConf JobConf

Introduction

On this page you can find example usages of the JobConf constructor from org.apache.hadoop.mapred.

Prototype

public JobConf(Configuration conf) 

Document

A new map/reduce job configuration that inherits the settings of an existing Configuration. All of the examples below use this overload to wrap a Hadoop Configuration in a JobConf.
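
Before the usage examples, here is a minimal, self-contained sketch of the pattern they all share: an existing Hadoop Configuration is wrapped in a JobConf before being handed to the mapred-era APIs. The property name and input path below are hypothetical placeholders, not taken from the examples.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;

public class JobConfExample {
    public static void main(String[] args) {
        // An existing Configuration, e.g. one obtained from the environment
        Configuration configuration = new Configuration();

        // Wrap it in a JobConf; the JobConf inherits the Configuration's settings
        JobConf jobConf = new JobConf(configuration);

        // JobConf is itself a Configuration, so it can be tuned before use
        jobConf.set("serialization.format", "1"); // hypothetical property
        FileInputFormat.setInputPaths(jobConf, new Path("/tmp/input")); // hypothetical path

        System.out.println(jobConf.get("serialization.format"));
    }
}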

Usage

From source file: com.facebook.presto.hive.DwrfRecordCursorProvider.java

License: Apache License

@Override
public Optional<HiveRecordCursor> createHiveRecordCursor(String clientId, Configuration configuration,
        ConnectorSession session, Path path, long start, long length, Properties schema,
        List<HiveColumnHandle> columns, List<HivePartitionKey> partitionKeys,
        TupleDomain<HiveColumnHandle> tupleDomain, DateTimeZone hiveStorageTimeZone, TypeManager typeManager) {
    @SuppressWarnings("deprecation")
    Deserializer deserializer = getDeserializer(schema);
    if (!(deserializer instanceof OrcSerde)) {
        return Optional.absent();
    }

    StructObjectInspector rowInspector = getTableObjectInspector(schema);
    if (!all(rowInspector.getAllStructFieldRefs(), isSupportedDwrfType())) {
        throw new IllegalArgumentException("DWRF does not support DATE type");
    }

    ReaderWriterProfiler.setProfilerOptions(configuration);

    RecordReader recordReader;
    try {
        FileSystem fileSystem = path.getFileSystem(configuration);
        Reader reader = OrcFile.createReader(fileSystem, path, new JobConf(configuration));
        boolean[] include = findIncludedColumns(reader.getTypes(), columns);
        recordReader = reader.rows(start, length, include);
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }

    return Optional.<HiveRecordCursor>of(new DwrfHiveRecordCursor(recordReader, length, schema, partitionKeys,
            columns, hiveStorageTimeZone, DateTimeZone.forID(session.getTimeZoneKey().getId()), typeManager));
}

From source file: com.facebook.presto.hive.HiveClient.java

License: Apache License

@Override
public RecordSink getRecordSink(ConnectorOutputTableHandle tableHandle) {
    HiveOutputTableHandle handle = checkType(tableHandle, HiveOutputTableHandle.class, "tableHandle");

    Path target = new Path(handle.getTemporaryPath(), randomUUID().toString());
    JobConf conf = new JobConf(hdfsEnvironment.getConfiguration(target));

    return new HiveRecordSink(handle, target, conf);
}

From source file: com.facebook.presto.hive.HiveMetadata.java

License: Apache License

private List<String> computeFileNamesForMissingBuckets(HiveStorageFormat storageFormat, Path targetPath,
        String filePrefix, int bucketCount, PartitionUpdate partitionUpdate) {
    if (partitionUpdate.getFileNames().size() == bucketCount) {
        // fast path for common case
        return ImmutableList.of();
    }
    JobConf conf = new JobConf(hdfsEnvironment.getConfiguration(targetPath));
    String fileExtension = HiveWriterFactory.getFileExtension(conf, fromHiveStorageFormat(storageFormat));
    Set<String> fileNames = partitionUpdate.getFileNames().stream().collect(Collectors.toSet());
    ImmutableList.Builder<String> missingFileNamesBuilder = ImmutableList.builder();
    for (int i = 0; i < bucketCount; i++) {
        String fileName = HiveWriterFactory.computeBucketedFileName(filePrefix, i) + fileExtension;
        if (!fileNames.contains(fileName)) {
            missingFileNamesBuilder.add(fileName);
        }
    }
    List<String> missingFileNames = missingFileNamesBuilder.build();
    verify(fileNames.size() + missingFileNames.size() == bucketCount);
    return missingFileNames;
}

From source file: com.facebook.presto.hive.HiveMetadata.java

License: Apache License

private void createEmptyFile(Path path, Table table, Optional<Partition> partition, List<String> fileNames) {
    JobConf conf = new JobConf(hdfsEnvironment.getConfiguration(path));

    Properties schema;
    StorageFormat format;
    if (partition.isPresent()) {
        schema = getHiveSchema(partition.get(), table);
        format = partition.get().getStorage().getStorageFormat();
    } else {
        schema = getHiveSchema(table);
        format = table.getStorage().getStorageFormat();
    }

    for (String fileName : fileNames) {
        writeEmptyFile(new Path(path, fileName), conf, schema, format.getSerDe(), format.getOutputFormat());
    }
}

From source file: com.facebook.presto.hive.HivePageSink.java

License: Apache License

public HivePageSink(String schemaName, String tableName, boolean isCreateTable,
        List<HiveColumnHandle> inputColumns, HiveStorageFormat tableStorageFormat,
        LocationHandle locationHandle, LocationService locationService, String filePrefix,
        HiveMetastore metastore, PageIndexerFactory pageIndexerFactory, TypeManager typeManager,
        HdfsEnvironment hdfsEnvironment, boolean respectTableFormat, int maxOpenPartitions,
        boolean immutablePartitions, JsonCodec<PartitionUpdate> partitionUpdateCodec) {
    this.schemaName = requireNonNull(schemaName, "schemaName is null");
    this.tableName = requireNonNull(tableName, "tableName is null");

    requireNonNull(inputColumns, "inputColumns is null");

    this.tableStorageFormat = requireNonNull(tableStorageFormat, "tableStorageFormat is null");
    this.locationHandle = requireNonNull(locationHandle, "locationHandle is null");
    this.locationService = requireNonNull(locationService, "locationService is null");
    this.filePrefix = requireNonNull(filePrefix, "filePrefix is null");

    this.metastore = requireNonNull(metastore, "metastore is null");

    requireNonNull(pageIndexerFactory, "pageIndexerFactory is null");

    this.typeManager = requireNonNull(typeManager, "typeManager is null");

    this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
    this.respectTableFormat = respectTableFormat;
    this.maxOpenPartitions = maxOpenPartitions;
    this.immutablePartitions = immutablePartitions;
    this.partitionUpdateCodec = requireNonNull(partitionUpdateCodec, "partitionUpdateCodec is null");

    // divide input columns into partition and data columns
    ImmutableList.Builder<String> partitionColumnNames = ImmutableList.builder();
    ImmutableList.Builder<Type> partitionColumnTypes = ImmutableList.builder();
    ImmutableList.Builder<String> dataColumnNames = ImmutableList.builder();
    ImmutableList.Builder<Type> dataColumnTypes = ImmutableList.builder();
    for (HiveColumnHandle column : inputColumns) {
        if (column.isPartitionKey()) {
            partitionColumnNames.add(column.getName());
            partitionColumnTypes.add(typeManager.getType(column.getTypeSignature()));
        } else {
            dataColumnNames.add(column.getName());
            dataColumnTypes.add(typeManager.getType(column.getTypeSignature()));
        }
    }
    this.partitionColumnNames = partitionColumnNames.build();
    this.partitionColumnTypes = partitionColumnTypes.build();
    this.dataColumnNames = dataColumnNames.build();
    this.dataColumnTypes = dataColumnTypes.build();

    // determine the input index of the partition columns and data columns
    ImmutableList.Builder<Integer> partitionColumns = ImmutableList.builder();
    ImmutableList.Builder<Integer> dataColumns = ImmutableList.builder();
    // sample weight column is passed separately, so index must be calculated without this column
    List<HiveColumnHandle> inputColumnsWithoutSample = inputColumns.stream()
            .filter(column -> !column.getName().equals(SAMPLE_WEIGHT_COLUMN_NAME)).collect(toList());
    for (int inputIndex = 0; inputIndex < inputColumnsWithoutSample.size(); inputIndex++) {
        HiveColumnHandle column = inputColumnsWithoutSample.get(inputIndex);
        if (column.isPartitionKey()) {
            partitionColumns.add(inputIndex);
        } else {
            dataColumns.add(inputIndex);
        }
    }
    this.partitionColumns = Ints.toArray(partitionColumns.build());
    this.dataColumns = Ints.toArray(dataColumns.build());

    this.pageIndexer = pageIndexerFactory.createPageIndexer(this.partitionColumnTypes);

    // preallocate temp space for partition and data
    this.partitionRow = Arrays.asList(new Object[this.partitionColumnNames.size()]);
    this.dataRow = Arrays.asList(new Object[this.dataColumnNames.size()]);

    if (isCreateTable) {
        this.table = null;
        Optional<Path> writePath = locationService.writePathRoot(locationHandle);
        checkArgument(writePath.isPresent(), "CREATE TABLE must have a write path");
        conf = new JobConf(hdfsEnvironment.getConfiguration(writePath.get()));
    } else {
        Optional<Table> table = metastore.getTable(schemaName, tableName);
        if (!table.isPresent()) {
            throw new PrestoException(HIVE_INVALID_METADATA,
                    format("Table %s.%s was dropped during insert", schemaName, tableName));
        }
        this.table = table.get();
        Path hdfsEnvironmentPath = locationService.writePathRoot(locationHandle)
                .orElseGet(() -> locationService.targetPathRoot(locationHandle));
        conf = new JobConf(hdfsEnvironment.getConfiguration(hdfsEnvironmentPath));
    }
}

From source file: com.facebook.presto.hive.HiveRecordSet.java

License: Apache License

private static RecordReader<?, ?> createRecordReader(HiveSplit split, Configuration configuration,
        Path wrappedPath) {
    final InputFormat<?, ?> inputFormat = getInputFormat(configuration, split.getSchema(), true);
    final JobConf jobConf = new JobConf(configuration);
    final FileSplit fileSplit = createFileSplit(wrappedPath, split.getStart(), split.getLength());

    // propagate serialization configuration to getRecordReader
    for (String name : split.getSchema().stringPropertyNames()) {
        if (name.startsWith("serialization.")) {
            jobConf.set(name, split.getSchema().getProperty(name));
        }
    }

    try {
        return retry().stopOnIllegalExceptions().run("createRecordReader", new Callable<RecordReader<?, ?>>() {
            @Override
            public RecordReader<?, ?> call() throws IOException {
                return inputFormat.getRecordReader(fileSplit, jobConf, Reporter.NULL);
            }
        });
    } catch (Exception e) {
        throw new PrestoException(HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT.toErrorCode(),
                String.format("Error opening Hive split %s (offset=%s, length=%s) using %s: %s",
                        split.getPath(), split.getStart(), split.getLength(),
                        getInputFormatName(split.getSchema()), e.getMessage()),
                e);
    }
}

From source file: com.facebook.presto.hive.HiveSplitIterable.java

License: Apache License

private void loadPartitionSplits(final HiveSplitQueue hiveSplitQueue, SuspendingExecutor suspendingExecutor)
        throws InterruptedException {
    final Semaphore semaphore = new Semaphore(maxPartitionBatchSize);
    try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) {
        ImmutableList.Builder<ListenableFuture<Void>> futureBuilder = ImmutableList.builder();

        Iterator<String> nameIterator = partitionNames.iterator();
        for (Partition partition : partitions) {
            checkState(nameIterator.hasNext(), "different number of partitions and partition names!");
            semaphore.acquire();
            final String partitionName = nameIterator.next();
            final Properties schema = getPartitionSchema(table, partition);
            final List<HivePartitionKey> partitionKeys = getPartitionKeys(table, partition);

            Path path = new Path(getPartitionLocation(table, partition));
            final Configuration configuration = hdfsEnvironment.getConfiguration(path);
            final InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, false);
            Path partitionPath = hdfsEnvironment.getFileSystemWrapper().wrap(path);

            FileSystem fs = partitionPath.getFileSystem(configuration);
            final LastSplitMarkingQueue markerQueue = new LastSplitMarkingQueue(hiveSplitQueue);

            if (inputFormat instanceof SymlinkTextInputFormat) {
                JobConf jobConf = new JobConf(configuration);
                FileInputFormat.setInputPaths(jobConf, partitionPath);
                InputSplit[] splits = inputFormat.getSplits(jobConf, 0);
                for (InputSplit rawSplit : splits) {
                    FileSplit split = ((SymlinkTextInputFormat.SymlinkTextInputSplit) rawSplit)
                            .getTargetSplit();

                    // get the filesystem for the target path -- it may be a different hdfs instance
                    FileSystem targetFilesystem = split.getPath().getFileSystem(configuration);
                    FileStatus fileStatus = targetFilesystem.getFileStatus(split.getPath());
                    markerQueue.addToQueue(createHiveSplits(partitionName, fileStatus,
                            targetFilesystem.getFileBlockLocations(fileStatus, split.getStart(),
                                    split.getLength()),
                            split.getStart(), split.getLength(), schema, partitionKeys, false));
                }
                markerQueue.finish();
                continue;
            }

            ListenableFuture<Void> partitionFuture = new AsyncRecursiveWalker(fs, suspendingExecutor)
                    .beginWalk(partitionPath, new FileStatusCallback() {
                        @Override
                        public void process(FileStatus file, BlockLocation[] blockLocations) {
                            if (bucket.isPresent()
                                    && !fileMatchesBucket(file.getPath().getName(), bucket.get())) {
                                return;
                            }

                            try {
                                boolean splittable = isSplittable(inputFormat,
                                        file.getPath().getFileSystem(configuration), file.getPath());

                                markerQueue.addToQueue(createHiveSplits(partitionName, file, blockLocations, 0,
                                        file.getLen(), schema, partitionKeys, splittable));
                            } catch (IOException e) {
                                hiveSplitQueue.fail(e);
                            }
                        }
                    });

            // release the semaphore when the partition finishes
            Futures.addCallback(partitionFuture, new FutureCallback<Void>() {
                @Override
                public void onSuccess(Void result) {
                    markerQueue.finish();
                    semaphore.release();
                }

                @Override
                public void onFailure(Throwable t) {
                    markerQueue.finish();
                    semaphore.release();
                }
            });
            futureBuilder.add(partitionFuture);
        }

        // when all partitions finish, mark the queue as finished
        Futures.addCallback(Futures.allAsList(futureBuilder.build()), new FutureCallback<List<Void>>() {
            @Override
            public void onSuccess(List<Void> result) {
                hiveSplitQueue.finished();
            }

            @Override
            public void onFailure(Throwable t) {
                hiveSplitQueue.fail(t);
            }
        });
    } catch (Throwable e) {
        hiveSplitQueue.fail(e);
        Throwables.propagateIfInstanceOf(e, Error.class);
    }
}

From source file: com.facebook.presto.hive.HiveSplitSourceProvider.java

License: Apache License

private void loadPartitionSplits(final HiveSplitSource hiveSplitSource, SuspendingExecutor suspendingExecutor,
        final ConnectorSession session) {
    final Semaphore semaphore = new Semaphore(maxPartitionBatchSize);
    try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) {
        ImmutableList.Builder<ListenableFuture<Void>> futureBuilder = ImmutableList.builder();

        Iterator<String> nameIterator = partitionNames.iterator();
        for (Partition partition : partitions) {
            checkState(nameIterator.hasNext(), "different number of partitions and partition names!");
            final String partitionName = nameIterator.next();
            final Properties schema = getPartitionSchema(table, partition);
            final List<HivePartitionKey> partitionKeys = getPartitionKeys(table, partition);

            Path path = new Path(getPartitionLocation(table, partition));
            final Configuration configuration = hdfsEnvironment.getConfiguration(path);
            final InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, false);

            FileSystem fs = path.getFileSystem(configuration);

            if (inputFormat instanceof SymlinkTextInputFormat) {
                JobConf jobConf = new JobConf(configuration);
                FileInputFormat.setInputPaths(jobConf, path);
                InputSplit[] splits = inputFormat.getSplits(jobConf, 0);
                for (InputSplit rawSplit : splits) {
                    FileSplit split = ((SymlinkTextInputFormat.SymlinkTextInputSplit) rawSplit)
                            .getTargetSplit();

                    // get the filesystem for the target path -- it may be a different hdfs instance
                    FileSystem targetFilesystem = split.getPath().getFileSystem(configuration);
                    FileStatus fileStatus = targetFilesystem.getFileStatus(split.getPath());
                    hiveSplitSource.addToQueue(createHiveSplits(partitionName, fileStatus,
                            targetFilesystem.getFileBlockLocations(fileStatus, split.getStart(),
                                    split.getLength()),
                            split.getStart(), split.getLength(), schema, partitionKeys, false, session));
                }
                continue;
            }

            // TODO: this is currently serial across all partitions and should be done in suspendingExecutor
            if (bucket.isPresent()) {
                Optional<FileStatus> bucketFile = getBucketFile(bucket.get(), fs, path);
                if (bucketFile.isPresent()) {
                    FileStatus file = bucketFile.get();
                    BlockLocation[] blockLocations = fs.getFileBlockLocations(file, 0, file.getLen());
                    boolean splittable = isSplittable(inputFormat, fs, file.getPath());

                    hiveSplitSource.addToQueue(createHiveSplits(partitionName, file, blockLocations, 0,
                            file.getLen(), schema, partitionKeys, splittable, session));
                    continue;
                }
            }

            // Acquire semaphore so that we only have a fixed number of outstanding partitions being processed asynchronously
            // NOTE: there must not be any calls that throw in the space between acquiring the semaphore and setting the Future
            // callback to release it. Otherwise, we will need a try-finally block around this section.
            try {
                semaphore.acquire();
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                return;
            }

            ListenableFuture<Void> partitionFuture = createAsyncWalker(fs, suspendingExecutor).beginWalk(path,
                    new FileStatusCallback() {
                        @Override
                        public void process(FileStatus file, BlockLocation[] blockLocations) {
                            try {
                                boolean splittable = isSplittable(inputFormat,
                                        file.getPath().getFileSystem(configuration), file.getPath());

                                hiveSplitSource.addToQueue(createHiveSplits(partitionName, file, blockLocations,
                                        0, file.getLen(), schema, partitionKeys, splittable, session));
                            } catch (IOException e) {
                                hiveSplitSource.fail(e);
                            }
                        }
                    });

            // release the semaphore when the partition finishes
            Futures.addCallback(partitionFuture, new FutureCallback<Void>() {
                @Override
                public void onSuccess(Void result) {
                    semaphore.release();
                }

                @Override
                public void onFailure(Throwable t) {
                    semaphore.release();
                }
            });

            futureBuilder.add(partitionFuture);
        }

        // when all partitions finish, mark the queue as finished
        Futures.addCallback(Futures.allAsList(futureBuilder.build()), new FutureCallback<List<Void>>() {
            @Override
            public void onSuccess(List<Void> result) {
                hiveSplitSource.finished();
            }

            @Override
            public void onFailure(Throwable t) {
                hiveSplitSource.fail(t);
            }
        });
    } catch (Throwable e) {
        hiveSplitSource.fail(e);
        Throwables.propagateIfInstanceOf(e, Error.class);
    }
}

From source file: com.facebook.presto.hive.HiveUtil.java

License: Apache License

static InputFormat<?, ?> getInputFormat(Configuration configuration, Properties schema, boolean symlinkTarget) {
    String inputFormatName = getInputFormatName(schema);
    try {
        JobConf jobConf = new JobConf(configuration);

        Class<? extends InputFormat<?, ?>> inputFormatClass = getInputFormatClass(jobConf, inputFormatName);
        if (symlinkTarget && (inputFormatClass == SymlinkTextInputFormat.class)) {
            // symlink targets are always TextInputFormat
            inputFormatClass = TextInputFormat.class;
        }

        return ReflectionUtils.newInstance(inputFormatClass, jobConf);
    } catch (ClassNotFoundException | RuntimeException e) {
        throw new RuntimeException("Unable to create input format " + inputFormatName, e);
    }
}

From source file: com.facebook.presto.hive.HiveWriterFactory.java

License: Apache License

public HiveWriterFactory(String schemaName, String tableName, boolean isCreateTable,
        List<HiveColumnHandle> inputColumns, HiveStorageFormat tableStorageFormat,
        HiveStorageFormat partitionStorageFormat, OptionalInt bucketCount, LocationHandle locationHandle,
        LocationService locationService, String filePrefix,
        HivePageSinkMetadataProvider pageSinkMetadataProvider, TypeManager typeManager,
        HdfsEnvironment hdfsEnvironment, boolean immutablePartitions, ConnectorSession session) {
    this.schemaName = requireNonNull(schemaName, "schemaName is null");
    this.tableName = requireNonNull(tableName, "tableName is null");

    this.tableStorageFormat = requireNonNull(tableStorageFormat, "tableStorageFormat is null");
    this.partitionStorageFormat = requireNonNull(partitionStorageFormat, "partitionStorageFormat is null");
    this.locationHandle = requireNonNull(locationHandle, "locationHandle is null");
    this.locationService = requireNonNull(locationService, "locationService is null");
    this.filePrefix = requireNonNull(filePrefix, "filePrefix is null");

    this.pageSinkMetadataProvider = requireNonNull(pageSinkMetadataProvider,
            "pageSinkMetadataProvider is null");

    this.typeManager = requireNonNull(typeManager, "typeManager is null");

    this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
    this.immutablePartitions = immutablePartitions;

    // divide input columns into partition and data columns
    requireNonNull(inputColumns, "inputColumns is null");
    ImmutableList.Builder<String> partitionColumnNames = ImmutableList.builder();
    ImmutableList.Builder<Type> partitionColumnTypes = ImmutableList.builder();
    ImmutableList.Builder<DataColumn> dataColumns = ImmutableList.builder();
    for (HiveColumnHandle column : inputColumns) {
        HiveType hiveType = column.getHiveType();
        if (column.isPartitionKey()) {
            partitionColumnNames.add(column.getName());
            partitionColumnTypes.add(typeManager.getType(column.getTypeSignature()));
        } else {
            dataColumns.add(
                    new DataColumn(column.getName(), typeManager.getType(column.getTypeSignature()), hiveType));
        }
    }
    this.partitionColumnNames = partitionColumnNames.build();
    this.partitionColumnTypes = partitionColumnTypes.build();
    this.dataColumns = dataColumns.build();

    if (isCreateTable) {
        this.table = null;
        Optional<Path> writePath = locationService.writePathRoot(locationHandle);
        checkArgument(writePath.isPresent(), "CREATE TABLE must have a write path");
        conf = new JobConf(hdfsEnvironment.getConfiguration(writePath.get()));
    } else {
        Optional<Table> table = pageSinkMetadataProvider.getTable();
        if (!table.isPresent()) {
            throw new PrestoException(HIVE_INVALID_METADATA,
                    format("Table %s.%s was dropped during insert", schemaName, tableName));
        }
        this.table = table.get();
        Path hdfsEnvironmentPath = locationService.writePathRoot(locationHandle)
                .orElseGet(() -> locationService.targetPathRoot(locationHandle));
        conf = new JobConf(hdfsEnvironment.getConfiguration(hdfsEnvironmentPath));
    }

    this.bucketCount = requireNonNull(bucketCount, "bucketCount is null");
    if (bucketCount.isPresent()) {
        checkArgument(bucketCount.getAsInt() < MAX_BUCKET_COUNT,
                "bucketCount must be smaller than " + MAX_BUCKET_COUNT);
    }

    this.session = requireNonNull(session, "session is null");
}