List of usage examples for org.apache.hadoop.mapred.JobConf JobConf
public JobConf(Configuration conf)
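Every example on this page wraps an existing Hadoop Configuration in a JobConf so that job-level settings can be layered on top of the cluster settings. A minimal sketch of that pattern is shown below; the property values are hypothetical and only illustrate the idea.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;

public class JobConfExample {
    public static void main(String[] args) {
        // Start from a plain Hadoop Configuration (hypothetical filesystem setting)
        Configuration configuration = new Configuration();
        configuration.set("fs.defaultFS", "hdfs://namenode.example.com:8020");

        // JobConf(Configuration) copies the existing settings into a job configuration
        JobConf jobConf = new JobConf(configuration);

        // Job-specific properties can then be added without touching the original Configuration
        jobConf.set("mapreduce.job.name", "example-job");
        System.out.println(jobConf.get("fs.defaultFS"));
    }
}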
From source file:com.facebook.presto.hive.DwrfRecordCursorProvider.java
License:Apache License
@Override
public Optional<HiveRecordCursor> createHiveRecordCursor(String clientId, Configuration configuration,
        ConnectorSession session, Path path, long start, long length, Properties schema,
        List<HiveColumnHandle> columns, List<HivePartitionKey> partitionKeys,
        TupleDomain<HiveColumnHandle> tupleDomain, DateTimeZone hiveStorageTimeZone, TypeManager typeManager)
{
    @SuppressWarnings("deprecation")
    Deserializer deserializer = getDeserializer(schema);
    if (!(deserializer instanceof OrcSerde)) {
        return Optional.absent();
    }

    StructObjectInspector rowInspector = getTableObjectInspector(schema);
    if (!all(rowInspector.getAllStructFieldRefs(), isSupportedDwrfType())) {
        throw new IllegalArgumentException("DWRF does not support DATE type");
    }

    ReaderWriterProfiler.setProfilerOptions(configuration);

    RecordReader recordReader;
    try {
        FileSystem fileSystem = path.getFileSystem(configuration);
        Reader reader = OrcFile.createReader(fileSystem, path, new JobConf(configuration));
        boolean[] include = findIncludedColumns(reader.getTypes(), columns);
        recordReader = reader.rows(start, length, include);
    }
    catch (Exception e) {
        throw Throwables.propagate(e);
    }

    return Optional.<HiveRecordCursor>of(new DwrfHiveRecordCursor(recordReader, length, schema, partitionKeys,
            columns, hiveStorageTimeZone, DateTimeZone.forID(session.getTimeZoneKey().getId()), typeManager));
}
From source file:com.facebook.presto.hive.HiveClient.java
License:Apache License
@Override
public RecordSink getRecordSink(ConnectorOutputTableHandle tableHandle)
{
    HiveOutputTableHandle handle = checkType(tableHandle, HiveOutputTableHandle.class, "tableHandle");

    Path target = new Path(handle.getTemporaryPath(), randomUUID().toString());
    JobConf conf = new JobConf(hdfsEnvironment.getConfiguration(target));

    return new HiveRecordSink(handle, target, conf);
}
From source file:com.facebook.presto.hive.HiveMetadata.java
License:Apache License
private List<String> computeFileNamesForMissingBuckets(HiveStorageFormat storageFormat, Path targetPath,
        String filePrefix, int bucketCount, PartitionUpdate partitionUpdate)
{
    if (partitionUpdate.getFileNames().size() == bucketCount) {
        // fast path for common case
        return ImmutableList.of();
    }

    JobConf conf = new JobConf(hdfsEnvironment.getConfiguration(targetPath));
    String fileExtension = HiveWriterFactory.getFileExtension(conf, fromHiveStorageFormat(storageFormat));
    Set<String> fileNames = partitionUpdate.getFileNames().stream().collect(Collectors.toSet());

    ImmutableList.Builder<String> missingFileNamesBuilder = ImmutableList.builder();
    for (int i = 0; i < bucketCount; i++) {
        String fileName = HiveWriterFactory.computeBucketedFileName(filePrefix, i) + fileExtension;
        if (!fileNames.contains(fileName)) {
            missingFileNamesBuilder.add(fileName);
        }
    }
    List<String> missingFileNames = missingFileNamesBuilder.build();
    verify(fileNames.size() + missingFileNames.size() == bucketCount);
    return missingFileNames;
}
From source file:com.facebook.presto.hive.HiveMetadata.java
License:Apache License
private void createEmptyFile(Path path, Table table, Optional<Partition> partition, List<String> fileNames)
{
    JobConf conf = new JobConf(hdfsEnvironment.getConfiguration(path));

    Properties schema;
    StorageFormat format;
    if (partition.isPresent()) {
        schema = getHiveSchema(partition.get(), table);
        format = partition.get().getStorage().getStorageFormat();
    }
    else {
        schema = getHiveSchema(table);
        format = table.getStorage().getStorageFormat();
    }

    for (String fileName : fileNames) {
        writeEmptyFile(new Path(path, fileName), conf, schema, format.getSerDe(), format.getOutputFormat());
    }
}
From source file:com.facebook.presto.hive.HivePageSink.java
License:Apache License
public HivePageSink(String schemaName, String tableName, boolean isCreateTable,
        List<HiveColumnHandle> inputColumns, HiveStorageFormat tableStorageFormat,
        LocationHandle locationHandle, LocationService locationService, String filePrefix,
        HiveMetastore metastore, PageIndexerFactory pageIndexerFactory, TypeManager typeManager,
        HdfsEnvironment hdfsEnvironment, boolean respectTableFormat, int maxOpenPartitions,
        boolean immutablePartitions, JsonCodec<PartitionUpdate> partitionUpdateCodec)
{
    this.schemaName = requireNonNull(schemaName, "schemaName is null");
    this.tableName = requireNonNull(tableName, "tableName is null");

    requireNonNull(inputColumns, "inputColumns is null");

    this.tableStorageFormat = requireNonNull(tableStorageFormat, "tableStorageFormat is null");
    this.locationHandle = requireNonNull(locationHandle, "locationHandle is null");
    this.locationService = requireNonNull(locationService, "locationService is null");
    this.filePrefix = requireNonNull(filePrefix, "filePrefix is null");

    this.metastore = requireNonNull(metastore, "metastore is null");

    requireNonNull(pageIndexerFactory, "pageIndexerFactory is null");

    this.typeManager = requireNonNull(typeManager, "typeManager is null");
    this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
    this.respectTableFormat = respectTableFormat;
    this.maxOpenPartitions = maxOpenPartitions;
    this.immutablePartitions = immutablePartitions;
    this.partitionUpdateCodec = requireNonNull(partitionUpdateCodec, "partitionUpdateCodec is null");

    // divide input columns into partition and data columns
    ImmutableList.Builder<String> partitionColumnNames = ImmutableList.builder();
    ImmutableList.Builder<Type> partitionColumnTypes = ImmutableList.builder();
    ImmutableList.Builder<String> dataColumnNames = ImmutableList.builder();
    ImmutableList.Builder<Type> dataColumnTypes = ImmutableList.builder();
    for (HiveColumnHandle column : inputColumns) {
        if (column.isPartitionKey()) {
            partitionColumnNames.add(column.getName());
            partitionColumnTypes.add(typeManager.getType(column.getTypeSignature()));
        }
        else {
            dataColumnNames.add(column.getName());
            dataColumnTypes.add(typeManager.getType(column.getTypeSignature()));
        }
    }
    this.partitionColumnNames = partitionColumnNames.build();
    this.partitionColumnTypes = partitionColumnTypes.build();
    this.dataColumnNames = dataColumnNames.build();
    this.dataColumnTypes = dataColumnTypes.build();

    // determine the input index of the partition columns and data columns
    ImmutableList.Builder<Integer> partitionColumns = ImmutableList.builder();
    ImmutableList.Builder<Integer> dataColumns = ImmutableList.builder();
    // sample weight column is passed separately, so index must be calculated without this column
    List<HiveColumnHandle> inputColumnsWithoutSample = inputColumns.stream()
            .filter(column -> !column.getName().equals(SAMPLE_WEIGHT_COLUMN_NAME))
            .collect(toList());
    for (int inputIndex = 0; inputIndex < inputColumnsWithoutSample.size(); inputIndex++) {
        HiveColumnHandle column = inputColumnsWithoutSample.get(inputIndex);
        if (column.isPartitionKey()) {
            partitionColumns.add(inputIndex);
        }
        else {
            dataColumns.add(inputIndex);
        }
    }
    this.partitionColumns = Ints.toArray(partitionColumns.build());
    this.dataColumns = Ints.toArray(dataColumns.build());

    this.pageIndexer = pageIndexerFactory.createPageIndexer(this.partitionColumnTypes);

    // preallocate temp space for partition and data
    this.partitionRow = Arrays.asList(new Object[this.partitionColumnNames.size()]);
    this.dataRow = Arrays.asList(new Object[this.dataColumnNames.size()]);

    if (isCreateTable) {
        this.table = null;
        Optional<Path> writePath = locationService.writePathRoot(locationHandle);
        checkArgument(writePath.isPresent(), "CREATE TABLE must have a write path");
        conf = new JobConf(hdfsEnvironment.getConfiguration(writePath.get()));
    }
    else {
        Optional<Table> table = metastore.getTable(schemaName, tableName);
        if (!table.isPresent()) {
            throw new PrestoException(HIVE_INVALID_METADATA,
                    format("Table %s.%s was dropped during insert", schemaName, tableName));
        }
        this.table = table.get();
        Path hdfsEnvironmentPath = locationService.writePathRoot(locationHandle)
                .orElseGet(() -> locationService.targetPathRoot(locationHandle));
        conf = new JobConf(hdfsEnvironment.getConfiguration(hdfsEnvironmentPath));
    }
}
From source file:com.facebook.presto.hive.HiveRecordSet.java
License:Apache License
private static RecordReader<?, ?> createRecordReader(HiveSplit split, Configuration configuration, Path wrappedPath)
{
    final InputFormat<?, ?> inputFormat = getInputFormat(configuration, split.getSchema(), true);
    final JobConf jobConf = new JobConf(configuration);
    final FileSplit fileSplit = createFileSplit(wrappedPath, split.getStart(), split.getLength());

    // propagate serialization configuration to getRecordReader
    for (String name : split.getSchema().stringPropertyNames()) {
        if (name.startsWith("serialization.")) {
            jobConf.set(name, split.getSchema().getProperty(name));
        }
    }

    try {
        return retry().stopOnIllegalExceptions().run("createRecordReader", new Callable<RecordReader<?, ?>>() {
            @Override
            public RecordReader<?, ?> call()
                    throws IOException
            {
                return inputFormat.getRecordReader(fileSplit, jobConf, Reporter.NULL);
            }
        });
    }
    catch (Exception e) {
        throw new PrestoException(HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT.toErrorCode(),
                String.format("Error opening Hive split %s (offset=%s, length=%s) using %s: %s",
                        split.getPath(), split.getStart(), split.getLength(),
                        getInputFormatName(split.getSchema()), e.getMessage()), e);
    }
}
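The example above copies every "serialization." property from the table schema into the JobConf before asking the InputFormat for a RecordReader. A stripped-down sketch of just that step is shown here; the helper name and the sample property value are illustrative, not part of the original source.

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;

public class SerializationPropsExample {
    // Copy serialization-related table properties into a fresh job configuration
    static JobConf withSerializationProperties(Configuration configuration, Properties schema) {
        JobConf jobConf = new JobConf(configuration);
        for (String name : schema.stringPropertyNames()) {
            if (name.startsWith("serialization.")) {
                jobConf.set(name, schema.getProperty(name));
            }
        }
        return jobConf;
    }

    public static void main(String[] args) {
        Properties schema = new Properties();
        schema.setProperty("serialization.lib", "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe");
        JobConf jobConf = withSerializationProperties(new Configuration(), schema);
        System.out.println(jobConf.get("serialization.lib"));
    }
}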
From source file:com.facebook.presto.hive.HiveSplitIterable.java
License:Apache License
private void loadPartitionSplits(final HiveSplitQueue hiveSplitQueue, SuspendingExecutor suspendingExecutor)
        throws InterruptedException
{
    final Semaphore semaphore = new Semaphore(maxPartitionBatchSize);
    try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) {
        ImmutableList.Builder<ListenableFuture<Void>> futureBuilder = ImmutableList.builder();

        Iterator<String> nameIterator = partitionNames.iterator();
        for (Partition partition : partitions) {
            checkState(nameIterator.hasNext(), "different number of partitions and partition names!");
            semaphore.acquire();
            final String partitionName = nameIterator.next();
            final Properties schema = getPartitionSchema(table, partition);
            final List<HivePartitionKey> partitionKeys = getPartitionKeys(table, partition);

            Path path = new Path(getPartitionLocation(table, partition));
            final Configuration configuration = hdfsEnvironment.getConfiguration(path);
            final InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, false);
            Path partitionPath = hdfsEnvironment.getFileSystemWrapper().wrap(path);

            FileSystem fs = partitionPath.getFileSystem(configuration);
            final LastSplitMarkingQueue markerQueue = new LastSplitMarkingQueue(hiveSplitQueue);

            if (inputFormat instanceof SymlinkTextInputFormat) {
                JobConf jobConf = new JobConf(configuration);
                FileInputFormat.setInputPaths(jobConf, partitionPath);
                InputSplit[] splits = inputFormat.getSplits(jobConf, 0);
                for (InputSplit rawSplit : splits) {
                    FileSplit split = ((SymlinkTextInputFormat.SymlinkTextInputSplit) rawSplit).getTargetSplit();

                    // get the filesystem for the target path -- it may be a different hdfs instance
                    FileSystem targetFilesystem = split.getPath().getFileSystem(configuration);
                    FileStatus fileStatus = targetFilesystem.getFileStatus(split.getPath());
                    markerQueue.addToQueue(createHiveSplits(partitionName, fileStatus,
                            targetFilesystem.getFileBlockLocations(fileStatus, split.getStart(), split.getLength()),
                            split.getStart(), split.getLength(), schema, partitionKeys, false));
                }
                markerQueue.finish();
                continue;
            }

            ListenableFuture<Void> partitionFuture = new AsyncRecursiveWalker(fs, suspendingExecutor)
                    .beginWalk(partitionPath, new FileStatusCallback() {
                        @Override
                        public void process(FileStatus file, BlockLocation[] blockLocations)
                        {
                            if (bucket.isPresent() && !fileMatchesBucket(file.getPath().getName(), bucket.get())) {
                                return;
                            }

                            try {
                                boolean splittable = isSplittable(inputFormat,
                                        file.getPath().getFileSystem(configuration), file.getPath());

                                markerQueue.addToQueue(createHiveSplits(partitionName, file, blockLocations,
                                        0, file.getLen(), schema, partitionKeys, splittable));
                            }
                            catch (IOException e) {
                                hiveSplitQueue.fail(e);
                            }
                        }
                    });

            // release the semaphore when the partition finishes
            Futures.addCallback(partitionFuture, new FutureCallback<Void>() {
                @Override
                public void onSuccess(Void result)
                {
                    markerQueue.finish();
                    semaphore.release();
                }

                @Override
                public void onFailure(Throwable t)
                {
                    markerQueue.finish();
                    semaphore.release();
                }
            });

            futureBuilder.add(partitionFuture);
        }

        // when all partitions finish, mark the queue as finished
        Futures.addCallback(Futures.allAsList(futureBuilder.build()), new FutureCallback<List<Void>>() {
            @Override
            public void onSuccess(List<Void> result)
            {
                hiveSplitQueue.finished();
            }

            @Override
            public void onFailure(Throwable t)
            {
                hiveSplitQueue.fail(t);
            }
        });
    }
    catch (Throwable e) {
        hiveSplitQueue.fail(e);
        Throwables.propagateIfInstanceOf(e, Error.class);
    }
}
From source file:com.facebook.presto.hive.HiveSplitSourceProvider.java
License:Apache License
private void loadPartitionSplits(final HiveSplitSource hiveSplitSource, SuspendingExecutor suspendingExecutor,
        final ConnectorSession session)
{
    final Semaphore semaphore = new Semaphore(maxPartitionBatchSize);
    try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) {
        ImmutableList.Builder<ListenableFuture<Void>> futureBuilder = ImmutableList.builder();

        Iterator<String> nameIterator = partitionNames.iterator();
        for (Partition partition : partitions) {
            checkState(nameIterator.hasNext(), "different number of partitions and partition names!");
            final String partitionName = nameIterator.next();
            final Properties schema = getPartitionSchema(table, partition);
            final List<HivePartitionKey> partitionKeys = getPartitionKeys(table, partition);

            Path path = new Path(getPartitionLocation(table, partition));
            final Configuration configuration = hdfsEnvironment.getConfiguration(path);
            final InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, false);

            FileSystem fs = path.getFileSystem(configuration);

            if (inputFormat instanceof SymlinkTextInputFormat) {
                JobConf jobConf = new JobConf(configuration);
                FileInputFormat.setInputPaths(jobConf, path);
                InputSplit[] splits = inputFormat.getSplits(jobConf, 0);
                for (InputSplit rawSplit : splits) {
                    FileSplit split = ((SymlinkTextInputFormat.SymlinkTextInputSplit) rawSplit).getTargetSplit();

                    // get the filesystem for the target path -- it may be a different hdfs instance
                    FileSystem targetFilesystem = split.getPath().getFileSystem(configuration);
                    FileStatus fileStatus = targetFilesystem.getFileStatus(split.getPath());
                    hiveSplitSource.addToQueue(createHiveSplits(partitionName, fileStatus,
                            targetFilesystem.getFileBlockLocations(fileStatus, split.getStart(), split.getLength()),
                            split.getStart(), split.getLength(), schema, partitionKeys, false, session));
                }
                continue;
            }

            // TODO: this is currently serial across all partitions and should be done in suspendingExecutor
            if (bucket.isPresent()) {
                Optional<FileStatus> bucketFile = getBucketFile(bucket.get(), fs, path);
                if (bucketFile.isPresent()) {
                    FileStatus file = bucketFile.get();
                    BlockLocation[] blockLocations = fs.getFileBlockLocations(file, 0, file.getLen());
                    boolean splittable = isSplittable(inputFormat, fs, file.getPath());

                    hiveSplitSource.addToQueue(createHiveSplits(partitionName, file, blockLocations,
                            0, file.getLen(), schema, partitionKeys, splittable, session));
                    continue;
                }
            }

            // Acquire semaphore so that we only have a fixed number of outstanding partitions being processed asynchronously
            // NOTE: there must not be any calls that throw in the space between acquiring the semaphore and setting the Future
            // callback to release it. Otherwise, we will need a try-finally block around this section.
            try {
                semaphore.acquire();
            }
            catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                return;
            }

            ListenableFuture<Void> partitionFuture = createAsyncWalker(fs, suspendingExecutor).beginWalk(path,
                    new FileStatusCallback() {
                        @Override
                        public void process(FileStatus file, BlockLocation[] blockLocations)
                        {
                            try {
                                boolean splittable = isSplittable(inputFormat,
                                        file.getPath().getFileSystem(configuration), file.getPath());

                                hiveSplitSource.addToQueue(createHiveSplits(partitionName, file, blockLocations,
                                        0, file.getLen(), schema, partitionKeys, splittable, session));
                            }
                            catch (IOException e) {
                                hiveSplitSource.fail(e);
                            }
                        }
                    });

            // release the semaphore when the partition finishes
            Futures.addCallback(partitionFuture, new FutureCallback<Void>() {
                @Override
                public void onSuccess(Void result)
                {
                    semaphore.release();
                }

                @Override
                public void onFailure(Throwable t)
                {
                    semaphore.release();
                }
            });
            futureBuilder.add(partitionFuture);
        }

        // when all partitions finish, mark the queue as finished
        Futures.addCallback(Futures.allAsList(futureBuilder.build()), new FutureCallback<List<Void>>() {
            @Override
            public void onSuccess(List<Void> result)
            {
                hiveSplitSource.finished();
            }

            @Override
            public void onFailure(Throwable t)
            {
                hiveSplitSource.fail(t);
            }
        });
    }
    catch (Throwable e) {
        hiveSplitSource.fail(e);
        Throwables.propagateIfInstanceOf(e, Error.class);
    }
}
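Both split-loading examples above use the same JobConf idiom for symlink tables: wrap the Configuration, point FileInputFormat at the partition path, and ask the InputFormat for its splits. The following is a minimal, generic sketch of that idiom outside Presto; the path is hypothetical and TextInputFormat stands in for whatever InputFormat the table actually uses.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;

public class SplitListingExample {
    public static void main(String[] args) throws IOException {
        // Wrap the current Hadoop settings in a JobConf and point it at a (hypothetical) input path
        JobConf jobConf = new JobConf(new Configuration());
        FileInputFormat.setInputPaths(jobConf, new Path("hdfs://namenode.example.com/warehouse/table/part=1"));

        // TextInputFormat is only a stand-in for the table's real InputFormat
        TextInputFormat inputFormat = new TextInputFormat();
        inputFormat.configure(jobConf);

        // numSplits = 0 lets the input format choose the number of splits itself
        InputSplit[] splits = inputFormat.getSplits(jobConf, 0);
        for (InputSplit split : splits) {
            System.out.println(split);
        }
    }
}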
From source file:com.facebook.presto.hive.HiveUtil.java
License:Apache License
static InputFormat<?, ?> getInputFormat(Configuration configuration, Properties schema, boolean symlinkTarget)
{
    String inputFormatName = getInputFormatName(schema);
    try {
        JobConf jobConf = new JobConf(configuration);

        Class<? extends InputFormat<?, ?>> inputFormatClass = getInputFormatClass(jobConf, inputFormatName);
        if (symlinkTarget && (inputFormatClass == SymlinkTextInputFormat.class)) {
            // symlink targets are always TextInputFormat
            inputFormatClass = TextInputFormat.class;
        }

        return ReflectionUtils.newInstance(inputFormatClass, jobConf);
    }
    catch (ClassNotFoundException | RuntimeException e) {
        throw new RuntimeException("Unable to create input format " + inputFormatName, e);
    }
}
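In the HiveUtil example the JobConf serves two roles: it is used to resolve the InputFormat class and it is the Configuration handed to ReflectionUtils.newInstance, so the instantiated format sees the job settings. A sketch of that instantiation step is below, with TextInputFormat standing in for the class that would normally come from the table's metadata.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.util.ReflectionUtils;

public class InputFormatInstantiationExample {
    public static void main(String[] args) {
        JobConf jobConf = new JobConf(new Configuration());

        // The real class would be looked up from table metadata; TextInputFormat is a stand-in here
        Class<? extends InputFormat<?, ?>> inputFormatClass = TextInputFormat.class;

        // ReflectionUtils passes the JobConf to the new instance (configure(JobConf) is called if supported)
        InputFormat<?, ?> inputFormat = ReflectionUtils.newInstance(inputFormatClass, jobConf);
        System.out.println(inputFormat.getClass().getName());
    }
}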
From source file:com.facebook.presto.hive.HiveWriterFactory.java
License:Apache License
public HiveWriterFactory(String schemaName, String tableName, boolean isCreateTable,
        List<HiveColumnHandle> inputColumns, HiveStorageFormat tableStorageFormat,
        HiveStorageFormat partitionStorageFormat, OptionalInt bucketCount, LocationHandle locationHandle,
        LocationService locationService, String filePrefix,
        HivePageSinkMetadataProvider pageSinkMetadataProvider, TypeManager typeManager,
        HdfsEnvironment hdfsEnvironment, boolean immutablePartitions, ConnectorSession session)
{
    this.schemaName = requireNonNull(schemaName, "schemaName is null");
    this.tableName = requireNonNull(tableName, "tableName is null");

    this.tableStorageFormat = requireNonNull(tableStorageFormat, "tableStorageFormat is null");
    this.partitionStorageFormat = requireNonNull(partitionStorageFormat, "partitionStorageFormat is null");
    this.locationHandle = requireNonNull(locationHandle, "locationHandle is null");
    this.locationService = requireNonNull(locationService, "locationService is null");
    this.filePrefix = requireNonNull(filePrefix, "filePrefix is null");

    this.pageSinkMetadataProvider = requireNonNull(pageSinkMetadataProvider, "pageSinkMetadataProvider is null");

    this.typeManager = requireNonNull(typeManager, "typeManager is null");
    this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
    this.immutablePartitions = immutablePartitions;

    // divide input columns into partition and data columns
    requireNonNull(inputColumns, "inputColumns is null");
    ImmutableList.Builder<String> partitionColumnNames = ImmutableList.builder();
    ImmutableList.Builder<Type> partitionColumnTypes = ImmutableList.builder();
    ImmutableList.Builder<DataColumn> dataColumns = ImmutableList.builder();
    for (HiveColumnHandle column : inputColumns) {
        HiveType hiveType = column.getHiveType();
        if (column.isPartitionKey()) {
            partitionColumnNames.add(column.getName());
            partitionColumnTypes.add(typeManager.getType(column.getTypeSignature()));
        }
        else {
            dataColumns.add(new DataColumn(column.getName(), typeManager.getType(column.getTypeSignature()), hiveType));
        }
    }
    this.partitionColumnNames = partitionColumnNames.build();
    this.partitionColumnTypes = partitionColumnTypes.build();
    this.dataColumns = dataColumns.build();

    if (isCreateTable) {
        this.table = null;
        Optional<Path> writePath = locationService.writePathRoot(locationHandle);
        checkArgument(writePath.isPresent(), "CREATE TABLE must have a write path");
        conf = new JobConf(hdfsEnvironment.getConfiguration(writePath.get()));
    }
    else {
        Optional<Table> table = pageSinkMetadataProvider.getTable();
        if (!table.isPresent()) {
            throw new PrestoException(HIVE_INVALID_METADATA,
                    format("Table %s.%s was dropped during insert", schemaName, tableName));
        }
        this.table = table.get();
        Path hdfsEnvironmentPath = locationService.writePathRoot(locationHandle)
                .orElseGet(() -> locationService.targetPathRoot(locationHandle));
        conf = new JobConf(hdfsEnvironment.getConfiguration(hdfsEnvironmentPath));
    }

    this.bucketCount = requireNonNull(bucketCount, "bucketCount is null");
    if (bucketCount.isPresent()) {
        checkArgument(bucketCount.getAsInt() < MAX_BUCKET_COUNT,
                "bucketCount must be smaller than " + MAX_BUCKET_COUNT);
    }

    this.session = requireNonNull(session, "session is null");
}