Usage examples for org.apache.hadoop.fs.Path#toString()
@Override
public String toString()
From source file:com.facebook.hive.orc.MemoryManager.java
License:Open Source License
/** * Add a new writer's memory allocation to the pool. We use the path * as a unique key to ensure that we don't get duplicates. * @param path the file that is being written * @param requestedAllocation the requested buffer size * @param initialAllocation the current size of the buffer *//*from www. j av a2 s . c o m*/ synchronized void addWriter(Path path, long requestedAllocation, Callback callback, long initialAllocation) throws IOException { WriterInfo oldVal = writerList.get(path); // this should always be null, but we handle the case where the memory // manager wasn't told that a writer wasn't still in use and the task // starts writing to the same path. if (oldVal == null) { LOG.info("Registering writer for path " + path.toString()); oldVal = new WriterInfo(requestedAllocation, callback); writerList.put(path, oldVal); totalAllocation += requestedAllocation; } else { // handle a new writer that is writing to the same path totalAllocation += requestedAllocation - oldVal.allocation; oldVal.allocation = requestedAllocation; oldVal.callback = callback; } updateScale(true); // If we're not already in low memory mode, and the initial allocation already exceeds the // allocation, enter low memory mode to try to avoid an OOM if (!lowMemoryMode && requestedAllocation * currentScale <= initialAllocation) { lowMemoryMode = true; LOG.info("ORC: Switching to low memory mode"); for (WriterInfo writer : writerList.values()) { writer.callback.enterLowMemoryMode(); } } }
From source file:com.facebook.hive.orc.MemoryManager.java
License:Open Source License
/**
 * Removes the writer registered for the given path from the pool and gives its
 * allocation back. Does nothing when no writer is registered for the path.
 *
 * @param path the file that has been closed
 * @throws IOException declared by the signature; presumably propagated from
 *     {@code updateScale} (confirm against its definition)
 */
synchronized void removeWriter(Path path) throws IOException {
    WriterInfo registered = writerList.get(path);
    if (registered == null) {
        return;
    }
    LOG.info("Unregeristering writer for path " + path.toString());
    writerList.remove(path);
    totalAllocation -= registered.allocation;
    updateScale(false);
}
From source file:com.facebook.hive.orc.MemoryManager.java
License:Open Source License
boolean shouldFlush(MemoryEstimate memoryEstimate, Path path, long stripeSize, long maxDictSize) throws IOException { WriterInfo writer = writerList.get(path); if (writer == null) { throw new IOException("No writer registered for path " + path.toString()); }//w w w. jav a 2 s . co m long limit = Math.round(stripeSize * currentScale * writer.allocationMultiplier); if (memoryEstimate.getTotalMemory() > limit || (maxDictSize > 0 && memoryEstimate.getDictionaryMemory() > maxDictSize)) { writer.flushedCount++; return true; } return false; }
From source file:com.facebook.hiveio.common.HadoopUtils.java
License:Apache License
/** * Set worker output directory/*from w ww . j a v a2 s .co m*/ * @param context Task context * @throws IOException I/O errors */ public static void setWorkOutputDir(TaskAttemptContext context) throws IOException { Configuration conf = context.getConfiguration(); String outputPath = getOutputDir(conf); // we need to do this to get the task path and set it for mapred // implementation since it can't be done automatically because of // mapreduce->mapred abstraction if (outputPath != null) { FileOutputCommitter foc = new FileOutputCommitter(getOutputPath(conf), context); Path path = foc.getWorkPath(); FileSystem fs = path.getFileSystem(conf); fs.mkdirs(path); conf.set("mapred.work.output.dir", path.toString()); LOG.info("Setting mapred.work.output.dir to {}", path.toString()); } }
From source file:com.facebook.presto.hive.AbstractTestHiveClient.java
License:Apache License
protected void createEmptyTable(SchemaTableName schemaTableName, HiveStorageFormat hiveStorageFormat, List<Column> columns, List<Column> partitionColumns) throws Exception { Path targetPath; try (Transaction transaction = newTransaction()) { ConnectorSession session = newSession(); String tableOwner = session.getUser(); String schemaName = schemaTableName.getSchemaName(); String tableName = schemaTableName.getTableName(); LocationService locationService = getLocationService(schemaName); LocationHandle locationHandle = locationService.forNewTable(transaction.getMetastore(schemaName), session.getUser(), session.getQueryId(), schemaName, tableName); targetPath = locationService.targetPathRoot(locationHandle); Table.Builder tableBuilder = Table.builder().setDatabaseName(schemaName).setTableName(tableName) .setOwner(tableOwner).setTableType(TableType.MANAGED_TABLE.name()) .setParameters(ImmutableMap.of()).setDataColumns(columns).setPartitionColumns(partitionColumns); tableBuilder.getStorageBuilder().setLocation(targetPath.toString()) .setStorageFormat(StorageFormat.create(hiveStorageFormat.getSerDe(), hiveStorageFormat.getInputFormat(), hiveStorageFormat.getOutputFormat())) .setSerdeParameters(ImmutableMap.of()); PrivilegeGrantInfo allPrivileges = new PrivilegeGrantInfo("all", 0, tableOwner, PrincipalType.USER, true);//w w w . ja v a 2 s. c om PrincipalPrivilegeSet principalPrivilegeSet = new PrincipalPrivilegeSet( ImmutableMap.of(session.getUser(), ImmutableList.of(allPrivileges)), ImmutableMap.of(), ImmutableMap.of()); transaction.getMetastore(schemaName).createTable(session, tableBuilder.build(), principalPrivilegeSet, Optional.empty()); transaction.commit(); } ConnectorSession session = newSession(); List<String> targetDirectoryList = listDirectory(session.getUser(), targetPath); assertEquals(targetDirectoryList, ImmutableList.of()); }
From source file:com.facebook.presto.hive.HiveClient.java
License:Apache License
@Override public HiveOutputTableHandle beginCreateTable(ConnectorSession session, ConnectorTableMetadata tableMetadata) { checkArgument(!isNullOrEmpty(tableMetadata.getOwner()), "Table owner is null or empty"); ImmutableList.Builder<String> columnNames = ImmutableList.builder(); ImmutableList.Builder<Type> columnTypes = ImmutableList.builder(); for (ColumnMetadata column : tableMetadata.getColumns()) { columnNames.add(column.getName()); columnTypes.add(column.getType()); }//from w ww.j av a2 s .com if (tableMetadata.isSampled()) { columnNames.add(SAMPLE_WEIGHT_COLUMN_NAME); columnTypes.add(BIGINT); } // get the root directory for the database SchemaTableName table = tableMetadata.getTable(); String schemaName = table.getSchemaName(); String tableName = table.getTableName(); String location = getDatabase(schemaName).getLocationUri(); if (isNullOrEmpty(location)) { throw new RuntimeException(format("Database '%s' location is not set", schemaName)); } Path databasePath = new Path(location); if (!pathExists(databasePath)) { throw new RuntimeException( format("Database '%s' location does not exist: %s", schemaName, databasePath)); } if (!isDirectory(databasePath)) { throw new RuntimeException( format("Database '%s' location is not a directory: %s", schemaName, databasePath)); } // verify the target directory for the table Path targetPath = new Path(databasePath, tableName); if (pathExists(targetPath)) { throw new RuntimeException( format("Target directory for table '%s' already exists: %s", table, targetPath)); } if (!useTemporaryDirectory(targetPath)) { return new HiveOutputTableHandle(connectorId, schemaName, tableName, columnNames.build(), columnTypes.build(), tableMetadata.getOwner(), targetPath.toString(), targetPath.toString()); } // use a per-user temporary directory to avoid permission problems // TODO: this should use Hadoop UserGroupInformation String temporaryPrefix = "/tmp/presto-" + StandardSystemProperty.USER_NAME.value(); // create a temporary directory 
on the same filesystem Path temporaryRoot = new Path(targetPath, temporaryPrefix); Path temporaryPath = new Path(temporaryRoot, randomUUID().toString()); createDirectories(temporaryPath); return new HiveOutputTableHandle(connectorId, schemaName, tableName, columnNames.build(), columnTypes.build(), tableMetadata.getOwner(), targetPath.toString(), temporaryPath.toString()); }
From source file:com.facebook.presto.hive.HiveClient.java
License:Apache License
/**
 * Finishes table creation: moves the temporary directory (if one was used) to the target
 * location and registers the table in the metastore.
 *
 * @param tableHandle the handle produced when creation began; must be a
 *     {@code HiveOutputTableHandle}
 * @param fragments not read by this method
 * @throws RuntimeException if a temporary path was used and the target directory already
 *     exists at commit time
 */
@Override
public void commitCreateTable(ConnectorOutputTableHandle tableHandle, Collection<String> fragments) {
    HiveOutputTableHandle handle = checkType(tableHandle, HiveOutputTableHandle.class, "tableHandle");

    Path targetPath = new Path(handle.getTargetPath());

    if (handle.hasTemporaryPath()) {
        // verify no one raced us to create the target directory
        if (pathExists(targetPath)) {
            SchemaTableName tableName = new SchemaTableName(handle.getSchemaName(), handle.getTableName());
            throw new RuntimeException(
                    format("Unable to commit creation of table '%s': target directory already exists: %s",
                            tableName, targetPath));
        }
        // rename the temporary directory to the target
        rename(new Path(handle.getTemporaryPath()), targetPath);
    }

    // Translate the column types to Hive type names, index-aligned with the column names.
    List<String> hiveTypeNames = FluentIterable.from(handle.getColumnTypes())
            .transform(columnTypeToHiveType())
            .transform(hiveTypeNameGetter())
            .toList();

    List<String> columnNames = handle.getColumnNames();
    ImmutableList.Builder<FieldSchema> fieldSchemas = ImmutableList.builder();
    boolean sampled = false;
    for (int index = 0; index < columnNames.size(); index++) {
        String columnName = columnNames.get(index);
        boolean isSampleWeight = columnName.equals(SAMPLE_WEIGHT_COLUMN_NAME);
        String columnComment = isSampleWeight ? "Presto sample weight column" : null;
        fieldSchemas.add(new FieldSchema(columnName, hiveTypeNames.get(index), columnComment));
        if (isSampleWeight) {
            sampled = true;
        }
    }

    SerDeInfo serdeInfo = new SerDeInfo();
    serdeInfo.setName(handle.getTableName());
    serdeInfo.setSerializationLib(hiveStorageFormat.getSerDe());
    serdeInfo.setParameters(ImmutableMap.<String, String>of());

    StorageDescriptor storage = new StorageDescriptor();
    storage.setLocation(targetPath.toString());
    storage.setCols(fieldSchemas.build());
    storage.setSerdeInfo(serdeInfo);
    storage.setInputFormat(hiveStorageFormat.getInputFormat());
    storage.setOutputFormat(hiveStorageFormat.getOutputFormat());
    storage.setParameters(ImmutableMap.<String, String>of());

    String tableComment = sampled
            ? "Sampled table created by Presto. Only query this table from Hive if you understand how Presto implements sampling."
            : "Created by Presto";

    // create the table in the metastore
    Table table = new Table();
    table.setDbName(handle.getSchemaName());
    table.setTableName(handle.getTableName());
    table.setOwner(handle.getTableOwner());
    table.setTableType(TableType.MANAGED_TABLE.toString());
    table.setParameters(ImmutableMap.of("comment", tableComment));
    table.setPartitionKeys(ImmutableList.<FieldSchema>of());
    table.setSd(storage);

    metastore.createTable(table);
}
From source file:com.facebook.presto.hive.HiveMetadata.java
License:Apache License
private static Table buildTableObject(String queryId, String schemaName, String tableName, String tableOwner, List<HiveColumnHandle> columnHandles, HiveStorageFormat hiveStorageFormat, List<String> partitionedBy, Optional<HiveBucketProperty> bucketProperty, Map<String, String> additionalTableParameters, Path targetPath, boolean external, String prestoVersion) { Map<String, HiveColumnHandle> columnHandlesByName = Maps.uniqueIndex(columnHandles, HiveColumnHandle::getName);// ww w .j a v a2s . c o m List<Column> partitionColumns = partitionedBy.stream().map(columnHandlesByName::get) .map(column -> new Column(column.getName(), column.getHiveType(), column.getComment())) .collect(toList()); Set<String> partitionColumnNames = ImmutableSet.copyOf(partitionedBy); ImmutableList.Builder<Column> columns = ImmutableList.builder(); for (HiveColumnHandle columnHandle : columnHandles) { String name = columnHandle.getName(); HiveType type = columnHandle.getHiveType(); if (!partitionColumnNames.contains(name)) { verify(!columnHandle.isPartitionKey(), "Column handles are not consistent with partitioned by property"); columns.add(new Column(name, type, columnHandle.getComment())); } else { verify(columnHandle.isPartitionKey(), "Column handles are not consistent with partitioned by property"); } } ImmutableMap.Builder<String, String> tableParameters = ImmutableMap.<String, String>builder() .put("comment", "Created by Presto").put(PRESTO_VERSION_NAME, prestoVersion) .put(PRESTO_QUERY_ID_NAME, queryId).putAll(additionalTableParameters); if (external) { tableParameters.put("EXTERNAL", "TRUE"); } Table.Builder tableBuilder = Table.builder().setDatabaseName(schemaName).setTableName(tableName) .setOwner(tableOwner).setTableType((external ? 
EXTERNAL_TABLE : MANAGED_TABLE).name()) .setDataColumns(columns.build()).setPartitionColumns(partitionColumns) .setParameters(tableParameters.build()); tableBuilder.getStorageBuilder().setStorageFormat(fromHiveStorageFormat(hiveStorageFormat)) .setBucketProperty(bucketProperty).setLocation(targetPath.toString()); return tableBuilder.build(); }
From source file:com.facebook.presto.hive.HivePageSink.java
License:Apache License
private HiveRecordWriter createWriter(List<Object> partitionRow) { checkArgument(partitionRow.size() == partitionColumnNames.size(), "size of partitionRow is different from partitionColumnNames"); List<String> partitionValues = partitionRow.stream().map(Object::toString) // todo this seems wrong .collect(toList());/* w ww . jav a 2 s. co m*/ Optional<String> partitionName; if (!partitionColumnNames.isEmpty()) { partitionName = Optional.of(FileUtils.makePartName(partitionColumnNames, partitionValues)); } else { partitionName = Optional.empty(); } // attempt to get the existing partition (if this is an existing partitioned table) Optional<Partition> partition = Optional.empty(); if (!partitionRow.isEmpty() && table != null) { partition = metastore.getPartition(schemaName, tableName, partitionName.get()); } boolean isNew; Properties schema; Path target; Path write; String outputFormat; String serDe; if (!partition.isPresent()) { if (table == null) { // Write to: a new partition in a new partitioned table, // or a new unpartitioned table. 
isNew = true; schema = new Properties(); schema.setProperty(META_TABLE_COLUMNS, Joiner.on(',').join(dataColumnNames)); schema.setProperty(META_TABLE_COLUMN_TYPES, dataColumnTypes.stream().map(HiveType::toHiveType) .map(HiveType::getHiveTypeName).collect(Collectors.joining(":"))); target = locationService.targetPath(locationHandle, partitionName); write = locationService.writePath(locationHandle, partitionName).get(); if (partitionName.isPresent()) { // verify the target directory for the partition does not already exist if (HiveWriteUtils.pathExists(hdfsEnvironment, target)) { throw new PrestoException(HIVE_PATH_ALREADY_EXISTS, format( "Target directory for new partition '%s' of table '%s.%s' already exists: %s", partitionName, schemaName, tableName, target)); } } outputFormat = tableStorageFormat.getOutputFormat(); serDe = tableStorageFormat.getSerDe(); } else { // Write to: a new partition in an existing partitioned table, // or an existing unpartitioned table if (partitionName.isPresent()) { isNew = true; } else { if (immutablePartitions) { throw new PrestoException(HIVE_PARTITION_READ_ONLY, "Unpartitioned Hive tables are immutable"); } isNew = false; } schema = MetaStoreUtils.getSchema(table.getSd(), table.getSd(), table.getParameters(), schemaName, tableName, table.getPartitionKeys()); target = locationService.targetPath(locationHandle, partitionName); write = locationService.writePath(locationHandle, partitionName).orElse(target); if (respectTableFormat) { outputFormat = table.getSd().getOutputFormat(); } else { outputFormat = tableStorageFormat.getOutputFormat(); } serDe = table.getSd().getSerdeInfo().getSerializationLib(); } } else { // Write to: an existing partition in an existing partitioned table, if (immutablePartitions) { throw new PrestoException(HIVE_PARTITION_READ_ONLY, "Hive partitions are immutable"); } isNew = false; // Append to an existing partition HiveWriteUtils.checkPartitionIsWritable(partitionName.get(), partition.get()); 
StorageDescriptor storageDescriptor = partition.get().getSd(); outputFormat = storageDescriptor.getOutputFormat(); serDe = storageDescriptor.getSerdeInfo().getSerializationLib(); schema = MetaStoreUtils.getSchema(partition.get(), table); target = locationService.targetPath(locationHandle, partition.get(), partitionName.get()); write = locationService.writePath(locationHandle, partitionName).orElse(target); } return new HiveRecordWriter(schemaName, tableName, partitionName.orElse(""), isNew, dataColumnNames, dataColumnTypes, outputFormat, serDe, schema, generateRandomFileName(outputFormat), write.toString(), target.toString(), typeManager, conf); }
From source file:com.facebook.presto.hive.HiveWriterFactory.java
License:Apache License
public HiveWriter createWriter(Page partitionColumns, int position, OptionalInt bucketNumber) { if (bucketCount.isPresent()) { checkArgument(bucketNumber.isPresent(), "Bucket not provided for bucketed table"); checkArgument(bucketNumber.getAsInt() < bucketCount.getAsInt(), "Bucket number %s must be less than bucket count %s", bucketNumber, bucketCount); } else {/*from www.j a v a2 s .com*/ checkArgument(!bucketNumber.isPresent(), "Bucket number provided by for table that is not bucketed"); } String fileName; if (bucketNumber.isPresent()) { fileName = computeBucketedFileName(filePrefix, bucketNumber.getAsInt()); } else { fileName = filePrefix + "_" + randomUUID(); } List<String> partitionValues = toPartitionValues(partitionColumns, position); Optional<String> partitionName; if (!partitionColumnNames.isEmpty()) { partitionName = Optional.of(FileUtils.makePartName(partitionColumnNames, partitionValues)); } else { partitionName = Optional.empty(); } // attempt to get the existing partition (if this is an existing partitioned table) Optional<Partition> partition = Optional.empty(); if (!partitionValues.isEmpty() && table != null) { partition = pageSinkMetadataProvider.getPartition(partitionValues); } boolean isNew; Properties schema; Path target; Path write; StorageFormat outputStorageFormat; if (!partition.isPresent()) { if (table == null) { // Write to: a new partition in a new partitioned table, // or a new unpartitioned table. 
isNew = true; schema = new Properties(); schema.setProperty(META_TABLE_COLUMNS, dataColumns.stream().map(DataColumn::getName).collect(joining(","))); schema.setProperty(META_TABLE_COLUMN_TYPES, dataColumns.stream().map(DataColumn::getHiveType) .map(HiveType::getHiveTypeName).collect(joining(":"))); target = locationService.targetPath(locationHandle, partitionName); write = locationService.writePath(locationHandle, partitionName).get(); if (partitionName.isPresent() && !target.equals(write)) { // When target path is different from write path, // verify that the target directory for the partition does not already exist if (HiveWriteUtils.pathExists(session.getUser(), hdfsEnvironment, target)) { throw new PrestoException(HIVE_PATH_ALREADY_EXISTS, format( "Target directory for new partition '%s' of table '%s.%s' already exists: %s", partitionName, schemaName, tableName, target)); } } } else { // Write to: a new partition in an existing partitioned table, // or an existing unpartitioned table if (partitionName.isPresent()) { isNew = true; } else { if (bucketNumber.isPresent()) { throw new PrestoException(HIVE_PARTITION_READ_ONLY, "Can not insert into bucketed unpartitioned Hive table"); } if (immutablePartitions) { throw new PrestoException(HIVE_PARTITION_READ_ONLY, "Unpartitioned Hive tables are immutable"); } isNew = false; } schema = getHiveSchema(table); target = locationService.targetPath(locationHandle, partitionName); write = locationService.writePath(locationHandle, partitionName).orElse(target); } if (partitionName.isPresent()) { // Write to a new partition outputStorageFormat = fromHiveStorageFormat(partitionStorageFormat); } else { // Write to a new/existing unpartitioned table outputStorageFormat = fromHiveStorageFormat(tableStorageFormat); } } else { // Write to: an existing partition in an existing partitioned table, if (bucketNumber.isPresent()) { throw new PrestoException(HIVE_PARTITION_READ_ONLY, "Can not insert into existing partitions of bucketed Hive 
table"); } if (immutablePartitions) { throw new PrestoException(HIVE_PARTITION_READ_ONLY, "Hive partitions are immutable"); } isNew = false; // Check the column types in partition schema match the column types in table schema List<Column> tableColumns = table.getDataColumns(); List<Column> existingPartitionColumns = partition.get().getColumns(); for (int i = 0; i < min(existingPartitionColumns.size(), tableColumns.size()); i++) { HiveType tableType = tableColumns.get(i).getType(); HiveType partitionType = existingPartitionColumns.get(i).getType(); if (!tableType.equals(partitionType)) { throw new PrestoException(HIVE_PARTITION_SCHEMA_MISMATCH, format("" + "There is a mismatch between the table and partition schemas. " + "The column '%s' in table '%s' is declared as type '%s', " + "but partition '%s' declared column '%s' as type '%s'.", tableColumns.get(i).getName(), tableName, tableType, partitionName, existingPartitionColumns.get(i).getName(), partitionType)); } } // Append to an existing partition HiveWriteUtils.checkPartitionIsWritable(partitionName.get(), partition.get()); outputStorageFormat = partition.get().getStorage().getStorageFormat(); schema = getHiveSchema(partition.get(), table); target = locationService.targetPath(locationHandle, partition.get(), partitionName.get()); write = locationService.writePath(locationHandle, partitionName).orElse(target); } validateSchema(partitionName, schema); String fileNameWithExtension = fileName + getFileExtension(conf, outputStorageFormat); HiveRecordWriter hiveRecordWriter = new HiveRecordWriter(new Path(write, fileNameWithExtension), dataColumns.stream().map(DataColumn::getName).collect(toList()), outputStorageFormat, schema, typeManager, conf); return new HiveWriter(hiveRecordWriter, partitionName, isNew, fileNameWithExtension, write.toString(), target.toString()); }