List of usage examples for org.apache.hadoop.fs.Path.getParent()
public Path getParent()
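Before the project-specific examples below, a minimal standalone sketch of what getParent() returns; the paths and class name here are illustrative, not taken from any of the projects listed:

import org.apache.hadoop.fs.Path;

public class GetParentBasics {
    public static void main(String[] args) {
        // getParent() strips the last path component and returns the enclosing directory.
        Path file = new Path("hdfs://namenode:8020/data/segments/index.zip");
        System.out.println(file.getParent());              // hdfs://namenode:8020/data/segments
        System.out.println(file.getParent().getParent());  // hdfs://namenode:8020/data

        // The root has no parent; getParent() returns null, so guard before dereferencing.
        Path root = new Path("/");
        System.out.println(root.getParent());              // null
    }
}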
From source file:io.druid.storage.hdfs.HdfsDataSegmentPusher.java
License:Apache License
@Override
public DataSegment push(File inDir, DataSegment segment) throws IOException {
    final String storageDir = DataSegmentPusherUtil.getHdfsStorageDir(segment);

    log.info("Copying segment[%s] to HDFS at location[%s/%s]", segment.getIdentifier(),
            config.getStorageDirectory(), storageDir);

    Path outFile = new Path(String.format("%s/%s/index.zip", config.getStorageDirectory(), storageDir));
    FileSystem fs = outFile.getFileSystem(hadoopConfig);

    fs.mkdirs(outFile.getParent());
    log.info("Compressing files from[%s] to [%s]", inDir, outFile);

    final long size;
    try (FSDataOutputStream out = fs.create(outFile)) {
        size = CompressionUtils.zip(inDir, out);
    }

    return createDescriptorFile(segment.withLoadSpec(makeLoadSpec(outFile))
            .withSize(size)
            .withBinaryVersion(SegmentUtils.getVersionFromDir(inDir)), outFile.getParent(), fs);
}
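The getParent() call above follows a common idiom: create the containing directory before opening the output file. A minimal sketch of that idiom on its own, assuming a reachable default FileSystem; the path is made up for illustration:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class EnsureParentThenWrite {
    public static void main(String[] args) throws Exception {
        Path outFile = new Path("/tmp/example/output/index.zip"); // illustrative path
        FileSystem fs = outFile.getFileSystem(new Configuration());

        // Ensure the containing directory exists; mkdirs() is harmless if it already does.
        fs.mkdirs(outFile.getParent());

        // Create the file inside the now-existing directory.
        try (FSDataOutputStream out = fs.create(outFile, true)) {
            out.writeUTF("payload");
        }
    }
}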
From source file:io.druid.storage.hdfs.HdfsFileTimestampVersionFinder.java
License:Apache License
/**
 * Returns the latest modified file at the uri of interest.
 *
 * @param uri     Either a directory or a file on HDFS. If it is a file, the parent directory will be searched.
 * @param pattern A pattern matcher for file names in the directory of interest. Passing `null` results in matching any file in the directory.
 *
 * @return The URI of the file with the most recent modified timestamp.
 */
@Override
public URI getLatestVersion(final URI uri, final Pattern pattern) {
    final Path path = new Path(uri);
    try {
        return RetryUtils.retry(new Callable<URI>() {
            @Override
            public URI call() throws Exception {
                final FileSystem fs = path.getFileSystem(config);
                if (!fs.exists(path)) {
                    return null;
                }
                return mostRecentInDir(fs.isDirectory(path) ? path : path.getParent(), pattern);
            }
        }, shouldRetryPredicate(), DEFAULT_RETRY_COUNT);
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }
}
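The part of this method that exercises getParent() is the ternary inside call(): use the path itself when it is a directory, otherwise fall back to the directory that contains it. A minimal sketch of just that decision, without Druid's retry wrapper; the path is made up for illustration:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DirOrParentOfFile {
    public static void main(String[] args) throws Exception {
        Path path = new Path("/tmp/example/data/part-00000"); // illustrative path
        FileSystem fs = path.getFileSystem(new Configuration());

        // If the caller handed us a file, search its containing directory instead.
        Path dirToSearch = fs.isDirectory(path) ? path : path.getParent();
        for (FileStatus status : fs.listStatus(dirToSearch)) {
            System.out.println(status.getPath() + " modified at " + status.getModificationTime());
        }
    }
}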
From source file:io.hops.erasure_coding.Encoder.java
License:Apache License
/**
 * The interface to use to generate a parity file.
 * This method can be called multiple times with the same Encoder object,
 * thus allowing reuse of the buffers allocated by the Encoder object.
 *
 * @param fs         The filesystem containing the source file.
 * @param srcFile    The source file.
 * @param parityFile The parity file to be generated.
 */
public void encodeFile(Configuration jobConf, FileSystem fs, Path srcFile, FileSystem parityFs, Path parityFile,
        short parityRepl, long numStripes, long blockSize, Progressable reporter, StripeReader sReader)
        throws IOException {
    long expectedParityBlocks = numStripes * codec.parityLength;
    long expectedParityFileSize = numStripes * blockSize * codec.parityLength;

    if (!parityFs.mkdirs(parityFile.getParent())) {
        throw new IOException("Could not create parent dir " + parityFile.getParent());
    }
    // delete destination if exists
    if (parityFs.exists(parityFile)) {
        parityFs.delete(parityFile, false);
    }

    // Writing out a large parity file at replication 1 is difficult since
    // some datanode could die and we would not be able to close() the file.
    // So write at replication 2 and then reduce it after close() succeeds.
    short tmpRepl = parityRepl;
    if (expectedParityBlocks >= conf.getInt("raid.encoder.largeparity.blocks", 20)) {
        if (parityRepl == 1) {
            tmpRepl = 2;
        }
    }
    FSDataOutputStream out = parityFs.create(parityFile, true, conf.getInt("io.file.buffer.size", 64 * 1024),
            tmpRepl, blockSize);
    DFSOutputStream dfsOut = (DFSOutputStream) out.getWrappedStream();
    dfsOut.enableParityStream(codec.getStripeLength(), codec.getParityLength(), srcFile.toUri().getPath());
    try {
        encodeFileToStream(fs, srcFile, parityFile, sReader, blockSize, out, reporter);
        out.close();
        out = null;
        LOG.info("Wrote parity file " + parityFile);
        FileStatus tmpStat = parityFs.getFileStatus(parityFile);
        if (tmpStat.getLen() != expectedParityFileSize) {
            throw new IOException("Expected parity size " + expectedParityFileSize
                    + " does not match actual " + tmpStat.getLen());
        }
        if (tmpRepl > parityRepl) {
            parityFs.setReplication(parityFile, parityRepl);
        }
        LOG.info("Wrote parity file " + parityFile);
    } finally {
        if (out != null) {
            out.close();
        }
    }
}
From source file:io.pivotal.spring.xd.module.job.hdfs.loader.PartitionLogicTest.java
License:Apache License
@Test
public void testHdfsPathPartition() {
    String hdfsBaseDir = "/xd/basedir";
    String expression = "region + '/' + dateFormat('yyyy/MM/dd/HH/mm', timestamp)";
    DefaultPartitionStrategy<String> partitionStrategy = new DefaultPartitionStrategy<String>(expression);
    DefaultPartitionKey key = new DefaultPartitionKey();
    key.put("region", hdfsBaseDir);
    PartitionResolver<Map<String, Object>> resolver = partitionStrategy.getPartitionResolver();
    Path resolvedPath = resolver.resolvePath(key);
    Assert.assertNotNull(resolvedPath);
    Assert.assertEquals("/xd/basedir",
            resolvedPath.getParent().getParent().getParent().getParent().getParent().toString());
    System.out.println(resolvedPath);
}
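The test recovers the base directory by applying getParent() once per segment of the 'yyyy/MM/dd/HH/mm' partition expression: five segments, five calls. A small illustrative sketch of the same walk-up, with made-up date values:

import org.apache.hadoop.fs.Path;

public class WalkUpPartitions {
    public static void main(String[] args) {
        // A resolved partition path: base dir plus five date segments (illustrative values).
        Path resolved = new Path("/xd/basedir/2024/01/31/12/05");

        // One getParent() call per partition segment walks back up to the base directory.
        Path p = resolved;
        for (int i = 0; i < 5; i++) {
            p = p.getParent();
        }
        System.out.println(p); // /xd/basedir
    }
}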
From source file:io.prestosql.plugin.hive.HiveWriterFactory.java
License:Apache License
public HiveWriter createWriter(Page partitionColumns, int position, OptionalInt bucketNumber) {
    if (bucketCount.isPresent()) {
        checkArgument(bucketNumber.isPresent(), "Bucket not provided for bucketed table");
        checkArgument(bucketNumber.getAsInt() < bucketCount.getAsInt(),
                "Bucket number %s must be less than bucket count %s", bucketNumber, bucketCount);
    }
    else {
        checkArgument(!bucketNumber.isPresent(), "Bucket number provided for table that is not bucketed");
    }

    String fileName;
    if (bucketNumber.isPresent()) {
        fileName = computeBucketedFileName(filePrefix, bucketNumber.getAsInt());
    }
    else {
        fileName = filePrefix + "_" + randomUUID();
    }

    List<String> partitionValues = createPartitionValues(partitionColumnTypes, partitionColumns, position);

    Optional<String> partitionName;
    if (!partitionColumnNames.isEmpty()) {
        partitionName = Optional.of(FileUtils.makePartName(partitionColumnNames, partitionValues));
    }
    else {
        partitionName = Optional.empty();
    }

    // attempt to get the existing partition (if this is an existing partitioned table)
    Optional<Partition> partition = Optional.empty();
    if (!partitionValues.isEmpty() && table != null) {
        partition = pageSinkMetadataProvider.getPartition(partitionValues);
    }

    UpdateMode updateMode;
    Properties schema;
    WriteInfo writeInfo;
    StorageFormat outputStorageFormat;
    if (!partition.isPresent()) {
        if (table == null) {
            // Write to: a new partition in a new partitioned table,
            // or a new unpartitioned table.
            updateMode = UpdateMode.NEW;
            schema = new Properties();
            schema.setProperty(META_TABLE_COLUMNS,
                    dataColumns.stream().map(DataColumn::getName).collect(joining(",")));
            schema.setProperty(META_TABLE_COLUMN_TYPES, dataColumns.stream().map(DataColumn::getHiveType)
                    .map(HiveType::getHiveTypeName).map(HiveTypeName::toString).collect(joining(":")));

            if (!partitionName.isPresent()) {
                // new unpartitioned table
                writeInfo = locationService.getTableWriteInfo(locationHandle);
            }
            else {
                // a new partition in a new partitioned table
                writeInfo = locationService.getPartitionWriteInfo(locationHandle, partition, partitionName.get());

                if (!writeInfo.getWriteMode().isWritePathSameAsTargetPath()) {
                    // When target path is different from write path,
                    // verify that the target directory for the partition does not already exist
                    if (HiveWriteUtils.pathExists(new HdfsContext(session, schemaName, tableName),
                            hdfsEnvironment, writeInfo.getTargetPath())) {
                        throw new PrestoException(HIVE_PATH_ALREADY_EXISTS, format(
                                "Target directory for new partition '%s' of table '%s.%s' already exists: %s",
                                partitionName, schemaName, tableName, writeInfo.getTargetPath()));
                    }
                }
            }
        }
        else {
            // Write to: a new partition in an existing partitioned table,
            // or an existing unpartitioned table
            if (partitionName.isPresent()) {
                // a new partition in an existing partitioned table
                updateMode = UpdateMode.NEW;
                writeInfo = locationService.getPartitionWriteInfo(locationHandle, partition, partitionName.get());
            }
            else {
                if (bucketNumber.isPresent()) {
                    throw new PrestoException(HIVE_PARTITION_READ_ONLY,
                            "Cannot insert into bucketed unpartitioned Hive table");
                }
                if (immutablePartitions) {
                    throw new PrestoException(HIVE_PARTITION_READ_ONLY,
                            "Unpartitioned Hive tables are immutable");
                }
                updateMode = UpdateMode.APPEND;
                writeInfo = locationService.getTableWriteInfo(locationHandle);
            }

            schema = getHiveSchema(table);
        }

        if (partitionName.isPresent()) {
            // Write to a new partition
            outputStorageFormat = fromHiveStorageFormat(partitionStorageFormat);
        }
        else {
            // Write to a new/existing unpartitioned table
            outputStorageFormat = fromHiveStorageFormat(tableStorageFormat);
        }
    }
    else {
        // Write to: an existing partition in an existing partitioned table
        if (insertExistingPartitionsBehavior == InsertExistingPartitionsBehavior.APPEND) {
            // Append to an existing partition
            checkState(!immutablePartitions);
            if (bucketNumber.isPresent()) {
                throw new PrestoException(HIVE_PARTITION_READ_ONLY,
                        "Cannot insert into existing partition of bucketed Hive table: " + partitionName.get());
            }
            updateMode = UpdateMode.APPEND;

            // Check the column types in partition schema match the column types in table schema
            List<Column> tableColumns = table.getDataColumns();
            List<Column> existingPartitionColumns = partition.get().getColumns();
            for (int i = 0; i < min(existingPartitionColumns.size(), tableColumns.size()); i++) {
                HiveType tableType = tableColumns.get(i).getType();
                HiveType partitionType = existingPartitionColumns.get(i).getType();
                if (!tableType.equals(partitionType)) {
                    throw new PrestoException(HIVE_PARTITION_SCHEMA_MISMATCH, format("" +
                            "You are trying to write into an existing partition in a table. " +
                            "The table schema has changed since the creation of the partition. " +
                            "Inserting rows into such partition is not supported. " +
                            "The column '%s' in table '%s' is declared as type '%s', " +
                            "but partition '%s' declared column '%s' as type '%s'.",
                            tableColumns.get(i).getName(), tableName, tableType,
                            partitionName, existingPartitionColumns.get(i).getName(), partitionType));
                }
            }

            HiveWriteUtils.checkPartitionIsWritable(partitionName.get(), partition.get());

            outputStorageFormat = partition.get().getStorage().getStorageFormat();
            schema = getHiveSchema(partition.get(), table);

            writeInfo = locationService.getPartitionWriteInfo(locationHandle, partition, partitionName.get());
        }
        else if (insertExistingPartitionsBehavior == InsertExistingPartitionsBehavior.OVERWRITE) {
            // Overwrite an existing partition
            //
            // The behavior of overwrite considered as if first dropping the partition and inserting a new partition, thus:
            // * No partition writable check is required.
            // * Table schema and storage format is used for the new partition (instead of existing partition schema and storage format).
            updateMode = UpdateMode.OVERWRITE;

            outputStorageFormat = fromHiveStorageFormat(partitionStorageFormat);
            schema = getHiveSchema(table);

            writeInfo = locationService.getPartitionWriteInfo(locationHandle, Optional.empty(), partitionName.get());
            checkState(writeInfo.getWriteMode() != DIRECT_TO_TARGET_EXISTING_DIRECTORY,
                    "Overwriting existing partition doesn't support DIRECT_TO_TARGET_EXISTING_DIRECTORY write mode");
        }
        else if (insertExistingPartitionsBehavior == InsertExistingPartitionsBehavior.ERROR) {
            throw new PrestoException(HIVE_PARTITION_READ_ONLY,
                    "Cannot insert into an existing partition of Hive table: " + partitionName.get());
        }
        else {
            throw new IllegalArgumentException(format("Unsupported insert existing partitions behavior: %s",
                    insertExistingPartitionsBehavior));
        }
    }

    validateSchema(partitionName, schema);

    String fileNameWithExtension = fileName + getFileExtension(conf, outputStorageFormat);
    Path path = new Path(writeInfo.getWritePath(), fileNameWithExtension);

    HiveFileWriter hiveFileWriter = null;
    for (HiveFileWriterFactory fileWriterFactory : fileWriterFactories) {
        Optional<HiveFileWriter> fileWriter = fileWriterFactory.createFileWriter(path,
                dataColumns.stream().map(DataColumn::getName).collect(toList()),
                outputStorageFormat, schema, conf, session);
        if (fileWriter.isPresent()) {
            hiveFileWriter = fileWriter.get();
            break;
        }
    }
    if (hiveFileWriter == null) {
        hiveFileWriter = new RecordFileWriter(path,
                dataColumns.stream().map(DataColumn::getName).collect(toList()),
                outputStorageFormat, schema, partitionStorageFormat.getEstimatedWriterSystemMemoryUsage(),
                conf, typeManager, session);
    }

    String writerImplementation = hiveFileWriter.getClass().getName();

    Consumer<HiveWriter> onCommit = hiveWriter -> {
        Optional<Long> size;
        try {
            size = Optional.of(
                    hdfsEnvironment.getFileSystem(session.getUser(), path, conf).getFileStatus(path).getLen());
        }
        catch (IOException | RuntimeException e) {
            // Do not fail the query if file system is not available
            size = Optional.empty();
        }

        eventClient.post(new WriteCompletedEvent(session.getQueryId(), path.toString(), schemaName, tableName,
                partitionName.orElse(null), outputStorageFormat.getOutputFormat(), writerImplementation,
                nodeManager.getCurrentNode().getVersion(), nodeManager.getCurrentNode().getHttpUri().getHost(),
                session.getIdentity().getPrincipal().map(Principal::getName).orElse(null),
                nodeManager.getEnvironment(), sessionProperties, size.orElse(null), hiveWriter.getRowCount()));
    };

    if (!sortedBy.isEmpty()) {
        FileSystem fileSystem;
        try {
            fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, conf);
        }
        catch (IOException e) {
            throw new PrestoException(HIVE_WRITER_OPEN_ERROR, e);
        }

        List<Type> types = dataColumns.stream().map(column -> column.getHiveType().getType(typeManager))
                .collect(toImmutableList());

        Map<String, Integer> columnIndexes = new HashMap<>();
        for (int i = 0; i < dataColumns.size(); i++) {
            columnIndexes.put(dataColumns.get(i).getName(), i);
        }

        List<Integer> sortFields = new ArrayList<>();
        List<SortOrder> sortOrders = new ArrayList<>();
        for (SortingColumn column : sortedBy) {
            Integer index = columnIndexes.get(column.getColumnName());
            if (index == null) {
                throw new PrestoException(HIVE_INVALID_METADATA,
                        format("Sorting column '%s' does not exist in table '%s.%s'",
                                column.getColumnName(), schemaName, tableName));
            }
            sortFields.add(index);
            sortOrders.add(column.getOrder().getSortOrder());
        }

        hiveFileWriter = new SortingFileWriter(fileSystem,
                new Path(path.getParent(), ".tmp-sort." + path.getName()),
                hiveFileWriter, sortBufferSize, maxOpenSortFiles, types, sortFields, sortOrders, pageSorter,
                (fs, p) -> orcFileWriterFactory.createOrcDataSink(session, fs, p));
    }

    return new HiveWriter(hiveFileWriter, partitionName, updateMode, fileNameWithExtension,
            writeInfo.getWritePath().toString(), writeInfo.getTargetPath().toString(), onCommit, hiveWriterStats);
}
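Buried in this long method, getParent() serves a single purpose: the sorting writer's temporary file is placed next to the final output file by combining the output path's parent with a ".tmp-sort." prefix. A short illustrative sketch of that naming scheme; the prefix comes from the example above, everything else is made up:

import org.apache.hadoop.fs.Path;

public class TempFileBesideOutput {
    public static void main(String[] args) {
        Path output = new Path("/warehouse/schema/table/part-000_20240131"); // illustrative path
        // Same directory as the output, with a dot-prefixed name so it reads as a hidden temp file.
        Path tempSortFile = new Path(output.getParent(), ".tmp-sort." + output.getName());
        System.out.println(tempSortFile); // /warehouse/schema/table/.tmp-sort.part-000_20240131
    }
}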
From source file:io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore.java
License:Apache License
private static void renameDirectory(HdfsContext context, HdfsEnvironment hdfsEnvironment, Path source,
        Path target, Runnable runWhenPathDoesntExist) {
    if (pathExists(context, hdfsEnvironment, target)) {
        throw new PrestoException(HIVE_PATH_ALREADY_EXISTS,
                format("Unable to rename from %s to %s: target directory already exists", source, target));
    }

    if (!pathExists(context, hdfsEnvironment, target.getParent())) {
        createDirectory(context, hdfsEnvironment, target.getParent());
    }

    // The runnable will assume that if rename fails, it will be okay to delete the directory (if the directory is empty).
    // This is not technically true because a race condition still exists.
    runWhenPathDoesntExist.run();

    try {
        if (!hdfsEnvironment.getFileSystem(context, source).rename(source, target)) {
            throw new PrestoException(HIVE_FILESYSTEM_ERROR,
                    format("Failed to rename %s to %s: rename returned false", source, target));
        }
    }
    catch (IOException e) {
        throw new PrestoException(HIVE_FILESYSTEM_ERROR, format("Failed to rename %s to %s", source, target), e);
    }
}
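Here getParent() handles the "prepare the destination" half of a rename: the target itself must not exist, but its parent directory must. A minimal sketch of those preconditions using plain FileSystem calls instead of Presto's HdfsEnvironment helpers; the paths are illustrative:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RenameWithParentCheck {
    public static void main(String[] args) throws Exception {
        Path source = new Path("/tmp/example/staging/table1");   // illustrative
        Path target = new Path("/tmp/example/warehouse/table1"); // illustrative
        FileSystem fs = source.getFileSystem(new Configuration());

        if (fs.exists(target)) {
            throw new IllegalStateException("target directory already exists: " + target);
        }
        // On HDFS, rename() will not create missing parents, so create the target's parent first.
        if (!fs.exists(target.getParent())) {
            fs.mkdirs(target.getParent());
        }
        if (!fs.rename(source, target)) {
            throw new IllegalStateException("rename returned false for " + source + " -> " + target);
        }
    }
}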
From source file:it.tizianofagni.sparkboost.DataUtils.java
License:Apache License
/**
 * Write a text file on Hadoop file system by using standard Hadoop API.
 *
 * @param outputPath The file to be written.
 * @param content    The content to put in the file.
 */
public static void saveHadoopTextFile(String outputPath, String content) {
    try {
        Configuration configuration = new Configuration();
        Path file = new Path(outputPath);
        Path parentFile = file.getParent();
        FileSystem hdfs = FileSystem.get(file.toUri(), configuration);
        if (parentFile != null) {
            hdfs.mkdirs(parentFile);
        }
        OutputStream os = hdfs.create(file, true);
        BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, "UTF-8"));
        br.write(content);
        br.close();
        hdfs.close();
    } catch (Exception e) {
        throw new RuntimeException("Writing Hadoop text file", e);
    }
}
From source file:jp.ac.u.tokyo.m.pig.udf.load.LoadDataWithSchema.java
License:Apache License
@Override
public ResourceSchema getSchema(String aLocation, Job aJob) throws IOException {
    Configuration tConfiguration = aJob.getConfiguration();
    Path tDataPath = new Path(aLocation);
    FileSystem tFileSystem = tDataPath.getFileSystem(tConfiguration);
    Path tSchemaFilePath = tFileSystem.isFile(tDataPath)
            ? new Path(tDataPath.getParent(), StoreConstants.STORE_FILE_NAME_SCHEMA)
            : new Path(tDataPath, StoreConstants.STORE_FILE_NAME_SCHEMA);
    RowSchema tRowSchema = LoadSchemaUtil.loadSchemaFile(tFileSystem, tSchemaFilePath, mEncoding);
    ResourceSchema tResourceSchema = new ResourceSchema();
    TypeStringCasterPigToPigTypeByte tTypeCaster = TypeStringCasterPigToPigTypeByte.INSTANCE;
    List<ColumnSchema> tColumnSchemaList = tRowSchema.getColumnSchemaList();
    int tSize = tColumnSchemaList.size();
    ResourceFieldSchema[] tResourceFieldSchemas = new ResourceFieldSchema[tSize];
    int tIndex = 0;
    for (ColumnSchema tCurrentColumnSchema : tColumnSchemaList) {
        tResourceFieldSchemas[tIndex++] = new ResourceFieldSchema(new FieldSchema(
                tCurrentColumnSchema.getName(), tTypeCaster.castTypeString(tCurrentColumnSchema.getType())));
    }
    tResourceSchema.setFields(tResourceFieldSchemas);
    return tResourceSchema;
}
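In this loader, getParent() locates a schema file that sits beside the data: if the location is a file, the schema is expected in the same directory; if it is already a directory, the schema is expected inside it. A stripped-down sketch of that resolution; the paths and the "schema" file name are illustrative, not the loader's actual constant:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SiblingSchemaFile {
    public static void main(String[] args) throws Exception {
        Path dataPath = new Path("/tmp/example/dataset/part-00000"); // illustrative path
        FileSystem fs = dataPath.getFileSystem(new Configuration());

        // For a file, the schema lives beside it; for a directory, the schema lives inside it.
        Path schemaPath = fs.isFile(dataPath)
                ? new Path(dataPath.getParent(), "schema") // sibling of the data file
                : new Path(dataPath, "schema");             // child of the data directory
        System.out.println("schema file expected at " + schemaPath);
    }
}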
From source file:kogiri.common.json.JsonSerializer.java
License:Open Source License
public void toJsonFile(FileSystem fs, Path file, Object obj) throws IOException {
    if (!fs.exists(file.getParent())) {
        fs.mkdirs(file.getParent());
    }

    DataOutputStream ostream = fs.create(file, true, 64 * 1024, (short) 3, 1024 * 1024);
    this.mapper.writeValue(ostream, obj);
    ostream.close();
}
From source file:kogiri.mapreduce.libra.kmersimilarity_m.KmerSimilarityMap.java
License:Open Source License
private void commit(Path outputPath, Configuration conf) throws IOException {
    FileSystem fs = outputPath.getFileSystem(conf);

    FileStatus status = fs.getFileStatus(outputPath);
    if (status.isDir()) {
        FileStatus[] entries = fs.listStatus(outputPath);
        for (FileStatus entry : entries) {
            Path entryPath = entry.getPath();

            // remove unnecessary outputs
            if (MapReduceHelper.isLogFiles(entryPath)) {
                fs.delete(entryPath, true);
            } else if (MapReduceHelper.isPartialOutputFiles(entryPath)) {
                // rename outputs
                int mapreduceID = MapReduceHelper.getMapReduceID(entryPath);
                String newName = KmerSimilarityHelper.makeKmerSimilarityResultFileName(mapreduceID);
                Path toPath = new Path(entryPath.getParent(), newName);

                LOG.info("output : " + entryPath.toString());
                LOG.info("renamed to : " + toPath.toString());
                fs.rename(entryPath, toPath);
            } else {
                // let it be
            }
        }
    } else {
        throw new IOException("path not found : " + outputPath.toString());
    }
}
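The rename target above is built from entryPath.getParent(), which keeps the renamed output in the directory it was written to. A minimal sketch of renaming a file in place; the directory, old name, and new name are illustrative:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RenameInPlace {
    public static void main(String[] args) throws Exception {
        Path partFile = new Path("/tmp/example/output/part-r-00003"); // illustrative path
        FileSystem fs = partFile.getFileSystem(new Configuration());

        // Reuse the old path's parent so the file keeps its directory and only the name changes.
        Path renamed = new Path(partFile.getParent(), "result.3");
        fs.rename(partFile, renamed);
    }
}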