Example usage for org.apache.hadoop.fs Path getParent

Introduction

On this page you can find example usages of org.apache.hadoop.fs.Path.getParent().

Prototype

public Path getParent() 

Document

Returns the parent of a path or null if at root.
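
A minimal sketch of the behavior described above (the paths here are hypothetical, chosen only for illustration):

import org.apache.hadoop.fs.Path;

public class GetParentDemo {
    public static void main(String[] args) {
        Path file = new Path("/user/hive/warehouse/data/part-00000");

        // The parent of a nested path is its containing directory.
        System.out.println(file.getParent());             // /user/hive/warehouse/data

        // Calls can be chained to walk further up the hierarchy.
        System.out.println(file.getParent().getParent()); // /user/hive/warehouse

        // At the root there is no parent, so null is returned.
        System.out.println(new Path("/").getParent());    // null
    }
}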

Usage

From source file: io.druid.storage.hdfs.HdfsDataSegmentPusher.java

License: Apache License

@Override
public DataSegment push(File inDir, DataSegment segment) throws IOException {
    final String storageDir = DataSegmentPusherUtil.getHdfsStorageDir(segment);

    log.info("Copying segment[%s] to HDFS at location[%s/%s]", segment.getIdentifier(),
            config.getStorageDirectory(), storageDir);

    Path outFile = new Path(String.format("%s/%s/index.zip", config.getStorageDirectory(), storageDir));
    FileSystem fs = outFile.getFileSystem(hadoopConfig);

    fs.mkdirs(outFile.getParent());
    log.info("Compressing files from[%s] to [%s]", inDir, outFile);

    final long size;
    try (FSDataOutputStream out = fs.create(outFile)) {
        size = CompressionUtils.zip(inDir, out);
    }

    return createDescriptorFile(segment.withLoadSpec(makeLoadSpec(outFile)).withSize(size)
            .withBinaryVersion(SegmentUtils.getVersionFromDir(inDir)), outFile.getParent(), fs);
}
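
The example above follows a pattern that recurs throughout this page: call mkdirs() on getParent() so the containing directory exists before the file itself is created. A condensed, self-contained sketch of that idiom (the path and plain Configuration are assumptions for illustration, not taken from the source above):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class EnsureParentBeforeWrite {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();           // default configuration; no cluster assumed
        Path outFile = new Path("/tmp/example/index.zip");   // hypothetical output location

        FileSystem fs = outFile.getFileSystem(conf);

        // Create the parent directory first; mkdirs() succeeds if it already exists.
        fs.mkdirs(outFile.getParent());

        // Now the file itself can be created safely.
        try (FSDataOutputStream out = fs.create(outFile, true)) {
            out.writeUTF("example payload");
        }
    }
}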

From source file: io.druid.storage.hdfs.HdfsFileTimestampVersionFinder.java

License: Apache License

/**
 * Returns the latest modified file at the uri of interest.
 *
 * @param uri     Either a directory or a file on HDFS. If it is a file, the parent directory will be searched.
 * @param pattern A pattern matcher for file names in the directory of interest. Passing `null` results in matching any file in the directory.
 *
 * @return The URI of the file with the most recent modified timestamp.
 */
@Override
public URI getLatestVersion(final URI uri, final Pattern pattern) {
    final Path path = new Path(uri);
    try {
        return RetryUtils.retry(new Callable<URI>() {
            @Override
            public URI call() throws Exception {
                final FileSystem fs = path.getFileSystem(config);
                if (!fs.exists(path)) {
                    return null;
                }
                return mostRecentInDir(fs.isDirectory(path) ? path : path.getParent(), pattern);
            }
        }, shouldRetryPredicate(), DEFAULT_RETRY_COUNT);
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }
}

From source file: io.hops.erasure_coding.Encoder.java

License: Apache License

/**
 * The interface to use to generate a parity file.
 * This method can be called multiple times with the same Encoder object,
 * thus allowing reuse of the buffers allocated by the Encoder object.
 *
 * @param fs
 *     The filesystem containing the source file.
 * @param srcFile
 *     The source file.
 * @param parityFile
 *     The parity file to be generated.
 */
public void encodeFile(Configuration jobConf, FileSystem fs, Path srcFile, FileSystem parityFs, Path parityFile,
        short parityRepl, long numStripes, long blockSize, Progressable reporter, StripeReader sReader)
        throws IOException {
    long expectedParityBlocks = numStripes * codec.parityLength;
    long expectedParityFileSize = numStripes * blockSize * codec.parityLength;

    if (!parityFs.mkdirs(parityFile.getParent())) {
        throw new IOException("Could not create parent dir " + parityFile.getParent());
    }
    // delete destination if exists
    if (parityFs.exists(parityFile)) {
        parityFs.delete(parityFile, false);
    }

    // Writing out a large parity file at replication 1 is difficult since
    // some datanode could die and we would not be able to close() the file.
    // So write at replication 2 and then reduce it after close() succeeds.
    short tmpRepl = parityRepl;
    if (expectedParityBlocks >= conf.getInt("raid.encoder.largeparity.blocks", 20)) {
        if (parityRepl == 1) {
            tmpRepl = 2;
        }
    }
    FSDataOutputStream out = parityFs.create(parityFile, true, conf.getInt("io.file.buffer.size", 64 * 1024),
            tmpRepl, blockSize);

    DFSOutputStream dfsOut = (DFSOutputStream) out.getWrappedStream();
    dfsOut.enableParityStream(codec.getStripeLength(), codec.getParityLength(), srcFile.toUri().getPath());

    try {
        encodeFileToStream(fs, srcFile, parityFile, sReader, blockSize, out, reporter);
        out.close();
        out = null;
        LOG.info("Wrote parity file " + parityFile);
        FileStatus tmpStat = parityFs.getFileStatus(parityFile);
        if (tmpStat.getLen() != expectedParityFileSize) {
            throw new IOException("Expected parity size " + expectedParityFileSize + " does not match actual "
                    + tmpStat.getLen());
        }
        if (tmpRepl > parityRepl) {
            parityFs.setReplication(parityFile, parityRepl);
        }
        LOG.info("Wrote parity file " + parityFile);
    } finally {
        if (out != null) {
            out.close();
        }
    }
}

From source file: io.pivotal.spring.xd.module.job.hdfs.loader.PartitionLogicTest.java

License: Apache License

@Test
public void testHdfsPathPartition() {

    String hdfsBaseDir = "/xd/basedir";

    String expression = "region + '/' + dateFormat('yyyy/MM/dd/HH/mm', timestamp)";

    DefaultPartitionStrategy<String> partitionStrategy = new DefaultPartitionStrategy<String>(expression);

    DefaultPartitionKey key = new DefaultPartitionKey();
    key.put("region", hdfsBaseDir);

    PartitionResolver<Map<String, Object>> resolver = partitionStrategy.getPartitionResolver();

    Path resolvedPath = resolver.resolvePath(key);

    Assert.assertNotNull(resolvedPath);

    Assert.assertEquals("/xd/basedir",
            resolvedPath.getParent().getParent().getParent().getParent().getParent().toString());

    System.out.println(resolvedPath);
}

From source file: io.prestosql.plugin.hive.HiveWriterFactory.java

License: Apache License

public HiveWriter createWriter(Page partitionColumns, int position, OptionalInt bucketNumber) {
    if (bucketCount.isPresent()) {
        checkArgument(bucketNumber.isPresent(), "Bucket not provided for bucketed table");
        checkArgument(bucketNumber.getAsInt() < bucketCount.getAsInt(),
                "Bucket number %s must be less than bucket count %s", bucketNumber, bucketCount);
    } else {
        checkArgument(!bucketNumber.isPresent(), "Bucket number provided for a table that is not bucketed");
    }

    String fileName;
    if (bucketNumber.isPresent()) {
        fileName = computeBucketedFileName(filePrefix, bucketNumber.getAsInt());
    } else {
        fileName = filePrefix + "_" + randomUUID();
    }

    List<String> partitionValues = createPartitionValues(partitionColumnTypes, partitionColumns, position);

    Optional<String> partitionName;
    if (!partitionColumnNames.isEmpty()) {
        partitionName = Optional.of(FileUtils.makePartName(partitionColumnNames, partitionValues));
    } else {
        partitionName = Optional.empty();
    }

    // attempt to get the existing partition (if this is an existing partitioned table)
    Optional<Partition> partition = Optional.empty();
    if (!partitionValues.isEmpty() && table != null) {
        partition = pageSinkMetadataProvider.getPartition(partitionValues);
    }

    UpdateMode updateMode;
    Properties schema;
    WriteInfo writeInfo;
    StorageFormat outputStorageFormat;
    if (!partition.isPresent()) {
        if (table == null) {
            // Write to: a new partition in a new partitioned table,
            //           or a new unpartitioned table.
            updateMode = UpdateMode.NEW;
            schema = new Properties();
            schema.setProperty(META_TABLE_COLUMNS,
                    dataColumns.stream().map(DataColumn::getName).collect(joining(",")));
            schema.setProperty(META_TABLE_COLUMN_TYPES, dataColumns.stream().map(DataColumn::getHiveType)
                    .map(HiveType::getHiveTypeName).map(HiveTypeName::toString).collect(joining(":")));

            if (!partitionName.isPresent()) {
                // new unpartitioned table
                writeInfo = locationService.getTableWriteInfo(locationHandle);
            } else {
                // a new partition in a new partitioned table
                writeInfo = locationService.getPartitionWriteInfo(locationHandle, partition,
                        partitionName.get());

                if (!writeInfo.getWriteMode().isWritePathSameAsTargetPath()) {
                    // When target path is different from write path,
                    // verify that the target directory for the partition does not already exist
                    if (HiveWriteUtils.pathExists(new HdfsContext(session, schemaName, tableName),
                            hdfsEnvironment, writeInfo.getTargetPath())) {
                        throw new PrestoException(HIVE_PATH_ALREADY_EXISTS, format(
                                "Target directory for new partition '%s' of table '%s.%s' already exists: %s",
                                partitionName, schemaName, tableName, writeInfo.getTargetPath()));
                    }
                }
            }
        } else {
            // Write to: a new partition in an existing partitioned table,
            //           or an existing unpartitioned table
            if (partitionName.isPresent()) {
                // a new partition in an existing partitioned table
                updateMode = UpdateMode.NEW;
                writeInfo = locationService.getPartitionWriteInfo(locationHandle, partition,
                        partitionName.get());
            } else {
                if (bucketNumber.isPresent()) {
                    throw new PrestoException(HIVE_PARTITION_READ_ONLY,
                            "Cannot insert into bucketed unpartitioned Hive table");
                }
                if (immutablePartitions) {
                    throw new PrestoException(HIVE_PARTITION_READ_ONLY,
                            "Unpartitioned Hive tables are immutable");
                }
                updateMode = UpdateMode.APPEND;
                writeInfo = locationService.getTableWriteInfo(locationHandle);
            }

            schema = getHiveSchema(table);
        }

        if (partitionName.isPresent()) {
            // Write to a new partition
            outputStorageFormat = fromHiveStorageFormat(partitionStorageFormat);
        } else {
            // Write to a new/existing unpartitioned table
            outputStorageFormat = fromHiveStorageFormat(tableStorageFormat);
        }
    } else {
        // Write to: an existing partition in an existing partitioned table
        if (insertExistingPartitionsBehavior == InsertExistingPartitionsBehavior.APPEND) {
            // Append to an existing partition
            checkState(!immutablePartitions);
            if (bucketNumber.isPresent()) {
                throw new PrestoException(HIVE_PARTITION_READ_ONLY,
                        "Cannot insert into existing partition of bucketed Hive table: " + partitionName.get());
            }
            updateMode = UpdateMode.APPEND;
            // Check the column types in partition schema match the column types in table schema
            List<Column> tableColumns = table.getDataColumns();
            List<Column> existingPartitionColumns = partition.get().getColumns();
            for (int i = 0; i < min(existingPartitionColumns.size(), tableColumns.size()); i++) {
                HiveType tableType = tableColumns.get(i).getType();
                HiveType partitionType = existingPartitionColumns.get(i).getType();
                if (!tableType.equals(partitionType)) {
                    throw new PrestoException(HIVE_PARTITION_SCHEMA_MISMATCH,
                            format("" + "You are trying to write into an existing partition in a table. "
                                    + "The table schema has changed since the creation of the partition. "
                                    + "Inserting rows into such partition is not supported. "
                                    + "The column '%s' in table '%s' is declared as type '%s', "
                                    + "but partition '%s' declared column '%s' as type '%s'.",
                                    tableColumns.get(i).getName(), tableName, tableType, partitionName,
                                    existingPartitionColumns.get(i).getName(), partitionType));
                }
            }

            HiveWriteUtils.checkPartitionIsWritable(partitionName.get(), partition.get());

            outputStorageFormat = partition.get().getStorage().getStorageFormat();
            schema = getHiveSchema(partition.get(), table);

            writeInfo = locationService.getPartitionWriteInfo(locationHandle, partition, partitionName.get());
        } else if (insertExistingPartitionsBehavior == InsertExistingPartitionsBehavior.OVERWRITE) {
            // Overwrite an existing partition
            //
            // Overwrite is treated as if the partition were first dropped and a new partition inserted, thus:
            // * No partition writable check is required.
            // * Table schema and storage format is used for the new partition (instead of existing partition schema and storage format).
            updateMode = UpdateMode.OVERWRITE;

            outputStorageFormat = fromHiveStorageFormat(partitionStorageFormat);
            schema = getHiveSchema(table);

            writeInfo = locationService.getPartitionWriteInfo(locationHandle, Optional.empty(),
                    partitionName.get());
            checkState(writeInfo.getWriteMode() != DIRECT_TO_TARGET_EXISTING_DIRECTORY,
                    "Overwriting existing partition doesn't support DIRECT_TO_TARGET_EXISTING_DIRECTORY write mode");
        } else if (insertExistingPartitionsBehavior == InsertExistingPartitionsBehavior.ERROR) {
            throw new PrestoException(HIVE_PARTITION_READ_ONLY,
                    "Cannot insert into an existing partition of Hive table: " + partitionName.get());
        } else {
            throw new IllegalArgumentException(format("Unsupported insert existing partitions behavior: %s",
                    insertExistingPartitionsBehavior));
        }
    }

    validateSchema(partitionName, schema);

    String fileNameWithExtension = fileName + getFileExtension(conf, outputStorageFormat);

    Path path = new Path(writeInfo.getWritePath(), fileNameWithExtension);

    HiveFileWriter hiveFileWriter = null;
    for (HiveFileWriterFactory fileWriterFactory : fileWriterFactories) {
        Optional<HiveFileWriter> fileWriter = fileWriterFactory.createFileWriter(path,
                dataColumns.stream().map(DataColumn::getName).collect(toList()), outputStorageFormat, schema,
                conf, session);
        if (fileWriter.isPresent()) {
            hiveFileWriter = fileWriter.get();
            break;
        }
    }

    if (hiveFileWriter == null) {
        hiveFileWriter = new RecordFileWriter(path,
                dataColumns.stream().map(DataColumn::getName).collect(toList()), outputStorageFormat, schema,
                partitionStorageFormat.getEstimatedWriterSystemMemoryUsage(), conf, typeManager, session);
    }

    String writerImplementation = hiveFileWriter.getClass().getName();

    Consumer<HiveWriter> onCommit = hiveWriter -> {
        Optional<Long> size;
        try {
            size = Optional.of(
                    hdfsEnvironment.getFileSystem(session.getUser(), path, conf).getFileStatus(path).getLen());
        } catch (IOException | RuntimeException e) {
            // Do not fail the query if file system is not available
            size = Optional.empty();
        }

        eventClient.post(new WriteCompletedEvent(session.getQueryId(), path.toString(), schemaName, tableName,
                partitionName.orElse(null), outputStorageFormat.getOutputFormat(), writerImplementation,
                nodeManager.getCurrentNode().getVersion(), nodeManager.getCurrentNode().getHttpUri().getHost(),
                session.getIdentity().getPrincipal().map(Principal::getName).orElse(null),
                nodeManager.getEnvironment(), sessionProperties, size.orElse(null), hiveWriter.getRowCount()));
    };

    if (!sortedBy.isEmpty()) {
        FileSystem fileSystem;
        try {
            fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, conf);
        } catch (IOException e) {
            throw new PrestoException(HIVE_WRITER_OPEN_ERROR, e);
        }

        List<Type> types = dataColumns.stream().map(column -> column.getHiveType().getType(typeManager))
                .collect(toImmutableList());

        Map<String, Integer> columnIndexes = new HashMap<>();
        for (int i = 0; i < dataColumns.size(); i++) {
            columnIndexes.put(dataColumns.get(i).getName(), i);
        }

        List<Integer> sortFields = new ArrayList<>();
        List<SortOrder> sortOrders = new ArrayList<>();
        for (SortingColumn column : sortedBy) {
            Integer index = columnIndexes.get(column.getColumnName());
            if (index == null) {
                throw new PrestoException(HIVE_INVALID_METADATA,
                        format("Sorting column '%s' does exist in table '%s.%s'", column.getColumnName(),
                                schemaName, tableName));
            }
            sortFields.add(index);
            sortOrders.add(column.getOrder().getSortOrder());
        }

        hiveFileWriter = new SortingFileWriter(fileSystem,
                new Path(path.getParent(), ".tmp-sort." + path.getName()), hiveFileWriter, sortBufferSize,
                maxOpenSortFiles, types, sortFields, sortOrders, pageSorter,
                (fs, p) -> orcFileWriterFactory.createOrcDataSink(session, fs, p));
    }

    return new HiveWriter(hiveFileWriter, partitionName, updateMode, fileNameWithExtension,
            writeInfo.getWritePath().toString(), writeInfo.getTargetPath().toString(), onCommit,
            hiveWriterStats);
}

From source file: io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore.java

License: Apache License

private static void renameDirectory(HdfsContext context, HdfsEnvironment hdfsEnvironment, Path source,
        Path target, Runnable runWhenPathDoesntExist) {
    if (pathExists(context, hdfsEnvironment, target)) {
        throw new PrestoException(HIVE_PATH_ALREADY_EXISTS,
                format("Unable to rename from %s to %s: target directory already exists", source, target));
    }

    if (!pathExists(context, hdfsEnvironment, target.getParent())) {
        createDirectory(context, hdfsEnvironment, target.getParent());
    }

    // The runnable will assume that if rename fails, it will be okay to delete the directory (if the directory is empty).
    // This is not technically true because a race condition still exists.
    runWhenPathDoesntExist.run();

    try {
        if (!hdfsEnvironment.getFileSystem(context, source).rename(source, target)) {
            throw new PrestoException(HIVE_FILESYSTEM_ERROR,
                    format("Failed to rename %s to %s: rename returned false", source, target));
        }
    } catch (IOException e) {
        throw new PrestoException(HIVE_FILESYSTEM_ERROR, format("Failed to rename %s to %s", source, target),
                e);
    }
}

From source file: it.tizianofagni.sparkboost.DataUtils.java

License: Apache License

/**
 * Write a text file on Hadoop file system by using standard Hadoop API.
 *
 * @param outputPath The file to be written.
 * @param content    The content to put in the file.
 */
public static void saveHadoopTextFile(String outputPath, String content) {
    try {
        Configuration configuration = new Configuration();
        Path file = new Path(outputPath);
        Path parentFile = file.getParent();
        FileSystem hdfs = FileSystem.get(file.toUri(), configuration);
        if (parentFile != null)
            hdfs.mkdirs(parentFile);
        OutputStream os = hdfs.create(file, true);
        BufferedWriter br = new BufferedWriter(new OutputStreamWriter(os, "UTF-8"));
        br.write(content);
        br.close();
        hdfs.close();
    } catch (Exception e) {
        throw new RuntimeException("Writing Hadoop text file", e);
    }
}
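
For context, a call to the helper above might look like the following (the HDFS URI is a placeholder, not taken from the source):

DataUtils.saveHadoopTextFile("hdfs://namenode:8020/tmp/example/report.txt", "some text content");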

From source file: jp.ac.u.tokyo.m.pig.udf.load.LoadDataWithSchema.java

License: Apache License

@Override
public ResourceSchema getSchema(String aLocation, Job aJob) throws IOException {
    Configuration tConfiguration = aJob.getConfiguration();
    Path tDataPath = new Path(aLocation);
    FileSystem tFileSystem = tDataPath.getFileSystem(tConfiguration);
    Path tSchemaFilePath = tFileSystem.isFile(tDataPath)
            ? new Path(tDataPath.getParent(), StoreConstants.STORE_FILE_NAME_SCHEMA)
            : new Path(tDataPath, StoreConstants.STORE_FILE_NAME_SCHEMA);
    RowSchema tRowSchema = LoadSchemaUtil.loadSchemaFile(tFileSystem, tSchemaFilePath, mEncoding);

    ResourceSchema tResourceSchema = new ResourceSchema();
    TypeStringCasterPigToPigTypeByte tTypeCaster = TypeStringCasterPigToPigTypeByte.INSTANCE;
    List<ColumnSchema> tColumnSchemaList = tRowSchema.getColumnSchemaList();
    int tSize = tColumnSchemaList.size();
    ResourceFieldSchema[] tResourceFieldSchemas = new ResourceFieldSchema[tSize];
    int tIndex = 0;
    for (ColumnSchema tCurrentColumnSchema : tColumnSchemaList) {
        tResourceFieldSchemas[tIndex++] = new ResourceFieldSchema(new FieldSchema(
                tCurrentColumnSchema.getName(), tTypeCaster.castTypeString(tCurrentColumnSchema.getType())));
    }
    tResourceSchema.setFields(tResourceFieldSchemas);
    return tResourceSchema;
}

From source file: kogiri.common.json.JsonSerializer.java

License: Open Source License

public void toJsonFile(FileSystem fs, Path file, Object obj) throws IOException {
    if (!fs.exists(file.getParent())) {
        fs.mkdirs(file.getParent());
    }

    DataOutputStream ostream = fs.create(file, true, 64 * 1024, (short) 3, 1024 * 1024);
    this.mapper.writeValue(ostream, obj);
    ostream.close();
}

From source file: kogiri.mapreduce.libra.kmersimilarity_m.KmerSimilarityMap.java

License: Open Source License

private void commit(Path outputPath, Configuration conf) throws IOException {
    FileSystem fs = outputPath.getFileSystem(conf);

    FileStatus status = fs.getFileStatus(outputPath);
    if (status.isDir()) {
        FileStatus[] entries = fs.listStatus(outputPath);
        for (FileStatus entry : entries) {
            Path entryPath = entry.getPath();

            // remove unnecessary outputs
            if (MapReduceHelper.isLogFiles(entryPath)) {
                fs.delete(entryPath, true);
            } else if (MapReduceHelper.isPartialOutputFiles(entryPath)) {
                // rename outputs
                int mapreduceID = MapReduceHelper.getMapReduceID(entryPath);
                String newName = KmerSimilarityHelper.makeKmerSimilarityResultFileName(mapreduceID);
                Path toPath = new Path(entryPath.getParent(), newName);

                LOG.info("output : " + entryPath.toString());
                LOG.info("renamed to : " + toPath.toString());
                fs.rename(entryPath, toPath);
            } else {
                // let it be
            }
        }
    } else {
        throw new IOException("path not found : " + outputPath.toString());
    }
}