Example usage for org.apache.hadoop.fs Path getName

List of usage examples for org.apache.hadoop.fs Path getName

Introduction

On this page you can find example usage for org.apache.hadoop.fs Path getName.

Prototype

public String getName() 

Source Link

Document

Returns the final component of this path.

Usage

From source file:com.uber.hoodie.common.util.FSUtils.java

License:Apache License

/**
 * Resolves the fileId for a path, handling log files and other (data) files differently.
 *
 * <p>If {@code FSUtils.isLogFile} accepts the path, the id comes from
 * {@code FSUtils.getFileIdFromLogPath}; otherwise it is derived from the final path
 * component via {@code FSUtils.getFileId}.
 *
 * @param filePath path of the file to inspect
 * @return the fileId extracted from the path
 */
public static String getFileIdFromFilePath(Path filePath) {
    return FSUtils.isLogFile(filePath)
            ? FSUtils.getFileIdFromLogPath(filePath)
            : FSUtils.getFileId(filePath.getName());
}

From source file:com.uber.hoodie.common.util.FSUtils.java

License:Apache License

/**
 * Extracts the base commit time from the name of a log file.
 *
 * <p>The final component of {@code path} is matched against {@code LOG_FILE_PATTERN} and
 * capture group 2 is returned.
 * NOTE(review): group 2 is presumed to hold the base commit time, going by the method
 * name - confirm against the definition of LOG_FILE_PATTERN.
 *
 * @param path path of the log file
 * @return the text captured by group 2 of the pattern
 * @throws InvalidHoodiePathException if the file name does not match the log file pattern
 */
public static String getBaseCommitTimeFromLogPath(Path path) {
    final Matcher logNameMatcher = LOG_FILE_PATTERN.matcher(path.getName());
    if (logNameMatcher.find()) {
        return logNameMatcher.group(2);
    }
    throw new InvalidHoodiePathException(path, "LogFile");
}

From source file:com.uber.hoodie.common.util.FSUtils.java

License:Apache License

/**
 * Get TaskId used in log-path/*ww  w  .  ja v  a 2 s .  c o  m*/
 */
public static Integer getTaskPartitionIdFromLogPath(Path path) {
    Matcher matcher = LOG_FILE_PATTERN.matcher(path.getName());
    if (!matcher.find()) {
        throw new InvalidHoodiePathException(path, "LogFile");
    }
    String val = matcher.group(7);
    return val == null ? null : Integer.parseInt(val);
}

From source file:com.uber.hoodie.common.util.FSUtils.java

License:Apache License

/**
 * Get the write token used in the log path.
 *
 * <p>The final component of {@code path} is matched against {@code LOG_FILE_PATTERN} and
 * capture group 6 is returned as-is.
 *
 * @param path path of the log file
 * @return the text captured by group 6 of the pattern
 * @throws InvalidHoodiePathException if the file name does not match the log file pattern
 */
public static String getWriteTokenFromLogPath(Path path) {
    final Matcher logNameMatcher = LOG_FILE_PATTERN.matcher(path.getName());
    if (logNameMatcher.find()) {
        return logNameMatcher.group(6);
    }
    throw new InvalidHoodiePathException(path, "LogFile");
}

From source file:com.uber.hoodie.common.util.FSUtils.java

License:Apache License

/**
 * Get StageId used in log-path/*from ww w.  j av  a  2  s. c o  m*/
 */
public static Integer getStageIdFromLogPath(Path path) {
    Matcher matcher = LOG_FILE_PATTERN.matcher(path.getName());
    if (!matcher.find()) {
        throw new InvalidHoodiePathException(path, "LogFile");
    }
    String val = matcher.group(8);
    return val == null ? null : Integer.parseInt(val);
}

From source file:com.uber.hoodie.common.util.FSUtils.java

License:Apache License

/**
 * Get the task attempt id used in the log path.
 *
 * <p>The final component of {@code path} is matched against {@code LOG_FILE_PATTERN} and
 * capture group 9 is parsed as an integer.
 *
 * @param path path of the log file
 * @return the parsed value of group 9, or {@code null} when that group captured nothing
 * @throws InvalidHoodiePathException if the file name does not match the log file pattern
 */
public static Integer getTaskAttemptIdFromLogPath(Path path) {
    final Matcher logNameMatcher = LOG_FILE_PATTERN.matcher(path.getName());
    if (!logNameMatcher.find()) {
        throw new InvalidHoodiePathException(path, "LogFile");
    }
    final String attempt = logNameMatcher.group(9);
    if (attempt == null) {
        return null;
    }
    return Integer.parseInt(attempt);
}

From source file:com.uber.hoodie.common.util.FSUtils.java

License:Apache License

/**
 * Reads the version number encoded in a log file name.
 *
 * <p>The final component of {@code logPath} is matched against {@code LOG_FILE_PATTERN}
 * and capture group 4 is converted to an {@code int}.
 *
 * @param logPath path of the log file
 * @return the integer value of group 4 of the pattern
 * @throws InvalidHoodiePathException if the file name does not match the log file pattern
 */
public static int getFileVersionFromLog(Path logPath) {
    final Matcher logNameMatcher = LOG_FILE_PATTERN.matcher(logPath.getName());
    if (logNameMatcher.find()) {
        final String version = logNameMatcher.group(4);
        return Integer.parseInt(version);
    }
    throw new InvalidHoodiePathException(logPath, "LogFile");
}

From source file:com.uber.hoodie.common.util.FSUtils.java

License:Apache License

/**
 * Tells whether the final component of the given path looks like a log file name.
 *
 * @param logPath path to test
 * @return {@code true} if {@code LOG_FILE_PATTERN} is found in the file name,
 *         {@code false} otherwise
 */
public static boolean isLogFile(Path logPath) {
    return LOG_FILE_PATTERN.matcher(logPath.getName()).find();
}

From source file:com.uber.hoodie.hive.client.HoodieFSClient.java

License:Apache License

/**
 * Find the last data file under the partition path.
 *
 * @param metadata/*from ww  w  .  j  a v  a 2 s .c o m*/
 * @param partitionPathString
 * @return
 */
public Path lastDataFileForDataset(HoodieDatasetReference metadata, String partitionPathString) {
    try {
        Path partitionPath = new Path(partitionPathString);
        if (!fs.exists(partitionPath)) {
            throw new HoodieHiveDatasetException(
                    "Partition path " + partitionPath + " not found in Dataset " + metadata);
        }

        RemoteIterator<LocatedFileStatus> files = fs.listFiles(partitionPath, true);
        // Iterate over the list. List is generally is listed in chronological order becasue of the date partitions
        // Get the latest schema
        Path returnPath = null;
        while (files.hasNext()) {
            Path path = files.next().getPath();
            if (path.getName().endsWith(PARQUET_EXTENSION)
                    || path.getName().endsWith(PARQUET_EXTENSION_ZIPPED)) {
                if (returnPath == null || path.toString().compareTo(returnPath.toString()) > 0) {
                    returnPath = path;
                }
            }
        }
        if (returnPath != null) {
            return returnPath;
        }
        throw new HoodieHiveDatasetException(
                "No data file found in path " + partitionPath + " for dataset " + metadata);
    } catch (IOException e) {
        throw new HoodieHiveDatasetException(
                "Failed to get data file in path " + partitionPathString + " for dataset " + metadata, e);
    }
}

From source file:com.uber.hoodie.index.TestHoodieBloomIndex.java

License:Apache License

/**
 * Checks {@code HoodieBloomIndex.fetchRecordLocation} for four keys: before any parquet file
 * is written no key may resolve to a location; after writing files for records 1, 2 and 4,
 * those keys must resolve to the matching file ids while key 3 stays absent.
 */
@Test
public void testCheckExists() throws Exception {
    // Load the schema and extend it with the metadata fields.
    String schemaJson = IOUtils.toString(getClass().getResourceAsStream("/exampleSchema.txt"), "UTF-8");
    Schema schema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(schemaJson));

    // Four records to look up.
    String json1 = "{\"_row_key\":\"1eb5b87a-1feh-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}";
    String json2 = "{\"_row_key\":\"2eb5b87b-1feu-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:20:41.415Z\",\"number\":100}";
    String json3 = "{\"_row_key\":\"3eb5b87c-1fej-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}";
    String json4 = "{\"_row_key\":\"4eb5b87c-1fej-4edd-87b4-6ec96dc405a0\",\"time\":\"2015-01-31T03:16:41.415Z\",\"number\":32}";
    TestRawTripPayload payload1 = new TestRawTripPayload(json1);
    HoodieKey key1 = new HoodieKey(payload1.getRowKey(), payload1.getPartitionPath());
    HoodieRecord record1 = new HoodieRecord(key1, payload1);
    TestRawTripPayload payload2 = new TestRawTripPayload(json2);
    HoodieKey key2 = new HoodieKey(payload2.getRowKey(), payload2.getPartitionPath());
    HoodieRecord record2 = new HoodieRecord(key2, payload2);
    TestRawTripPayload payload3 = new TestRawTripPayload(json3);
    HoodieKey key3 = new HoodieKey(payload3.getRowKey(), payload3.getPartitionPath());
    HoodieRecord record3 = new HoodieRecord(key3, payload3);
    TestRawTripPayload payload4 = new TestRawTripPayload(json4);
    HoodieKey key4 = new HoodieKey(payload4.getRowKey(), payload4.getPartitionPath());
    HoodieRecord record4 = new HoodieRecord(key4, payload4);
    JavaRDD<HoodieKey> keys = jsc.parallelize(Arrays.asList(key1, key2, key3, key4));

    // Table metadata and write config for the base path.
    HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, basePath);
    HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().withPath(basePath).build();
    HoodieTable hoodieTable = HoodieTable.getHoodieTable(metaClient, writeConfig);

    // First lookup: nothing has been written yet.
    HoodieBloomIndex index = new HoodieBloomIndex(writeConfig, jsc);
    JavaPairRDD<HoodieKey, Optional<String>> locations = index.fetchRecordLocation(keys, hoodieTable);

    // No key may resolve to a file.
    for (Tuple2<HoodieKey, Optional<String>> tagged : locations.collect()) {
        Optional<String> location = tagged._2;
        assertTrue(!location.isPresent());
    }

    // Write three parquet files with one record each; record3 is deliberately not written.
    String filename1 = writeParquetFile("2016/01/31", Arrays.asList(record1), schema, null, true);
    String filename2 = writeParquetFile("2016/01/31", Arrays.asList(record2), schema, null, true);
    String filename3 = writeParquetFile("2015/01/31", Arrays.asList(record4), schema, null, true);

    // Second lookup against a freshly loaded table.
    metaClient = new HoodieTableMetaClient(fs, basePath);
    hoodieTable = HoodieTable.getHoodieTable(metaClient, writeConfig);
    locations = index.fetchRecordLocation(keys, hoodieTable);

    // Written keys must map to the file id of their parquet file; key 3 must stay absent.
    for (Tuple2<HoodieKey, Optional<String>> tagged : locations.collect()) {
        String rowKey = tagged._1.getRecordKey();
        Optional<String> location = tagged._2;
        if (rowKey.equals("1eb5b87a-1feh-4edd-87b4-6ec96dc405a0")) {
            assertTrue(location.isPresent());
            Path found = new Path(location.get());
            assertEquals(FSUtils.getFileId(filename1), FSUtils.getFileId(found.getName()));
        } else if (rowKey.equals("2eb5b87b-1feu-4edd-87b4-6ec96dc405a0")) {
            assertTrue(location.isPresent());
            Path found = new Path(location.get());
            assertEquals(FSUtils.getFileId(filename2), FSUtils.getFileId(found.getName()));
        } else if (rowKey.equals("3eb5b87c-1fej-4edd-87b4-6ec96dc405a0")) {
            assertTrue(!location.isPresent());
        } else if (rowKey.equals("4eb5b87c-1fej-4edd-87b4-6ec96dc405a0")) {
            assertTrue(location.isPresent());
            Path found = new Path(location.get());
            assertEquals(FSUtils.getFileId(filename3), FSUtils.getFileId(found.getName()));
        }
    }
}