List of usage examples for org.apache.hadoop.fs Path getName
public String getName()
From source file:com.uber.hoodie.common.util.FSUtils.java
License:Apache License
/**
 * Resolves the file id for a path that may point at either a log file or a data file.
 *
 * <p>Paths accepted by {@code FSUtils.isLogFile} are handed to
 * {@code FSUtils.getFileIdFromLogPath}; for any other path the file id is derived from the
 * final path component via {@code FSUtils.getFileId}.
 *
 * @param filePath path of the log file or data file
 * @return the file id extracted from {@code filePath}
 */
public static String getFileIdFromFilePath(Path filePath) {
  return FSUtils.isLogFile(filePath)
      ? FSUtils.getFileIdFromLogPath(filePath)
      : FSUtils.getFileId(filePath.getName());
}
From source file:com.uber.hoodie.common.util.FSUtils.java
License:Apache License
/**
 * Extracts the base commit time from a log file path.
 *
 * <p>The final path component is matched against {@code LOG_FILE_PATTERN} and the text of
 * capture group 2 is returned.
 *
 * @param path path whose file name is expected to match {@code LOG_FILE_PATTERN}
 * @return the text captured by group 2 of the pattern
 * @throws InvalidHoodiePathException if the file name does not match the log file pattern
 */
public static String getBaseCommitTimeFromLogPath(Path path) {
  final Matcher logNameMatcher = LOG_FILE_PATTERN.matcher(path.getName());
  if (logNameMatcher.find()) {
    return logNameMatcher.group(2);
  }
  throw new InvalidHoodiePathException(path, "LogFile");
}
From source file:com.uber.hoodie.common.util.FSUtils.java
License:Apache License
/** * Get TaskId used in log-path/*ww w . ja v a 2 s . c o m*/ */ public static Integer getTaskPartitionIdFromLogPath(Path path) { Matcher matcher = LOG_FILE_PATTERN.matcher(path.getName()); if (!matcher.find()) { throw new InvalidHoodiePathException(path, "LogFile"); } String val = matcher.group(7); return val == null ? null : Integer.parseInt(val); }
From source file:com.uber.hoodie.common.util.FSUtils.java
License:Apache License
/**
 * Gets the write token encoded in a log file path.
 *
 * <p>The final path component is matched against {@code LOG_FILE_PATTERN} and the text of
 * capture group 6 is returned as-is.
 *
 * @param path path whose file name is expected to match {@code LOG_FILE_PATTERN}
 * @return the text captured by group 6 of the pattern
 * @throws InvalidHoodiePathException if the file name does not match the log file pattern
 */
public static String getWriteTokenFromLogPath(Path path) {
  final Matcher logNameMatcher = LOG_FILE_PATTERN.matcher(path.getName());
  if (logNameMatcher.find()) {
    return logNameMatcher.group(6);
  }
  throw new InvalidHoodiePathException(path, "LogFile");
}
From source file:com.uber.hoodie.common.util.FSUtils.java
License:Apache License
/** * Get StageId used in log-path/*from ww w. j av a 2 s. c o m*/ */ public static Integer getStageIdFromLogPath(Path path) { Matcher matcher = LOG_FILE_PATTERN.matcher(path.getName()); if (!matcher.find()) { throw new InvalidHoodiePathException(path, "LogFile"); } String val = matcher.group(8); return val == null ? null : Integer.parseInt(val); }
From source file:com.uber.hoodie.common.util.FSUtils.java
License:Apache License
/**
 * Gets the task attempt id encoded in a log file path.
 *
 * <p>The final path component is matched against {@code LOG_FILE_PATTERN}; capture group 9 is
 * parsed as a decimal integer.
 *
 * @param path path whose file name is expected to match {@code LOG_FILE_PATTERN}
 * @return the parsed value of group 9, or {@code null} when that group did not take part in
 *     the match
 * @throws InvalidHoodiePathException if the file name does not match the log file pattern
 */
public static Integer getTaskAttemptIdFromLogPath(Path path) {
  final Matcher logNameMatcher = LOG_FILE_PATTERN.matcher(path.getName());
  if (!logNameMatcher.find()) {
    throw new InvalidHoodiePathException(path, "LogFile");
  }
  final String attemptText = logNameMatcher.group(9);
  if (attemptText == null) {
    return null;
  }
  return Integer.parseInt(attemptText);
}
From source file:com.uber.hoodie.common.util.FSUtils.java
License:Apache License
/** * Get the last part of the file name in the log file and convert to int. *///from w w w. ja v a 2 s. co m public static int getFileVersionFromLog(Path logPath) { Matcher matcher = LOG_FILE_PATTERN.matcher(logPath.getName()); if (!matcher.find()) { throw new InvalidHoodiePathException(logPath, "LogFile"); } return Integer.parseInt(matcher.group(4)); }
From source file:com.uber.hoodie.common.util.FSUtils.java
License:Apache License
/**
 * Tells whether a path names a log file.
 *
 * @param logPath path to inspect; only its final component is examined
 * @return {@code true} if {@code LOG_FILE_PATTERN} is found in the file name,
 *     {@code false} otherwise
 */
public static boolean isLogFile(Path logPath) {
  return LOG_FILE_PATTERN.matcher(logPath.getName()).find();
}
From source file:com.uber.hoodie.hive.client.HoodieFSClient.java
License:Apache License
/** * Find the last data file under the partition path. * * @param metadata/*from ww w . j a v a 2 s .c o m*/ * @param partitionPathString * @return */ public Path lastDataFileForDataset(HoodieDatasetReference metadata, String partitionPathString) { try { Path partitionPath = new Path(partitionPathString); if (!fs.exists(partitionPath)) { throw new HoodieHiveDatasetException( "Partition path " + partitionPath + " not found in Dataset " + metadata); } RemoteIterator<LocatedFileStatus> files = fs.listFiles(partitionPath, true); // Iterate over the list. List is generally is listed in chronological order becasue of the date partitions // Get the latest schema Path returnPath = null; while (files.hasNext()) { Path path = files.next().getPath(); if (path.getName().endsWith(PARQUET_EXTENSION) || path.getName().endsWith(PARQUET_EXTENSION_ZIPPED)) { if (returnPath == null || path.toString().compareTo(returnPath.toString()) > 0) { returnPath = path; } } } if (returnPath != null) { return returnPath; } throw new HoodieHiveDatasetException( "No data file found in path " + partitionPath + " for dataset " + metadata); } catch (IOException e) { throw new HoodieHiveDatasetException( "Failed to get data file in path " + partitionPathString + " for dataset " + metadata, e); } }
From source file:com.uber.hoodie.index.TestHoodieBloomIndex.java
License:Apache License
@Test public void testCheckExists() throws Exception { // We have some records to be tagged (two different partitions) String schemaStr = IOUtils.toString(getClass().getResourceAsStream("/exampleSchema.txt"), "UTF-8"); Schema schema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(schemaStr)); String recordStr1 = "{\"_row_key\":\"1eb5b87a-1feh-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"; String recordStr2 = "{\"_row_key\":\"2eb5b87b-1feu-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:20:41.415Z\",\"number\":100}"; String recordStr3 = "{\"_row_key\":\"3eb5b87c-1fej-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}"; String recordStr4 = "{\"_row_key\":\"4eb5b87c-1fej-4edd-87b4-6ec96dc405a0\",\"time\":\"2015-01-31T03:16:41.415Z\",\"number\":32}"; TestRawTripPayload rowChange1 = new TestRawTripPayload(recordStr1); HoodieKey key1 = new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()); HoodieRecord record1 = new HoodieRecord(key1, rowChange1); TestRawTripPayload rowChange2 = new TestRawTripPayload(recordStr2); HoodieKey key2 = new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()); HoodieRecord record2 = new HoodieRecord(key2, rowChange2); TestRawTripPayload rowChange3 = new TestRawTripPayload(recordStr3); HoodieKey key3 = new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()); HoodieRecord record3 = new HoodieRecord(key3, rowChange3); TestRawTripPayload rowChange4 = new TestRawTripPayload(recordStr4); HoodieKey key4 = new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()); HoodieRecord record4 = new HoodieRecord(key4, rowChange4); JavaRDD<HoodieKey> keysRDD = jsc.parallelize(Arrays.asList(key1, key2, key3, key4)); // Also create the metadata and config HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, basePath); HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).build(); HoodieTable table = 
HoodieTable.getHoodieTable(metadata, config); // Let's tag// w w w .j a v a 2 s. c o m HoodieBloomIndex bloomIndex = new HoodieBloomIndex(config, jsc); JavaPairRDD<HoodieKey, Optional<String>> taggedRecordRDD = bloomIndex.fetchRecordLocation(keysRDD, table); // Should not find any files for (Tuple2<HoodieKey, Optional<String>> record : taggedRecordRDD.collect()) { assertTrue(!record._2.isPresent()); } // We create three parquet file, each having one record. (two different partitions) String filename1 = writeParquetFile("2016/01/31", Arrays.asList(record1), schema, null, true); String filename2 = writeParquetFile("2016/01/31", Arrays.asList(record2), schema, null, true); String filename3 = writeParquetFile("2015/01/31", Arrays.asList(record4), schema, null, true); // We do the tag again metadata = new HoodieTableMetaClient(fs, basePath); table = HoodieTable.getHoodieTable(metadata, config); taggedRecordRDD = bloomIndex.fetchRecordLocation(keysRDD, table); // Check results for (Tuple2<HoodieKey, Optional<String>> record : taggedRecordRDD.collect()) { if (record._1.getRecordKey().equals("1eb5b87a-1feh-4edd-87b4-6ec96dc405a0")) { assertTrue(record._2.isPresent()); Path path1 = new Path(record._2.get()); assertEquals(FSUtils.getFileId(filename1), FSUtils.getFileId(path1.getName())); } else if (record._1.getRecordKey().equals("2eb5b87b-1feu-4edd-87b4-6ec96dc405a0")) { assertTrue(record._2.isPresent()); Path path2 = new Path(record._2.get()); assertEquals(FSUtils.getFileId(filename2), FSUtils.getFileId(path2.getName())); } else if (record._1.getRecordKey().equals("3eb5b87c-1fej-4edd-87b4-6ec96dc405a0")) { assertTrue(!record._2.isPresent()); } else if (record._1.getRecordKey().equals("4eb5b87c-1fej-4edd-87b4-6ec96dc405a0")) { assertTrue(record._2.isPresent()); Path path3 = new Path(record._2.get()); assertEquals(FSUtils.getFileId(filename3), FSUtils.getFileId(path3.getName())); } } }