List of usage examples for org.apache.hadoop.fs.FileSystem.getConf()
@Override
public Configuration getConf()
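FileSystem.getConf() returns the Configuration the file system instance was initialized with; the examples below use it to share settings (umask, codecs, copy buffers) with other Hadoop components. Before the real-world examples, here is a minimal hedged sketch of typical usage; the class name and the mutated property are illustrative, not taken from any example below:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class GetConfExample {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // getConf() exposes the Configuration the FileSystem was created with.
    Configuration fsConf = fs.getConf();
    System.out.println(fsConf.get("fs.defaultFS"));

    // Copy before mutating so the shared FileSystem instance is unaffected
    // (the same pattern appears in the Drill Metadata example below).
    Configuration copy = new Configuration(fsConf);
    copy.set("fs.permissions.umask-mode", "022");
  }
}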
From source file:org.apache.drill.exec.store.parquet.metadata.Metadata.java
License:Apache License
/**
 * Get the metadata for a single file.
 */
private ParquetFileMetadata_v3 getParquetFileMetadata_v3(ParquetTableMetadata_v3 parquetTableMetadata,
    final FileStatus file, final FileSystem fs, boolean allColumns, Set<String> columnSet)
    throws IOException, InterruptedException {
  final ParquetMetadata metadata;
  final UserGroupInformation processUserUgi = ImpersonationUtil.getProcessUserUGI();
  // Copy the file system's conf so later changes don't leak into the shared FileSystem instance
  final Configuration conf = new Configuration(fs.getConf());
  try {
    metadata = processUserUgi.doAs((PrivilegedExceptionAction<ParquetMetadata>) () -> {
      try (ParquetFileReader parquetFileReader = ParquetFileReader
          .open(HadoopInputFile.fromStatus(file, conf), readerConfig.toReadOptions())) {
        return parquetFileReader.getFooter();
      }
    });
  } catch (Exception e) {
    logger.error(
        "Exception while reading footer of parquet file [Details - path: {}, owner: {}] as process user {}",
        file.getPath(), file.getOwner(), processUserUgi.getShortUserName(), e);
    throw e;
  }

  MessageType schema = metadata.getFileMetaData().getSchema();
  Map<SchemaPath, ColTypeInfo> colTypeInfoMap = new HashMap<>();
  for (String[] path : schema.getPaths()) {
    colTypeInfoMap.put(SchemaPath.getCompoundPath(path), getColTypeInfo(schema, schema, path, 0));
  }

  List<RowGroupMetadata_v3> rowGroupMetadataList = Lists.newArrayList();

  ArrayList<SchemaPath> ALL_COLS = new ArrayList<>();
  ALL_COLS.add(SchemaPath.STAR_COLUMN);
  ParquetReaderUtility.DateCorruptionStatus containsCorruptDates = ParquetReaderUtility
      .detectCorruptDates(metadata, ALL_COLS, readerConfig.autoCorrectCorruptedDates());
  logger.debug("Contains corrupt dates: {}.", containsCorruptDates);

  for (BlockMetaData rowGroup : metadata.getBlocks()) {
    List<ColumnMetadata_v3> columnMetadataList = new ArrayList<>();
    long length = 0;
    for (ColumnChunkMetaData col : rowGroup.getColumns()) {
      String[] columnName = col.getPath().toArray();
      SchemaPath columnSchemaName = SchemaPath.getCompoundPath(columnName);
      ColTypeInfo colTypeInfo = colTypeInfoMap.get(columnSchemaName);

      ColumnTypeMetadata_v3 columnTypeMetadata = new ColumnTypeMetadata_v3(columnName,
          col.getPrimitiveType().getPrimitiveTypeName(), colTypeInfo.originalType,
          colTypeInfo.precision, colTypeInfo.scale, colTypeInfo.repetitionLevel,
          colTypeInfo.definitionLevel);

      if (parquetTableMetadata.columnTypeInfo == null) {
        parquetTableMetadata.columnTypeInfo = new ConcurrentHashMap<>();
      }
      parquetTableMetadata.columnTypeInfo.put(new ColumnTypeMetadata_v3.Key(columnTypeMetadata.name),
          columnTypeMetadata);
      // Store column metadata only if allColumns is set to true, or if the column belongs to the
      // subset of columns specified in the refresh command
      if (allColumns || columnSet == null
          || (columnSet.size() > 0 && columnSet.contains(columnSchemaName.getRootSegmentPath()))) {
        Statistics<?> stats = col.getStatistics();
        // Save the column schema info; we'll merge it into one list
        Object minValue = null;
        Object maxValue = null;
        long numNulls = -1;
        boolean statsAvailable = stats != null && !stats.isEmpty();
        if (statsAvailable) {
          if (stats.hasNonNullValue()) {
            minValue = stats.genericGetMin();
            maxValue = stats.genericGetMax();
            if (containsCorruptDates == ParquetReaderUtility.DateCorruptionStatus.META_SHOWS_CORRUPTION
                && columnTypeMetadata.originalType == OriginalType.DATE) {
              minValue = ParquetReaderUtility.autoCorrectCorruptedDate((Integer) minValue);
              maxValue = ParquetReaderUtility.autoCorrectCorruptedDate((Integer) maxValue);
            }
          }
          numNulls = stats.getNumNulls();
        }
        ColumnMetadata_v3 columnMetadata = new ColumnMetadata_v3(columnTypeMetadata.name,
            col.getPrimitiveType().getPrimitiveTypeName(), minValue, maxValue, numNulls);
        columnMetadataList.add(columnMetadata);
      }
      length += col.getTotalSize();
    }

    // DRILL-5009: Skip the RowGroup if it is empty
    // Note we still read the schema even if there are no values in the RowGroup
    if (rowGroup.getRowCount() == 0) {
      continue;
    }
    RowGroupMetadata_v3 rowGroupMeta = new RowGroupMetadata_v3(rowGroup.getStartingPos(), length,
        rowGroup.getRowCount(), getHostAffinity(file, fs, rowGroup.getStartingPos(), length),
        columnMetadataList);
    rowGroupMetadataList.add(rowGroupMeta);
  }

  Path path = Path.getPathWithoutSchemeAndAuthority(file.getPath());
  return new ParquetFileMetadata_v3(path, file.getLen(), rowGroupMetadataList);
}
From source file:org.apache.drill.exec.store.parquet.Metadata.java
License:Apache License
private Metadata(FileSystem fs) {
  this.fs = ImpersonationUtil.createFileSystem(ImpersonationUtil.getProcessUserName(), fs.getConf());
}
From source file:org.apache.drill.exec.store.schedule.BlockMapBuilder.java
License:Apache License
public BlockMapBuilder(FileSystem fs, Collection<DrillbitEndpoint> endpoints) {
  this.fs = fs;
  this.codecFactory = new CompressionCodecFactory(fs.getConf());
  this.endPointMap = buildEndpointMap(endpoints);
}
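The conf passed to CompressionCodecFactory determines which codecs are discoverable (via the io.compression.codecs property). As a hedged sketch of how such a factory is typically used afterwards (this helper and the file path are illustrative, not part of BlockMapBuilder):

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

// Resolve a codec for a file by its extension, using the FileSystem's conf.
static CompressionCodec codecFor(FileSystem fs, Path file) {
  CompressionCodecFactory factory = new CompressionCodecFactory(fs.getConf());
  // getCodec matches on file extension (e.g. ".gz" -> GzipCodec); returns null if none matches.
  return factory.getCodec(file);
}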
From source file:org.apache.drill.test.framework.TestDriver.java
License:Apache License
public void generateReports(List<DrillTest> tests, int iteration) {
  try {
    if (drillReportsDir == null) {
      drillReportsDir = CWD;
    }
    File drillReportDir = new File(drillReportsDir);
    FileSystem localFS = FileSystem.getLocal(conf);
    FileSystem DFS = FileSystem.get(conf);
    if (!drillReportDir.exists()) {
      if (!drillReportDir.mkdir()) {
        LOG.debug("Cannot create directory " + drillReportsDir
            + ". Using current working directory for drill output");
        drillReportsDir = CWD;
      }
    }
    File reportFile = new File(drillReportsDir + "/apache-drill-" + version + "_" + commitId + "_"
        + "report_" + new Date().toString().replace(' ', '_').replace(':', '_') + ".json");
    BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(reportFile));
    Document document;
    for (DrillTest test : tests) {
      document = Json.newDocument();
      document.set("_id", test.getTestId() + "_" + new File(test.getInputFile()).getName() + "_"
          + test.getCloneId() + "_" + iteration);
      document.set("queryFilepath",
          test.getInputFile().substring(test.getInputFile().indexOf("resources/") + 10));
      String query = test.getQuery();
      if (query != null) {
        // Strings are immutable; the result of replaceAll must be reassigned
        query = query.replaceAll("\n", "");
      }
      document.set("query", query);
      document.set("status", test.getTestStatus().toString());
      if (test.getTestStatus().equals(TestStatus.EXECUTION_FAILURE)
          || test.getTestStatus().equals(TestStatus.VERIFICATION_FAILURE)) {
        document.set("errorMessage", test.getException().toString().replaceAll("\n", ""));
      } else {
        document.set("errorMessage", "N/A");
      }
      document.set("queryExecutionTime", test.getDuration().toString());
      document.set("drillVersion", version);
      document.set("commitId", commitId);
      bufferedWriter.write(document.toString());
      bufferedWriter.newLine();
    }
    bufferedWriter.flush();
    bufferedWriter.close();
    // Upload report to DFS if the drillReportsDFSDir variable is set
    if (drillReportsDFSDir != null) {
      FileUtil.copy(localFS, new Path(reportFile.getAbsolutePath()), DFS,
          new Path(drillReportsDFSDir + "/" + reportFile.getName()), true, false, DFS.getConf());
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
}
From source file:org.apache.drill.test.framework.TestDriver.java
License:Apache License
private static void dfsCopy(Path src, Path dest, String fsMode) throws IOException {
  FileSystem fs;
  FileSystem localFs = FileSystem.getLocal(conf);
  if (fsMode.equals(LOCALFS)) {
    fs = FileSystem.getLocal(conf);
  } else {
    fs = FileSystem.get(conf);
  }
  try {
    if (localFs.getFileStatus(src).isDirectory()) {
      // Recurse into directories, mirroring the source tree under dest
      for (FileStatus file : localFs.listStatus(src)) {
        Path srcChild = file.getPath();
        Path newDest = new Path(dest + "/" + srcChild.getName());
        dfsCopy(srcChild, newDest, fsMode);
      }
    } else {
      if (!fs.exists(dest.getParent())) {
        fs.mkdirs(dest.getParent());
      }
      if (!fs.exists(dest)) {
        FileUtil.copy(localFs, src, fs, dest, false, fs.getConf());
        LOG.debug("Copying file " + src + " to " + dest);
      } else {
        LOG.debug("File " + src + " already exists as " + dest);
      }
    }
  } catch (FileAlreadyExistsException e) {
    LOG.debug("File " + src + " already exists as " + dest);
  } catch (IOException e) {
    LOG.debug("Failed to copy file " + src + " to " + dest, e);
  }
}
From source file:org.apache.falcon.extensions.store.ExtensionStore.java
License:Apache License
public String getExtensionResource(final String resourcePath) throws FalconException {
  if (StringUtils.isBlank(resourcePath)) {
    throw new StoreAccessException("Resource path cannot be null or empty");
  }
  try {
    Path resourceFile = new Path(resourcePath);
    InputStream data;
    ByteArrayOutputStream writer = new ByteArrayOutputStream();
    if (resourcePath.startsWith("file")) {
      data = fs.open(resourceFile);
      IOUtils.copyBytes(data, writer, fs.getConf(), true);
    } else {
      FileSystem fileSystem = getHdfsFileSystem(resourcePath);
      data = fileSystem.open(resourceFile);
      IOUtils.copyBytes(data, writer, fileSystem.getConf(), true);
    }
    return writer.toString();
  } catch (IOException e) {
    throw new StoreAccessException(e);
  }
}
From source file:org.apache.falcon.hadoop.HadoopClientFactory.java
License:Apache License
public static void mkdirsWithDefaultPerms(FileSystem fs, Path path) throws IOException {
  mkdirs(fs, path, getDirDefaultPermission(fs.getConf()));
}
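getDirDefaultPermission is Falcon's own helper. Assuming it applies the conf's umask to Hadoop's directory default, an equivalent computation using the public Hadoop permission API might look like this (the helper name is hypothetical):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.permission.FsPermission;

// Assumption: derive a default directory permission from the conf's umask,
// mirroring what a helper like getDirDefaultPermission plausibly does.
static FsPermission dirPermissionFromConf(Configuration conf) {
  // FsPermission.getUMask reads "fs.permissions.umask-mode" (e.g. "022") from the conf.
  FsPermission umask = FsPermission.getUMask(conf);
  // Apply the umask to the directory default (rwxrwxrwx) to get the effective permission.
  return FsPermission.getDirDefault().applyUMask(umask);
}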
From source file:org.apache.falcon.oozie.feed.OozieFeedWorkflowBuilderTest.java
License:Apache License
@Test(dataProvider = "uMaskOptions") public void testRetentionCoords(String umask) throws Exception { FileSystem fs = srcMiniDFS.getFileSystem(); Configuration conf = fs.getConf(); conf.set("fs.permissions.umask-mode", umask); OozieEntityBuilder feedBuilder = OozieEntityBuilder.get(feed); Path bundlePath = new Path("/projects/falcon/"); feedBuilder.build(trgCluster, bundlePath); // ClusterHelper constructs new fs Conf. Add it to cluster properties so that it gets added to FS conf setUmaskInFsConf(srcCluster, umask); org.apache.falcon.entity.v0.feed.Cluster cluster = FeedHelper.getCluster(feed, srcCluster.getName()); Calendar startCal = Calendar.getInstance(); Calendar endCal = Calendar.getInstance(); endCal.add(Calendar.DATE, 1); cluster.getValidity().setEnd(endCal.getTime()); RuntimeProperties.get().setProperty("falcon.retention.keep.instances.beyond.validity", "false"); OozieCoordinatorBuilder builder = OozieCoordinatorBuilder.get(feed, Tag.RETENTION); List<Properties> coords = builder.buildCoords(srcCluster, new Path("/projects/falcon/" + umask)); COORDINATORAPP coord = getCoordinator(srcMiniDFS, coords.get(0).getProperty(OozieEntityBuilder.ENTITY_PATH)); Assert.assertEquals(coord.getAction().getWorkflow().getAppPath(), "${nameNode}/projects/falcon/" + umask + "/RETENTION"); Assert.assertEquals(coord.getName(), "FALCON_FEED_RETENTION_" + feed.getName()); Assert.assertEquals(coord.getFrequency(), "${coord:hours(6)}"); Assert.assertEquals(coord.getStart(), DateUtil.getDateFormatFromTime(startCal.getTimeInMillis())); Date endDate = DateUtils.addSeconds(endCal.getTime(), FeedHelper.getRetentionLimitInSeconds(feed, srcCluster.getName())); Assert.assertEquals(coord.getEnd(), DateUtil.getDateFormatFromTime(endDate.getTime())); HashMap<String, String> props = getCoordProperties(coord); HashMap<String, String> wfProps = getWorkflowProperties(fs, coord); String feedDataPath = wfProps.get("feedDataPath"); String storageType = wfProps.get("falconFeedStorageType"); // verify the param that feed evictor depends on Assert.assertEquals(storageType, Storage.TYPE.FILESYSTEM.name()); final Storage storage = FeedHelper.createStorage(cluster, feed); if (feedDataPath != null) { Assert.assertEquals(feedDataPath, storage.getUriTemplate().replaceAll(Storage.DOLLAR_EXPR_START_REGEX, Storage.QUESTION_EXPR_START_REGEX)); }//w w w.j av a 2 s. co m if (storageType != null) { Assert.assertEquals(storageType, storage.getType().name()); } // verify the post processing params Assert.assertEquals(wfProps.get(WorkflowExecutionArgs.OUTPUT_FEED_NAMES.getName()), feed.getName()); Assert.assertEquals(wfProps.get(WorkflowExecutionArgs.OUTPUT_NAMES.getName()), feed.getName()); Assert.assertEquals(wfProps.get(WorkflowExecutionArgs.OUTPUT_FEED_PATHS.getName()), "IGNORE"); assertWorkflowRetries(getWorkflowapp(srcMiniDFS.getFileSystem(), coord)); try { verifyClusterLocationsUMask(srcCluster, fs); verifyWorkflowUMask(fs, coord, umask); } finally { cleanupWorkflowState(fs, coord); FileSystem.closeAll(); } }
From source file:org.apache.falcon.oozie.feed.OozieFeedWorkflowBuilderTest.java
License:Apache License
@Test(dataProvider = "secureOptions") public void testRetentionCoordsForTable(String secureOption) throws Exception { StartupProperties.get().setProperty("falcon.postprocessing.enable", "true"); StartupProperties.get().setProperty(SecurityUtil.AUTHENTICATION_TYPE, secureOption); final String umask = "000"; FileSystem fs = trgMiniDFS.getFileSystem(); Configuration conf = fs.getConf(); conf.set("fs.permissions.umask-mode", umask); // ClusterHelper constructs new fs Conf. Add it to cluster properties so that it gets added to FS conf setUmaskInFsConf(trgCluster, umask); org.apache.falcon.entity.v0.feed.Cluster cluster = FeedHelper.getCluster(tableFeed, trgCluster.getName()); final Calendar instance = Calendar.getInstance(); instance.add(Calendar.YEAR, 1); cluster.getValidity().setEnd(instance.getTime()); OozieCoordinatorBuilder builder = OozieCoordinatorBuilder.get(tableFeed, Tag.RETENTION); List<Properties> coords = builder.buildCoords(trgCluster, new Path("/projects/falcon/")); COORDINATORAPP coord = getCoordinator(trgMiniDFS, coords.get(0).getProperty(OozieEntityBuilder.ENTITY_PATH)); Assert.assertEquals(coord.getAction().getWorkflow().getAppPath(), "${nameNode}/projects/falcon/RETENTION"); Assert.assertEquals(coord.getName(), "FALCON_FEED_RETENTION_" + tableFeed.getName()); Assert.assertEquals(coord.getFrequency(), "${coord:hours(6)}"); HashMap<String, String> props = getCoordProperties(coord); HashMap<String, String> wfProps = getWorkflowProperties(fs, coord); String feedDataPath = wfProps.get("feedDataPath"); String storageType = wfProps.get("falconFeedStorageType"); // verify the param that feed evictor depends on Assert.assertEquals(storageType, Storage.TYPE.TABLE.name()); final Storage storage = FeedHelper.createStorage(cluster, tableFeed); if (feedDataPath != null) { Assert.assertEquals(feedDataPath, storage.getUriTemplate().replaceAll(Storage.DOLLAR_EXPR_START_REGEX, Storage.QUESTION_EXPR_START_REGEX)); }/* w w w. j a v a2 s .co m*/ if (storageType != null) { Assert.assertEquals(storageType, storage.getType().name()); } // verify the post processing params Assert.assertEquals(wfProps.get(WorkflowExecutionArgs.OUTPUT_FEED_NAMES.getName()), tableFeed.getName()); Assert.assertEquals(wfProps.get(WorkflowExecutionArgs.OUTPUT_NAMES.getName()), tableFeed.getName()); Assert.assertEquals(wfProps.get(WorkflowExecutionArgs.OUTPUT_FEED_PATHS.getName()), "IGNORE"); assertWorkflowRetries(coord); verifyBrokerProperties(srcCluster, wfProps); verifyEntityProperties(tableFeed, trgCluster, WorkflowExecutionContext.EntityOperations.DELETE, wfProps); Assert.assertTrue(Storage.TYPE.TABLE == FeedHelper.getStorageType(tableFeed, trgCluster)); assertHCatCredentials(getWorkflowapp(trgMiniDFS.getFileSystem(), coord), coord.getAction().getWorkflow().getAppPath().replace("${nameNode}", "")); try { verifyClusterLocationsUMask(trgCluster, fs); verifyWorkflowUMask(fs, coord, umask); } finally { cleanupWorkflowState(fs, coord); FileSystem.closeAll(); } }
From source file:org.apache.falcon.oozie.feed.OozieFeedWorkflowBuilderTest.java
License:Apache License
private void verifyWorkflowUMask(FileSystem fs, COORDINATORAPP coord, String defaultUMask)
    throws IOException {
  Assert.assertEquals(fs.getConf().get("fs.permissions.umask-mode"), defaultUMask);

  String appPath = coord.getAction().getWorkflow().getAppPath().replace("${nameNode}", "");
  Path wfPath = new Path(appPath);
  FileStatus[] fileStatuses = fs.listStatus(wfPath);
  for (FileStatus fileStatus : fileStatuses) {
    Assert.assertEquals(fileStatus.getOwner(), CurrentUser.getProxyUGI().getShortUserName());

    final FsPermission permission = fileStatus.getPermission();
    if (!fileStatus.isDirectory()) {
      Assert.assertEquals(permission.toString(),
          HadoopClientFactory.getFileDefaultPermission(fs.getConf()).toString());
    }
  }
}