Example usage for org.apache.hadoop.fs FileSystem getConf

List of usage examples for org.apache.hadoop.fs FileSystem getConf

Introduction

On this page you can find example usage of org.apache.hadoop.fs FileSystem getConf.

Prototype

@Override
public Configuration getConf()

Source Link

Usage
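
Before the examples collected from real projects, here is a minimal, self-contained sketch of the call. It assumes nothing beyond a default Configuration (which resolves to the local file system unless fs.defaultFS says otherwise); the class name GetConfExample is illustrative only.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class GetConfExample {
    public static void main(String[] args) throws IOException {
        // Build a FileSystem from a plain Configuration; with no overrides this
        // resolves to the local file system.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // getConf() hands back the Configuration the FileSystem was initialized with,
        // so the same settings can be reused by helpers that need them.
        Configuration fsConf = fs.getConf();
        System.out.println("fs.defaultFS = " + fsConf.get("fs.defaultFS"));

        fs.close();
    }
}

Most of the real-world examples below follow the same pattern: an existing FileSystem instance is passed around, and getConf() is used to recover its settings instead of threading a separate Configuration object through the code.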

From source file:org.apache.drill.exec.store.parquet.metadata.Metadata.java

License:Apache License

/**
 * Get the metadata for a single file.
 */
private ParquetFileMetadata_v3 getParquetFileMetadata_v3(ParquetTableMetadata_v3 parquetTableMetadata,
        final FileStatus file, final FileSystem fs, boolean allColumns, Set<String> columnSet)
        throws IOException, InterruptedException {
    final ParquetMetadata metadata;
    final UserGroupInformation processUserUgi = ImpersonationUtil.getProcessUserUGI();
    final Configuration conf = new Configuration(fs.getConf());
    try {
        metadata = processUserUgi.doAs((PrivilegedExceptionAction<ParquetMetadata>) () -> {
            try (ParquetFileReader parquetFileReader = ParquetFileReader
                    .open(HadoopInputFile.fromStatus(file, conf), readerConfig.toReadOptions())) {
                return parquetFileReader.getFooter();
            }
        });
    } catch (Exception e) {
        logger.error(
                "Exception while reading footer of parquet file [Details - path: {}, owner: {}] as process user {}",
                file.getPath(), file.getOwner(), processUserUgi.getShortUserName(), e);
        throw e;
    }

    MessageType schema = metadata.getFileMetaData().getSchema();

    Map<SchemaPath, ColTypeInfo> colTypeInfoMap = new HashMap<>();
    for (String[] path : schema.getPaths()) {
        colTypeInfoMap.put(SchemaPath.getCompoundPath(path), getColTypeInfo(schema, schema, path, 0));
    }

    List<RowGroupMetadata_v3> rowGroupMetadataList = Lists.newArrayList();

    ArrayList<SchemaPath> ALL_COLS = new ArrayList<>();
    ALL_COLS.add(SchemaPath.STAR_COLUMN);
    ParquetReaderUtility.DateCorruptionStatus containsCorruptDates = ParquetReaderUtility
            .detectCorruptDates(metadata, ALL_COLS, readerConfig.autoCorrectCorruptedDates());
    logger.debug("Contains corrupt dates: {}.", containsCorruptDates);

    for (BlockMetaData rowGroup : metadata.getBlocks()) {
        List<ColumnMetadata_v3> columnMetadataList = new ArrayList<>();
        long length = 0;
        for (ColumnChunkMetaData col : rowGroup.getColumns()) {
            String[] columnName = col.getPath().toArray();
            SchemaPath columnSchemaName = SchemaPath.getCompoundPath(columnName);
            ColTypeInfo colTypeInfo = colTypeInfoMap.get(columnSchemaName);

            ColumnTypeMetadata_v3 columnTypeMetadata = new ColumnTypeMetadata_v3(columnName,
                    col.getPrimitiveType().getPrimitiveTypeName(), colTypeInfo.originalType,
                    colTypeInfo.precision, colTypeInfo.scale, colTypeInfo.repetitionLevel,
                    colTypeInfo.definitionLevel);

            if (parquetTableMetadata.columnTypeInfo == null) {
                parquetTableMetadata.columnTypeInfo = new ConcurrentHashMap<>();
            }
            parquetTableMetadata.columnTypeInfo.put(new ColumnTypeMetadata_v3.Key(columnTypeMetadata.name),
                    columnTypeMetadata);
            // Store column metadata only if allColumns is set to true or if the column belongs to the subset of columns specified in the refresh command
            if (allColumns || columnSet == null || !allColumns && columnSet != null && columnSet.size() > 0
                    && columnSet.contains(columnSchemaName.getRootSegmentPath())) {
                Statistics<?> stats = col.getStatistics();
                // Save the column schema info. We'll merge it into one list
                Object minValue = null;
                Object maxValue = null;
                long numNulls = -1;
                boolean statsAvailable = stats != null && !stats.isEmpty();
                if (statsAvailable) {
                    if (stats.hasNonNullValue()) {
                        minValue = stats.genericGetMin();
                        maxValue = stats.genericGetMax();
                        if (containsCorruptDates == ParquetReaderUtility.DateCorruptionStatus.META_SHOWS_CORRUPTION
                                && columnTypeMetadata.originalType == OriginalType.DATE) {
                            minValue = ParquetReaderUtility.autoCorrectCorruptedDate((Integer) minValue);
                            maxValue = ParquetReaderUtility.autoCorrectCorruptedDate((Integer) maxValue);
                        }
                    }
                    numNulls = stats.getNumNulls();
                }
                ColumnMetadata_v3 columnMetadata = new ColumnMetadata_v3(columnTypeMetadata.name,
                        col.getPrimitiveType().getPrimitiveTypeName(), minValue, maxValue, numNulls);
                columnMetadataList.add(columnMetadata);
            }
            length += col.getTotalSize();
        }

        // DRILL-5009: Skip the RowGroup if it is empty
        // Note we still read the schema even if there are no values in the RowGroup
        if (rowGroup.getRowCount() == 0) {
            continue;
        }
        RowGroupMetadata_v3 rowGroupMeta = new RowGroupMetadata_v3(rowGroup.getStartingPos(), length,
                rowGroup.getRowCount(), getHostAffinity(file, fs, rowGroup.getStartingPos(), length),
                columnMetadataList);

        rowGroupMetadataList.add(rowGroupMeta);
    }
    Path path = Path.getPathWithoutSchemeAndAuthority(file.getPath());

    return new ParquetFileMetadata_v3(path, file.getLen(), rowGroupMetadataList);
}

From source file:org.apache.drill.exec.store.parquet.Metadata.java

License:Apache License

private Metadata(FileSystem fs) {
    this.fs = ImpersonationUtil.createFileSystem(ImpersonationUtil.getProcessUserName(), fs.getConf());
}

From source file:org.apache.drill.exec.store.schedule.BlockMapBuilder.java

License:Apache License

public BlockMapBuilder(FileSystem fs, Collection<DrillbitEndpoint> endpoints) {
    this.fs = fs;
    this.codecFactory = new CompressionCodecFactory(fs.getConf());
    this.endPointMap = buildEndpointMap(endpoints);
}

From source file:org.apache.drill.test.framework.TestDriver.java

License:Apache License

public void generateReports(List<DrillTest> tests, int iteration) {

    try {
        if (drillReportsDir == null) {
            drillReportsDir = CWD;
        }

        File drillReportDir = new File(drillReportsDir);
        FileSystem localFS = FileSystem.getLocal(conf);
        FileSystem DFS = FileSystem.get(conf);

        if (!drillReportDir.exists()) {
            if (!drillReportDir.mkdir()) {
                LOG.debug("Cannot create directory " + drillReportsDir
                        + ".  Using current working directory for drill output");
                drillReportsDir = CWD;
            }
        }

        File reportFile = new File(drillReportsDir + "/apache-drill-" + version + "_" + commitId + "_"
                + "report_" + new Date().toString().replace(' ', '_').replace(':', '_') + ".json");

        BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(reportFile));
        Document document;
        for (DrillTest test : tests) {
            document = Json.newDocument();
            document.set("_id", test.getTestId() + "_" + new File(test.getInputFile()).getName() + "_"
                    + test.getCloneId() + "_" + iteration);
            document.set("queryFilepath",
                    test.getInputFile().substring(test.getInputFile().indexOf("resources/") + 10));
            String query = test.getQuery();
            if (query != null) {
                query = query.replaceAll("\n", "");
            }
            document.set("query", query);
            document.set("status", test.getTestStatus().toString());
            if (test.getTestStatus().equals(TestStatus.EXECUTION_FAILURE)
                    || test.getTestStatus().equals(TestStatus.VERIFICATION_FAILURE)) {
                document.set("errorMessage", test.getException().toString().replaceAll("\n", ""));
            } else {
                document.set("errorMessage", "N/A");
            }
            document.set("queryExecutionTime", test.getDuration().toString());
            document.set("drillVersion", version);
            document.set("commitId", commitId);
            bufferedWriter.write(document.toString());
            bufferedWriter.newLine();
        }

        bufferedWriter.flush();
        bufferedWriter.close();

        // Upload report to DFS if the drillReportsDFSDir variable is set
        if (drillReportsDFSDir != null) {
            FileUtil.copy(localFS, new Path(reportFile.getAbsolutePath()), DFS,
                    new Path(drillReportsDFSDir + "/" + reportFile.getName()), true, false, DFS.getConf());
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:org.apache.drill.test.framework.TestDriver.java

License:Apache License

private static void dfsCopy(Path src, Path dest, String fsMode) throws IOException {

    FileSystem fs;
    FileSystem localFs = FileSystem.getLocal(conf);

    if (fsMode.equals(LOCALFS)) {
        fs = FileSystem.getLocal(conf);
    } else {
        fs = FileSystem.get(conf);
    }

    try {
        if (localFs.getFileStatus(src).isDirectory()) {
            for (FileStatus file : localFs.listStatus(src)) {
                Path srcChild = file.getPath();
                Path newDest = new Path(dest + "/" + srcChild.getName());
                dfsCopy(srcChild, newDest, fsMode);
            }
        } else {
            if (!fs.exists(dest.getParent())) {
                fs.mkdirs(dest.getParent());
            }
            if (!fs.exists(dest)) {
                FileUtil.copy(localFs, src, fs, dest, false, fs.getConf());
                LOG.debug("Copying file " + src + " to " + dest);
            } else {
                LOG.debug("File " + src + " already exists as " + dest);
            }
        }
    } catch (FileAlreadyExistsException e) {
        LOG.debug("File " + src + " already exists as " + dest);
    } catch (IOException e) {
        LOG.debug("File " + src + " already exists as " + dest);
    }
}

From source file:org.apache.falcon.extensions.store.ExtensionStore.java

License:Apache License

public String getExtensionResource(final String resourcePath) throws FalconException {
    if (StringUtils.isBlank(resourcePath)) {
        throw new StoreAccessException("Resource path cannot be null or empty");
    }

    try {
        Path resourceFile = new Path(resourcePath);
        InputStream data;

        ByteArrayOutputStream writer = new ByteArrayOutputStream();
        if (resourcePath.startsWith("file")) {
            data = fs.open(resourceFile);
            IOUtils.copyBytes(data, writer, fs.getConf(), true);
        } else {
            FileSystem fileSystem = getHdfsFileSystem(resourcePath);
            data = fileSystem.open(resourceFile);
            IOUtils.copyBytes(data, writer, fileSystem.getConf(), true);
        }
        return writer.toString();
    } catch (IOException e) {
        throw new StoreAccessException(e);
    }
}

From source file:org.apache.falcon.hadoop.HadoopClientFactory.java

License:Apache License

public static void mkdirsWithDefaultPerms(FileSystem fs, Path path) throws IOException {
    mkdirs(fs, path, getDirDefaultPermission(fs.getConf()));
}

From source file:org.apache.falcon.oozie.feed.OozieFeedWorkflowBuilderTest.java

License:Apache License

@Test(dataProvider = "uMaskOptions")
public void testRetentionCoords(String umask) throws Exception {
    FileSystem fs = srcMiniDFS.getFileSystem();
    Configuration conf = fs.getConf();
    conf.set("fs.permissions.umask-mode", umask);

    OozieEntityBuilder feedBuilder = OozieEntityBuilder.get(feed);
    Path bundlePath = new Path("/projects/falcon/");
    feedBuilder.build(trgCluster, bundlePath);

    // ClusterHelper constructs new fs Conf. Add it to cluster properties so that it gets added to FS conf
    setUmaskInFsConf(srcCluster, umask);

    org.apache.falcon.entity.v0.feed.Cluster cluster = FeedHelper.getCluster(feed, srcCluster.getName());
    Calendar startCal = Calendar.getInstance();
    Calendar endCal = Calendar.getInstance();
    endCal.add(Calendar.DATE, 1);
    cluster.getValidity().setEnd(endCal.getTime());
    RuntimeProperties.get().setProperty("falcon.retention.keep.instances.beyond.validity", "false");

    OozieCoordinatorBuilder builder = OozieCoordinatorBuilder.get(feed, Tag.RETENTION);
    List<Properties> coords = builder.buildCoords(srcCluster, new Path("/projects/falcon/" + umask));
    COORDINATORAPP coord = getCoordinator(srcMiniDFS,
            coords.get(0).getProperty(OozieEntityBuilder.ENTITY_PATH));

    Assert.assertEquals(coord.getAction().getWorkflow().getAppPath(),
            "${nameNode}/projects/falcon/" + umask + "/RETENTION");
    Assert.assertEquals(coord.getName(), "FALCON_FEED_RETENTION_" + feed.getName());
    Assert.assertEquals(coord.getFrequency(), "${coord:hours(6)}");

    Assert.assertEquals(coord.getStart(), DateUtil.getDateFormatFromTime(startCal.getTimeInMillis()));
    Date endDate = DateUtils.addSeconds(endCal.getTime(),
            FeedHelper.getRetentionLimitInSeconds(feed, srcCluster.getName()));
    Assert.assertEquals(coord.getEnd(), DateUtil.getDateFormatFromTime(endDate.getTime()));

    HashMap<String, String> props = getCoordProperties(coord);

    HashMap<String, String> wfProps = getWorkflowProperties(fs, coord);

    String feedDataPath = wfProps.get("feedDataPath");
    String storageType = wfProps.get("falconFeedStorageType");

    // verify the param that feed evictor depends on

    Assert.assertEquals(storageType, Storage.TYPE.FILESYSTEM.name());

    final Storage storage = FeedHelper.createStorage(cluster, feed);
    if (feedDataPath != null) {
        Assert.assertEquals(feedDataPath, storage.getUriTemplate().replaceAll(Storage.DOLLAR_EXPR_START_REGEX,
                Storage.QUESTION_EXPR_START_REGEX));
    }

    if (storageType != null) {
        Assert.assertEquals(storageType, storage.getType().name());
    }

    // verify the post processing params
    Assert.assertEquals(wfProps.get(WorkflowExecutionArgs.OUTPUT_FEED_NAMES.getName()), feed.getName());
    Assert.assertEquals(wfProps.get(WorkflowExecutionArgs.OUTPUT_NAMES.getName()), feed.getName());
    Assert.assertEquals(wfProps.get(WorkflowExecutionArgs.OUTPUT_FEED_PATHS.getName()), "IGNORE");

    assertWorkflowRetries(getWorkflowapp(srcMiniDFS.getFileSystem(), coord));

    try {
        verifyClusterLocationsUMask(srcCluster, fs);
        verifyWorkflowUMask(fs, coord, umask);
    } finally {
        cleanupWorkflowState(fs, coord);
        FileSystem.closeAll();
    }
}

From source file:org.apache.falcon.oozie.feed.OozieFeedWorkflowBuilderTest.java

License:Apache License

@Test(dataProvider = "secureOptions")
public void testRetentionCoordsForTable(String secureOption) throws Exception {
    StartupProperties.get().setProperty("falcon.postprocessing.enable", "true");
    StartupProperties.get().setProperty(SecurityUtil.AUTHENTICATION_TYPE, secureOption);

    final String umask = "000";

    FileSystem fs = trgMiniDFS.getFileSystem();
    Configuration conf = fs.getConf();
    conf.set("fs.permissions.umask-mode", umask);

    // ClusterHelper constructs new fs Conf. Add it to cluster properties so that it gets added to FS conf
    setUmaskInFsConf(trgCluster, umask);

    org.apache.falcon.entity.v0.feed.Cluster cluster = FeedHelper.getCluster(tableFeed, trgCluster.getName());
    final Calendar instance = Calendar.getInstance();
    instance.add(Calendar.YEAR, 1);
    cluster.getValidity().setEnd(instance.getTime());

    OozieCoordinatorBuilder builder = OozieCoordinatorBuilder.get(tableFeed, Tag.RETENTION);
    List<Properties> coords = builder.buildCoords(trgCluster, new Path("/projects/falcon/"));
    COORDINATORAPP coord = getCoordinator(trgMiniDFS,
            coords.get(0).getProperty(OozieEntityBuilder.ENTITY_PATH));

    Assert.assertEquals(coord.getAction().getWorkflow().getAppPath(), "${nameNode}/projects/falcon/RETENTION");
    Assert.assertEquals(coord.getName(), "FALCON_FEED_RETENTION_" + tableFeed.getName());
    Assert.assertEquals(coord.getFrequency(), "${coord:hours(6)}");

    HashMap<String, String> props = getCoordProperties(coord);

    HashMap<String, String> wfProps = getWorkflowProperties(fs, coord);

    String feedDataPath = wfProps.get("feedDataPath");
    String storageType = wfProps.get("falconFeedStorageType");

    // verify the param that feed evictor depends on
    Assert.assertEquals(storageType, Storage.TYPE.TABLE.name());

    final Storage storage = FeedHelper.createStorage(cluster, tableFeed);
    if (feedDataPath != null) {
        Assert.assertEquals(feedDataPath, storage.getUriTemplate().replaceAll(Storage.DOLLAR_EXPR_START_REGEX,
                Storage.QUESTION_EXPR_START_REGEX));
    }

    if (storageType != null) {
        Assert.assertEquals(storageType, storage.getType().name());
    }

    // verify the post processing params
    Assert.assertEquals(wfProps.get(WorkflowExecutionArgs.OUTPUT_FEED_NAMES.getName()), tableFeed.getName());
    Assert.assertEquals(wfProps.get(WorkflowExecutionArgs.OUTPUT_NAMES.getName()), tableFeed.getName());
    Assert.assertEquals(wfProps.get(WorkflowExecutionArgs.OUTPUT_FEED_PATHS.getName()), "IGNORE");

    assertWorkflowRetries(coord);
    verifyBrokerProperties(srcCluster, wfProps);
    verifyEntityProperties(tableFeed, trgCluster, WorkflowExecutionContext.EntityOperations.DELETE, wfProps);

    Assert.assertTrue(Storage.TYPE.TABLE == FeedHelper.getStorageType(tableFeed, trgCluster));
    assertHCatCredentials(getWorkflowapp(trgMiniDFS.getFileSystem(), coord),
            coord.getAction().getWorkflow().getAppPath().replace("${nameNode}", ""));

    try {
        verifyClusterLocationsUMask(trgCluster, fs);
        verifyWorkflowUMask(fs, coord, umask);
    } finally {
        cleanupWorkflowState(fs, coord);
        FileSystem.closeAll();
    }
}

From source file:org.apache.falcon.oozie.feed.OozieFeedWorkflowBuilderTest.java

License:Apache License

private void verifyWorkflowUMask(FileSystem fs, COORDINATORAPP coord, String defaultUMask) throws IOException {
    Assert.assertEquals(fs.getConf().get("fs.permissions.umask-mode"), defaultUMask);

    String appPath = coord.getAction().getWorkflow().getAppPath().replace("${nameNode}", "");
    Path wfPath = new Path(appPath);
    FileStatus[] fileStatuses = fs.listStatus(wfPath);
    for (FileStatus fileStatus : fileStatuses) {
        Assert.assertEquals(fileStatus.getOwner(), CurrentUser.getProxyUGI().getShortUserName());

        final FsPermission permission = fileStatus.getPermission();
        if (!fileStatus.isDirectory()) {
            Assert.assertEquals(permission.toString(),
                    HadoopClientFactory.getFileDefaultPermission(fs.getConf()).toString());
        }
    }
}