Usage examples for org.apache.hadoop.fs.FileSystem#get
public static FileSystem get(Configuration conf) throws IOException
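Before the project-specific examples below, here is a minimal sketch of the typical call pattern. The class name and the path "/tmp/example.txt" are placeholders for illustration, not taken from any of the source files listed here:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemGetExample {
    public static void main(String[] args) throws IOException {
        // Load the default Hadoop configuration (core-site.xml / hdfs-site.xml on the classpath)
        Configuration conf = new Configuration();
        // Obtain the FileSystem named by fs.defaultFS (HDFS, or the local file system by default)
        FileSystem fs = FileSystem.get(conf);
        // Use the returned FileSystem, e.g. check whether a (hypothetical) path exists
        Path path = new Path("/tmp/example.txt");
        System.out.println(path + " exists: " + fs.exists(path));
    }
}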
From source file:awshamondsidefunctions.AWSDiamondReducer.java
License:Apache License
@Override
public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    String userName = HadoopUser.getHadoopUser();
    Configuration conf = context.getConfiguration();
    String outPut = conf.get(DiamondMapReduce.OUTPUT);
    FileSystem fs = FileSystem.get(conf);
    // merge all output files into one output file on HDFS
    FileUtil.copyMerge(fs, new Path("/user/" + userName + "/Hamond/output"), fs,
            new Path("/user/" + userName + "/Hamond/" + new Path(outPut).getName()), false, conf, null);
}
From source file:azkaban.AzkabanCommonModule.java
License:Apache License
@Inject
@Provides
@Singleton
public FileSystem createHadoopFileSystem(final Configuration hadoopConf, final HdfsAuth auth) {
    try {
        auth.authorize();
        return FileSystem.get(hadoopConf);
    } catch (final IOException e) {
        log.error("Unable to initialize HDFS", e);
        throw new AzkabanException(e);
    }
}
From source file:azkaban.jobtype.connectors.teradata.HdfsToTeradataJobRunnerMain.java
License:Apache License
private HdfsToTeradataJobRunnerMain(Properties jobProps) throws FileNotFoundException, IOException {
    this(jobProps, new Whitelist(new Props(null, jobProps), FileSystem.get(new Configuration())),
            new Decryptions());
}
From source file:azkaban.jobtype.connectors.teradata.HdfsToTeradataJobRunnerMain.java
License:Apache License
@VisibleForTesting
HdfsToTeradataJobRunnerMain(Properties jobProps, Whitelist whitelist, Decryptions decryptions)
        throws FileNotFoundException, IOException {
    _logger = JobUtils.initJobLogger();
    _jobProps = jobProps;

    Props props = new Props(null, _jobProps);
    HadoopConfigurationInjector.injectResources(props);
    Configuration conf = new Configuration();
    UserGroupInformation.setConfiguration(conf);

    if (props.containsKey(Whitelist.WHITE_LIST_FILE_PATH_KEY)) {
        whitelist.validateWhitelisted(props);
    }

    String encryptedCredential = _jobProps.getProperty(TdchConstants.TD_ENCRYPTED_CREDENTIAL_KEY);
    String cryptoKeyPath = _jobProps.getProperty(TdchConstants.TD_CRYPTO_KEY_PATH_KEY);
    String password = null;
    if (encryptedCredential != null && cryptoKeyPath != null) {
        password = decryptions.decrypt(encryptedCredential, cryptoKeyPath, FileSystem.get(new Configuration()));
    }

    _params = TdchParameters.builder().mrParams(_jobProps.getProperty(TdchConstants.HADOOP_CONFIG_KEY))
            .libJars(props.getString(TdchConstants.LIB_JARS_KEY))
            .tdJdbcClassName(TdchConstants.TERADATA_JDBCDRIVER_CLASSNAME)
            .teradataHostname(props.getString(TdchConstants.TD_HOSTNAME_KEY))
            .fileFormat(_jobProps.getProperty(TdchConstants.HDFS_FILE_FORMAT_KEY))
            .fieldSeparator(_jobProps.getProperty(TdchConstants.HDFS_FIELD_SEPARATOR_KEY))
            .jobType(TdchConstants.TDCH_JOB_TYPE)
            .userName(props.getString(TdchConstants.TD_USERID_KEY))
            .credentialName(_jobProps.getProperty(TdchConstants.TD_CREDENTIAL_NAME_KEY))
            .password(password)
            .avroSchemaPath(_jobProps.getProperty(TdchConstants.AVRO_SCHEMA_PATH_KEY))
            .avroSchemaInline(_jobProps.getProperty(TdchConstants.AVRO_SCHEMA_INLINE_KEY))
            .sourceHdfsPath(props.getString(TdchConstants.SOURCE_HDFS_PATH_KEY))
            .targetTdTableName(props.getString(TdchConstants.TARGET_TD_TABLE_NAME_KEY))
            .errorTdDatabase(_jobProps.getProperty(TdchConstants.ERROR_DB_KEY))
            .errorTdTableName(_jobProps.getProperty(TdchConstants.ERROR_TABLE_KEY))
            .tdInsertMethod(_jobProps.getProperty(TdchConstants.TD_INSERT_METHOD_KEY))
            .numMapper(TdchConstants.DEFAULT_NO_MAPPERS)
            .otherProperties(_jobProps.getProperty(TdchConstants.TD_OTHER_PROPERTIES_HOCON_KEY))
            .build();
}
From source file:azkaban.jobtype.connectors.teradata.TeradataToHdfsJobRunnerMain.java
License:Apache License
public TeradataToHdfsJobRunnerMain() throws FileNotFoundException, IOException {
    _logger = JobUtils.initJobLogger();
    _jobProps = HadoopSecureWrapperUtils.loadAzkabanProps();

    Props props = new Props(null, _jobProps);
    HadoopConfigurationInjector.injectResources(props);
    Configuration conf = new Configuration();
    UserGroupInformation.setConfiguration(conf);

    if (props.containsKey(Whitelist.WHITE_LIST_FILE_PATH_KEY)) {
        new Whitelist(props, FileSystem.get(conf)).validateWhitelisted(props);
    }

    String encryptedCredential = _jobProps.getProperty(TdchConstants.TD_ENCRYPTED_CREDENTIAL_KEY);
    String cryptoKeyPath = _jobProps.getProperty(TdchConstants.TD_CRYPTO_KEY_PATH_KEY);
    String password = null;
    if (encryptedCredential != null && cryptoKeyPath != null) {
        password = new Decryptions().decrypt(encryptedCredential, cryptoKeyPath,
                FileSystem.get(new Configuration()));
    }

    _params = TdchParameters.builder().mrParams(_jobProps.getProperty(TdchConstants.HADOOP_CONFIG_KEY))
            .libJars(props.getString(TdchConstants.LIB_JARS_KEY))
            .tdJdbcClassName(TdchConstants.TERADATA_JDBCDRIVER_CLASSNAME)
            .teradataHostname(props.getString(TdchConstants.TD_HOSTNAME_KEY))
            .fileFormat(_jobProps.getProperty(TdchConstants.HDFS_FILE_FORMAT_KEY))
            .fieldSeparator(_jobProps.getProperty(TdchConstants.HDFS_FIELD_SEPARATOR_KEY))
            .jobType(TdchConstants.TDCH_JOB_TYPE)
            .userName(props.getString(TdchConstants.TD_USERID_KEY))
            .credentialName(_jobProps.getProperty(TdchConstants.TD_CREDENTIAL_NAME_KEY))
            .password(password)
            .avroSchemaPath(_jobProps.getProperty(TdchConstants.AVRO_SCHEMA_PATH_KEY))
            .avroSchemaInline(_jobProps.getProperty(TdchConstants.AVRO_SCHEMA_INLINE_KEY))
            .sourceTdTableName(_jobProps.getProperty(TdchConstants.SOURCE_TD_TABLE_NAME_KEY))
            .sourceQuery(_jobProps.getProperty(TdchConstants.SOURCE_TD_QUERY_NAME_KEY))
            .targetHdfsPath(props.getString(TdchConstants.TARGET_HDFS_PATH_KEY))
            .tdRetrieveMethod(_jobProps.getProperty(TdchConstants.TD_RETRIEVE_METHOD_KEY))
            .numMapper(TdchConstants.DEFAULT_NO_MAPPERS)
            .build();
}
From source file:azkaban.jobtype.hiveutils.azkaban.hive.actions.DropAllPartitionsAddLatest.java
License:Apache License
@Override
public void execute() throws HiveViaAzkabanException {
    ArrayList<HQL> hql = new ArrayList<HQL>();
    hql.add(new UseDatabaseHQL(database));

    Configuration conf = new Configuration();
    try {
        FileSystem fs = FileSystem.get(conf);
        for (String table : tables) {
            LOG.info("Determining HQL commands for table " + table);
            hql.addAll(addAndDrop(fs, tableLocations, table));
        }
        fs.close();
    } catch (IOException e) {
        throw new HiveViaAzkabanException("Exception fetching the directories/partitions from HDFS", e);
    }

    StringBuffer query = new StringBuffer();
    for (HQL q : hql) {
        query.append(q.toHQL()).append("\n");
    }

    System.out.println("Query to execute:\n" + query.toString());
    try {
        hqe.executeQuery(query.toString());
    } catch (HiveQueryExecutionException e) {
        throw new HiveViaAzkabanException("Problem executing query [" + query.toString() + "] on Hive", e);
    }
}
From source file:azkaban.jobtype.hiveutils.azkaban.hive.actions.UpdateTableLocationToLatest.java
License:Apache License
@Override
public void execute() throws HiveViaAzkabanException {
    ArrayList<HQL> hql = new ArrayList<HQL>();
    hql.add(new UseDatabaseHQL(database));

    Configuration conf = new Configuration();
    try {
        FileSystem fs = FileSystem.get(conf);
        for (int i = 0; i < tables.length; i++) {
            LOG.info("Determining HQL commands for table " + tables[i]);
            hql.add(latestURI(fs, tablesLocations[i], tables[i]));
        }
        fs.close();
    } catch (IOException e) {
        throw new HiveViaAzkabanException("Exception fetching the directories from HDFS", e);
    }

    StringBuffer query = new StringBuffer();
    for (HQL q : hql) {
        query.append(q.toHQL()).append("\n");
    }

    System.out.println("Query to execute:\n" + query.toString());
    try {
        hqe.executeQuery(query.toString());
    } catch (HiveQueryExecutionException e) {
        throw new HiveViaAzkabanException("Problem executing query [" + query.toString() + "] on Hive", e);
    }
}
From source file:azkaban.jobtype.javautils.AbstractHadoopJob.java
License:Apache License
@SuppressWarnings("rawtypes") public JobConf createJobConf(Class<? extends Mapper> mapperClass, Class<? extends Reducer> reducerClass) throws IOException, URISyntaxException { JobConf conf = new JobConf(); // set custom class loader with custom find resource strategy. conf.setJobName(getJobName());/*from w ww. j a v a 2 s .c om*/ conf.setMapperClass(mapperClass); if (reducerClass != null) { conf.setReducerClass(reducerClass); } if (props.getBoolean("is.local", false)) { conf.set("mapred.job.tracker", "local"); conf.set("fs.default.name", "file:///"); conf.set("mapred.local.dir", "/tmp/map-red"); logger.info("Running locally, no hadoop jar set."); } else { HadoopUtils.setClassLoaderAndJar(conf, getClass()); logger.info("Setting hadoop jar file for class:" + getClass() + " to " + conf.getJar()); logger.info("*************************************************************************"); logger.info( " Running on Real Hadoop Cluster(" + conf.get("mapred.job.tracker") + ") "); logger.info("*************************************************************************"); } // set JVM options if present if (props.containsKey("mapred.child.java.opts")) { conf.set("mapred.child.java.opts", props.getString("mapred.child.java.opts")); logger.info("mapred.child.java.opts set to " + props.getString("mapred.child.java.opts")); } // set input and output paths if they are present if (props.containsKey("input.paths")) { List<String> inputPaths = props.getStringList("input.paths"); if (inputPaths.size() == 0) throw new IllegalArgumentException("Must specify at least one value for property 'input.paths'"); for (String path : inputPaths) { HadoopUtils.addAllSubPaths(conf, new Path(path)); } } if (props.containsKey("output.path")) { String location = props.get("output.path"); FileOutputFormat.setOutputPath(conf, new Path(location)); // For testing purpose only remove output file if exists if (props.getBoolean("force.output.overwrite", false)) { FileSystem fs = FileOutputFormat.getOutputPath(conf).getFileSystem(conf); fs.delete(FileOutputFormat.getOutputPath(conf), true); } } // Adds External jars to hadoop classpath String externalJarList = props.getString("hadoop.external.jarFiles", null); if (externalJarList != null) { FileSystem fs = FileSystem.get(conf); String[] jarFiles = externalJarList.split(","); for (String jarFile : jarFiles) { logger.info("Adding extenral jar File:" + jarFile); DistributedCache.addFileToClassPath(new Path(jarFile), conf, fs); } } // Adds distributed cache files String cacheFileList = props.getString("hadoop.cache.files", null); if (cacheFileList != null) { String[] cacheFiles = cacheFileList.split(","); for (String cacheFile : cacheFiles) { logger.info("Adding Distributed Cache File:" + cacheFile); DistributedCache.addCacheFile(new URI(cacheFile), conf); } } // Adds distributed cache files String archiveFileList = props.getString("hadoop.cache.archives", null); if (archiveFileList != null) { String[] archiveFiles = archiveFileList.split(","); for (String archiveFile : archiveFiles) { logger.info("Adding Distributed Cache Archive File:" + archiveFile); DistributedCache.addCacheArchive(new URI(archiveFile), conf); } } String hadoopCacheJarDir = props.getString("hdfs.default.classpath.dir", null); if (hadoopCacheJarDir != null) { FileSystem fs = FileSystem.get(conf); if (fs != null) { FileStatus[] status = fs.listStatus(new Path(hadoopCacheJarDir)); if (status != null) { for (int i = 0; i < status.length; ++i) { if (!status[i].isDir()) { Path path = new Path(hadoopCacheJarDir, 
status[i].getPath().getName()); logger.info("Adding Jar to Distributed Cache Archive File:" + path); DistributedCache.addFileToClassPath(path, conf, fs); } } } else { logger.info("hdfs.default.classpath.dir " + hadoopCacheJarDir + " is empty."); } } else { logger.info("hdfs.default.classpath.dir " + hadoopCacheJarDir + " filesystem doesn't exist"); } } for (String key : getProps().getKeySet()) { String lowerCase = key.toLowerCase(); if (lowerCase.startsWith(HADOOP_PREFIX)) { String newKey = key.substring(HADOOP_PREFIX.length()); conf.set(newKey, getProps().get(key)); } } HadoopUtils.setPropsInJob(conf, getProps()); // put in tokens if (System.getenv(HADOOP_TOKEN_FILE_LOCATION) != null) { conf.set(MAPREDUCE_JOB_CREDENTIALS_BINARY, System.getenv(HADOOP_TOKEN_FILE_LOCATION)); } return conf; }
From source file:azkaban.jobtype.StatsUtils.java
License:Apache License
public static Properties getJobConf(RunningJob runningJob) {
    try {
        Path path = new Path(runningJob.getJobFile());
        Configuration conf = new Configuration(false);
        FileSystem fs = FileSystem.get(new Configuration());
        InputStream in = fs.open(path);
        conf.addResource(in);
        return getJobConf(conf);
    } catch (FileNotFoundException e) {
        logger.warn("Job conf not found.");
    } catch (IOException e) {
        logger.warn("Error while retrieving job conf: " + e.getMessage());
    }
    return null;
}
From source file:azkaban.reportal.util.StreamProviderHDFS.java
License:Apache License
private void ensureHdfs() throws HadoopSecurityManagerException, IOException {
    if (hdfs == null) {
        if (securityManager == null) {
            hdfs = FileSystem.get(new Configuration());
        } else {
            hdfs = securityManager.getFSAsUser(username);
        }
    }
}