Usage examples for org.apache.hadoop.fs.FileSystem#get
public static FileSystem get(Configuration conf) throws IOException
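Before the project-specific examples below, here is a minimal sketch of the typical call pattern. The class name and the path "/tmp/example.txt" are placeholders for illustration, not taken from any of the source files listed here:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemGetExample {
    public static void main(String[] args) throws IOException {
        // Load the default Hadoop configuration (core-site.xml / hdfs-site.xml on the classpath)
        Configuration conf = new Configuration();
        // Obtain the FileSystem named by fs.defaultFS (HDFS, or the local file system by default)
        FileSystem fs = FileSystem.get(conf);
        // Use the returned FileSystem, e.g. check whether a (hypothetical) path exists
        Path path = new Path("/tmp/example.txt");
        System.out.println(path + " exists: " + fs.exists(path));
    }
}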
From source file:awshamondsidefunctions.AWSDiamondReducer.java
License:Apache License
@Override
public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    String userName = HadoopUser.getHadoopUser();
    Configuration conf = context.getConfiguration();
    String outPut = conf.get(DiamondMapReduce.OUTPUT);
    FileSystem fs = FileSystem.get(conf);
    // merge all output files into one output file on HDFS
    FileUtil.copyMerge(fs, new Path("/user/" + userName + "/Hamond/output"), fs,
            new Path("/user/" + userName + "/Hamond/" + new Path(outPut).getName()), false, conf, null);
}
From source file:azkaban.AzkabanCommonModule.java
License:Apache License
@Inject
@Provides
@Singleton
public FileSystem createHadoopFileSystem(final Configuration hadoopConf, final HdfsAuth auth) {
    try {
        auth.authorize();
        return FileSystem.get(hadoopConf);
    } catch (final IOException e) {
        log.error("Unable to initialize HDFS", e);
        throw new AzkabanException(e);
    }
}
From source file:azkaban.jobtype.connectors.teradata.HdfsToTeradataJobRunnerMain.java
License:Apache License
private HdfsToTeradataJobRunnerMain(Properties jobProps) throws FileNotFoundException, IOException {
    this(jobProps, new Whitelist(new Props(null, jobProps), FileSystem.get(new Configuration())),
            new Decryptions());
}
From source file:azkaban.jobtype.connectors.teradata.HdfsToTeradataJobRunnerMain.java
License:Apache License
@VisibleForTesting
HdfsToTeradataJobRunnerMain(Properties jobProps, Whitelist whitelist, Decryptions decryptions)
        throws FileNotFoundException, IOException {
    _logger = JobUtils.initJobLogger();
    _jobProps = jobProps;

    Props props = new Props(null, _jobProps);
    HadoopConfigurationInjector.injectResources(props);
    Configuration conf = new Configuration();
    UserGroupInformation.setConfiguration(conf);

    if (props.containsKey(Whitelist.WHITE_LIST_FILE_PATH_KEY)) {
        whitelist.validateWhitelisted(props);
    }

    String encryptedCredential = _jobProps.getProperty(TdchConstants.TD_ENCRYPTED_CREDENTIAL_KEY);
    String cryptoKeyPath = _jobProps.getProperty(TdchConstants.TD_CRYPTO_KEY_PATH_KEY);
    String password = null;
    if (encryptedCredential != null && cryptoKeyPath != null) {
        password = decryptions.decrypt(encryptedCredential, cryptoKeyPath, FileSystem.get(new Configuration()));
    }

    _params = TdchParameters.builder().mrParams(_jobProps.getProperty(TdchConstants.HADOOP_CONFIG_KEY))
            .libJars(props.getString(TdchConstants.LIB_JARS_KEY))
            .tdJdbcClassName(TdchConstants.TERADATA_JDBCDRIVER_CLASSNAME)
            .teradataHostname(props.getString(TdchConstants.TD_HOSTNAME_KEY))
            .fileFormat(_jobProps.getProperty(TdchConstants.HDFS_FILE_FORMAT_KEY))
            .fieldSeparator(_jobProps.getProperty(TdchConstants.HDFS_FIELD_SEPARATOR_KEY))
            .jobType(TdchConstants.TDCH_JOB_TYPE)
            .userName(props.getString(TdchConstants.TD_USERID_KEY))
            .credentialName(_jobProps.getProperty(TdchConstants.TD_CREDENTIAL_NAME_KEY))
            .password(password)
            .avroSchemaPath(_jobProps.getProperty(TdchConstants.AVRO_SCHEMA_PATH_KEY))
            .avroSchemaInline(_jobProps.getProperty(TdchConstants.AVRO_SCHEMA_INLINE_KEY))
            .sourceHdfsPath(props.getString(TdchConstants.SOURCE_HDFS_PATH_KEY))
            .targetTdTableName(props.getString(TdchConstants.TARGET_TD_TABLE_NAME_KEY))
            .errorTdDatabase(_jobProps.getProperty(TdchConstants.ERROR_DB_KEY))
            .errorTdTableName(_jobProps.getProperty(TdchConstants.ERROR_TABLE_KEY))
            .tdInsertMethod(_jobProps.getProperty(TdchConstants.TD_INSERT_METHOD_KEY))
            .numMapper(TdchConstants.DEFAULT_NO_MAPPERS)
            .otherProperties(_jobProps.getProperty(TdchConstants.TD_OTHER_PROPERTIES_HOCON_KEY))
            .build();
}
From source file:azkaban.jobtype.connectors.teradata.TeradataToHdfsJobRunnerMain.java
License:Apache License
public TeradataToHdfsJobRunnerMain() throws FileNotFoundException, IOException {
    _logger = JobUtils.initJobLogger();
    _jobProps = HadoopSecureWrapperUtils.loadAzkabanProps();

    Props props = new Props(null, _jobProps);
    HadoopConfigurationInjector.injectResources(props);
    Configuration conf = new Configuration();
    UserGroupInformation.setConfiguration(conf);

    if (props.containsKey(Whitelist.WHITE_LIST_FILE_PATH_KEY)) {
        new Whitelist(props, FileSystem.get(conf)).validateWhitelisted(props);
    }

    String encryptedCredential = _jobProps.getProperty(TdchConstants.TD_ENCRYPTED_CREDENTIAL_KEY);
    String cryptoKeyPath = _jobProps.getProperty(TdchConstants.TD_CRYPTO_KEY_PATH_KEY);
    String password = null;
    if (encryptedCredential != null && cryptoKeyPath != null) {
        password = new Decryptions().decrypt(encryptedCredential, cryptoKeyPath,
                FileSystem.get(new Configuration()));
    }

    _params = TdchParameters.builder().mrParams(_jobProps.getProperty(TdchConstants.HADOOP_CONFIG_KEY))
            .libJars(props.getString(TdchConstants.LIB_JARS_KEY))
            .tdJdbcClassName(TdchConstants.TERADATA_JDBCDRIVER_CLASSNAME)
            .teradataHostname(props.getString(TdchConstants.TD_HOSTNAME_KEY))
            .fileFormat(_jobProps.getProperty(TdchConstants.HDFS_FILE_FORMAT_KEY))
            .fieldSeparator(_jobProps.getProperty(TdchConstants.HDFS_FIELD_SEPARATOR_KEY))
            .jobType(TdchConstants.TDCH_JOB_TYPE)
            .userName(props.getString(TdchConstants.TD_USERID_KEY))
            .credentialName(_jobProps.getProperty(TdchConstants.TD_CREDENTIAL_NAME_KEY))
            .password(password)
            .avroSchemaPath(_jobProps.getProperty(TdchConstants.AVRO_SCHEMA_PATH_KEY))
            .avroSchemaInline(_jobProps.getProperty(TdchConstants.AVRO_SCHEMA_INLINE_KEY))
            .sourceTdTableName(_jobProps.getProperty(TdchConstants.SOURCE_TD_TABLE_NAME_KEY))
            .sourceQuery(_jobProps.getProperty(TdchConstants.SOURCE_TD_QUERY_NAME_KEY))
            .targetHdfsPath(props.getString(TdchConstants.TARGET_HDFS_PATH_KEY))
            .tdRetrieveMethod(_jobProps.getProperty(TdchConstants.TD_RETRIEVE_METHOD_KEY))
            .numMapper(TdchConstants.DEFAULT_NO_MAPPERS)
            .build();
}
From source file:azkaban.jobtype.hiveutils.azkaban.hive.actions.DropAllPartitionsAddLatest.java
License:Apache License
@Override
public void execute() throws HiveViaAzkabanException {
    ArrayList<HQL> hql = new ArrayList<HQL>();
    hql.add(new UseDatabaseHQL(database));

    Configuration conf = new Configuration();
    try {
        FileSystem fs = FileSystem.get(conf);
        for (String table : tables) {
            LOG.info("Determining HQL commands for table " + table);
            hql.addAll(addAndDrop(fs, tableLocations, table));
        }
        fs.close();
    } catch (IOException e) {
        throw new HiveViaAzkabanException("Exception fetching the directories/partitions from HDFS", e);
    }

    StringBuffer query = new StringBuffer();
    for (HQL q : hql) {
        query.append(q.toHQL()).append("\n");
    }

    System.out.println("Query to execute:\n" + query.toString());
    try {
        hqe.executeQuery(query.toString());
    } catch (HiveQueryExecutionException e) {
        throw new HiveViaAzkabanException("Problem executing query [" + query.toString() + "] on Hive", e);
    }
}
From source file:azkaban.jobtype.hiveutils.azkaban.hive.actions.UpdateTableLocationToLatest.java
License:Apache License
@Override
public void execute() throws HiveViaAzkabanException {
    ArrayList<HQL> hql = new ArrayList<HQL>();
    hql.add(new UseDatabaseHQL(database));

    Configuration conf = new Configuration();
    try {
        FileSystem fs = FileSystem.get(conf);
        for (int i = 0; i < tables.length; i++) {
            LOG.info("Determining HQL commands for table " + tables[i]);
            hql.add(latestURI(fs, tablesLocations[i], tables[i]));
        }
        fs.close();
    } catch (IOException e) {
        throw new HiveViaAzkabanException("Exception fetching the directories from HDFS", e);
    }

    StringBuffer query = new StringBuffer();
    for (HQL q : hql) {
        query.append(q.toHQL()).append("\n");
    }

    System.out.println("Query to execute:\n" + query.toString());
    try {
        hqe.executeQuery(query.toString());
    } catch (HiveQueryExecutionException e) {
        throw new HiveViaAzkabanException("Problem executing query [" + query.toString() + "] on Hive", e);
    }
}
From source file:azkaban.jobtype.javautils.AbstractHadoopJob.java
License:Apache License
@SuppressWarnings("rawtypes") public JobConf createJobConf(Class<? extends Mapper> mapperClass, Class<? extends Reducer> reducerClass) throws IOException, URISyntaxException { JobConf conf = new JobConf(); // set custom class loader with custom find resource strategy. conf.setJobName(getJobName());/*from w ww. j a v a 2 s .c om*/ conf.setMapperClass(mapperClass); if (reducerClass != null) { conf.setReducerClass(reducerClass); } if (props.getBoolean("is.local", false)) { conf.set("mapred.job.tracker", "local"); conf.set("fs.default.name", "file:///"); conf.set("mapred.local.dir", "/tmp/map-red"); logger.info("Running locally, no hadoop jar set."); } else { HadoopUtils.setClassLoaderAndJar(conf, getClass()); logger.info("Setting hadoop jar file for class:" + getClass() + " to " + conf.getJar()); logger.info("*************************************************************************"); logger.info( " Running on Real Hadoop Cluster(" + conf.get("mapred.job.tracker") + ") "); logger.info("*************************************************************************"); } // set JVM options if present if (props.containsKey("mapred.child.java.opts")) { conf.set("mapred.child.java.opts", props.getString("mapred.child.java.opts")); logger.info("mapred.child.java.opts set to " + props.getString("mapred.child.java.opts")); } // set input and output paths if they are present if (props.containsKey("input.paths")) { List<String> inputPaths = props.getStringList("input.paths"); if (inputPaths.size() == 0) throw new IllegalArgumentException("Must specify at least one value for property 'input.paths'"); for (String path : inputPaths) { HadoopUtils.addAllSubPaths(conf, new Path(path)); } } if (props.containsKey("output.path")) { String location = props.get("output.path"); FileOutputFormat.setOutputPath(conf, new Path(location)); // For testing purpose only remove output file if exists if (props.getBoolean("force.output.overwrite", false)) { FileSystem fs = FileOutputFormat.getOutputPath(conf).getFileSystem(conf); fs.delete(FileOutputFormat.getOutputPath(conf), true); } } // Adds External jars to hadoop classpath String externalJarList = props.getString("hadoop.external.jarFiles", null); if (externalJarList != null) { FileSystem fs = FileSystem.get(conf); String[] jarFiles = externalJarList.split(","); for (String jarFile : jarFiles) { logger.info("Adding extenral jar File:" + jarFile); DistributedCache.addFileToClassPath(new Path(jarFile), conf, fs); } } // Adds distributed cache files String cacheFileList = props.getString("hadoop.cache.files", null); if (cacheFileList != null) { String[] cacheFiles = cacheFileList.split(","); for (String cacheFile : cacheFiles) { logger.info("Adding Distributed Cache File:" + cacheFile); DistributedCache.addCacheFile(new URI(cacheFile), conf); } } // Adds distributed cache files String archiveFileList = props.getString("hadoop.cache.archives", null); if (archiveFileList != null) { String[] archiveFiles = archiveFileList.split(","); for (String archiveFile : archiveFiles) { logger.info("Adding Distributed Cache Archive File:" + archiveFile); DistributedCache.addCacheArchive(new URI(archiveFile), conf); } } String hadoopCacheJarDir = props.getString("hdfs.default.classpath.dir", null); if (hadoopCacheJarDir != null) { FileSystem fs = FileSystem.get(conf); if (fs != null) { FileStatus[] status = fs.listStatus(new Path(hadoopCacheJarDir)); if (status != null) { for (int i = 0; i < status.length; ++i) { if (!status[i].isDir()) { Path path = new Path(hadoopCacheJarDir, 
status[i].getPath().getName()); logger.info("Adding Jar to Distributed Cache Archive File:" + path); DistributedCache.addFileToClassPath(path, conf, fs); } } } else { logger.info("hdfs.default.classpath.dir " + hadoopCacheJarDir + " is empty."); } } else { logger.info("hdfs.default.classpath.dir " + hadoopCacheJarDir + " filesystem doesn't exist"); } } for (String key : getProps().getKeySet()) { String lowerCase = key.toLowerCase(); if (lowerCase.startsWith(HADOOP_PREFIX)) { String newKey = key.substring(HADOOP_PREFIX.length()); conf.set(newKey, getProps().get(key)); } } HadoopUtils.setPropsInJob(conf, getProps()); // put in tokens if (System.getenv(HADOOP_TOKEN_FILE_LOCATION) != null) { conf.set(MAPREDUCE_JOB_CREDENTIALS_BINARY, System.getenv(HADOOP_TOKEN_FILE_LOCATION)); } return conf; }
From source file:azkaban.jobtype.StatsUtils.java
License:Apache License
public static Properties getJobConf(RunningJob runningJob) {
    try {
        Path path = new Path(runningJob.getJobFile());
        Configuration conf = new Configuration(false);
        FileSystem fs = FileSystem.get(new Configuration());
        InputStream in = fs.open(path);
        conf.addResource(in);
        return getJobConf(conf);
    } catch (FileNotFoundException e) {
        logger.warn("Job conf not found.");
    } catch (IOException e) {
        logger.warn("Error while retrieving job conf: " + e.getMessage());
    }
    return null;
}
From source file:azkaban.reportal.util.StreamProviderHDFS.java
License:Apache License
private void ensureHdfs() throws HadoopSecurityManagerException, IOException {
    if (hdfs == null) {
        if (securityManager == null) {
            hdfs = FileSystem.get(new Configuration());
        } else {
            hdfs = securityManager.getFSAsUser(username);
        }
    }
}