List of usage examples for org.apache.hadoop.fs FileSystem makeQualified
public Path makeQualified(Path path)
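makeQualified resolves a possibly relative, scheme-less Path against the file system's URI and working directory and returns a fully qualified path (scheme, authority, and absolute path filled in). A minimal sketch of the call, assuming whatever fs.defaultFS is on the classpath; the relative path "example/data" is only illustrative:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeQualifiedExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // FileSystem.get picks up fs.defaultFS from the configuration (core-site.xml if present)
        FileSystem fs = FileSystem.get(conf);

        // A relative, scheme-less path...
        Path relative = new Path("example/data");

        // ...comes back absolute with scheme and authority, e.g.
        // hdfs://namenode:8020/user/<user>/example/data, or file:/... for the local file system
        Path qualified = fs.makeQualified(relative);
        System.out.println(qualified);
    }
}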
From source file:PiEstimator.java
License:Apache License
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
    // setup job conf
    jobConf.setJobName(PiEstimator.class.getSimpleName());
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputKeyClass(BooleanWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    jobConf.setMapperClass(PiMapper.class);
    jobConf.setNumMapTasks(numMaps);
    jobConf.setReducerClass(PiReducer.class);
    jobConf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobConf.setSpeculativeExecution(false);

    // setup input/output directories
    final Path inDir = new Path(TMP_DIR, "in");
    final Path outDir = new Path(TMP_DIR, "out");
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    final FileSystem fs = FileSystem.get(jobConf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists. Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file,
                    LongWritable.class, LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        JobClient.runJob(jobConf);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute estimated value
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
                .divide(BigDecimal.valueOf(numMaps)).divide(BigDecimal.valueOf(numPoints));
    } finally {
        fs.delete(TMP_DIR, true);
    }
}
From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.DistributedRowMatrix.java
License:Apache License
@Override
public void setConf(Configuration conf) {
    this.conf = conf;
    try {
        FileSystem fs = FileSystem.get(inputPath.toUri(), conf);
        rowPath = fs.makeQualified(inputPath);
        outputTmpBasePath = fs.makeQualified(outputTmpPath);
        keepTempFiles = conf.getBoolean(KEEP_TEMP_FILES, false);
    } catch (IOException ioe) {
        throw new IllegalStateException(ioe);
    }
}
From source file:at.illecker.hama.hybrid.examples.matrixmultiplication.util.DistributedRowMatrix.java
License:Apache License
@Override
public void setConf(Configuration conf) {
    this.conf = conf;
    try {
        FileSystem fs = FileSystem.get(inputPath.toUri(), conf);
        rowPath = fs.makeQualified(inputPath);
        outputTmpBasePath = fs.makeQualified(outputTmpPath);
    } catch (IOException ioe) {
        throw new IllegalStateException(ioe);
    }
}
From source file:cascading.flow.hadoop.util.HadoopUtil.java
License:Open Source License
public static void resolvePaths(Configuration config, Collection<String> classpath, String remoteRoot,
        String resourceSubPath, Map<String, Path> localPaths, Map<String, Path> remotePaths) {
    FileSystem defaultFS = getDefaultFS(config);
    FileSystem localFS = getLocalFS(config);

    Path remoteRootPath = new Path(remoteRoot == null ? "./.staging" : remoteRoot);

    if (resourceSubPath != null)
        remoteRootPath = new Path(remoteRootPath, resourceSubPath);

    remoteRootPath = defaultFS.makeQualified(remoteRootPath);

    boolean defaultIsLocal = defaultFS.equals(localFS);

    for (String stringPath : classpath) {
        Path path = new Path(stringPath);
        URI uri = path.toUri();

        if (uri.getScheme() == null && !defaultIsLocal) // we want to sync
        {
            Path localPath = localFS.makeQualified(path);

            if (!exists(localFS, localPath))
                throw new FlowException("path not found: " + localPath);

            String name = localPath.getName();

            if (resourceSubPath != null)
                name = resourceSubPath + "/" + name;

            localPaths.put(name, localPath);
            remotePaths.put(name, defaultFS.makeQualified(new Path(remoteRootPath, path.getName())));
        } else if (localFS.equals(getFileSystem(config, path))) {
            if (!exists(localFS, path))
                throw new FlowException("path not found: " + path);

            Path localPath = localFS.makeQualified(path);

            String name = localPath.getName();

            if (resourceSubPath != null)
                name = resourceSubPath + "/" + name;

            localPaths.put(name, localPath);
        } else {
            if (!exists(defaultFS, path))
                throw new FlowException("path not found: " + path);

            Path defaultPath = defaultFS.makeQualified(path);

            String name = defaultPath.getName();

            if (resourceSubPath != null)
                name = resourceSubPath + "/" + name;

            remotePaths.put(name, defaultPath);
        }
    }
}
From source file:cascading.flow.tez.planner.Hadoop2TezFlowStepJob.java
License:Open Source License
private Path prepareEnsureStagingDir(TezConfiguration workingConf) throws IOException {
    String stepStagingPath = createStepStagingPath();

    workingConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stepStagingPath);

    Path stagingDir = new Path(stepStagingPath);
    FileSystem fileSystem = FileSystem.get(workingConf);

    stagingDir = fileSystem.makeQualified(stagingDir);

    TokenCache.obtainTokensForNamenodes(new Credentials(), new Path[] { stagingDir }, workingConf);

    TezClientUtils.ensureStagingDirExists(workingConf, stagingDir);

    if (fileSystem.getScheme().startsWith("file:/"))
        new File(stagingDir.toUri()).mkdirs();

    return stagingDir;
}
From source file:cascading.tap.hive.HiveTap.java
License:Open Source License
@Override
public boolean resourceExists(JobConf conf) throws IOException {
    IMetaStoreClient metaStoreClient = null;
    try {
        metaStoreClient = createMetaStoreClient();
        Table table = metaStoreClient.getTable(tableDescriptor.getDatabaseName(),
                tableDescriptor.getTableName());
        modifiedTime = table.getLastAccessTime();
        // check if the schema matches the table descriptor. If not, throw an exception.
        if (strict) {
            LOG.info("strict mode: comparing existing hive table with table descriptor");
            if (!table.getTableType().equals(tableDescriptor.toHiveTable().getTableType()))
                throw new HiveTableValidationException(
                        String.format("expected a table of type '%s' but found '%s'",
                                tableDescriptor.toHiveTable().getTableType(), table.getTableType()));
            // Check that the paths are the same
            FileSystem fs = FileSystem.get(conf);
            StorageDescriptor sd = table.getSd();
            Path expectedPath = fs.makeQualified(
                    new Path(tableDescriptor.getLocation(hiveConf.getVar(ConfVars.METASTOREWAREHOUSE))));
            Path actualPath = fs.makeQualified(new Path(sd.getLocation()));
            if (!expectedPath.equals(actualPath))
                throw new HiveTableValidationException(
                        String.format("table in MetaStore does not have the same path. Expected %s got %s",
                                expectedPath, actualPath));
            List<FieldSchema> schemaList = sd.getCols();
            if (schemaList.size() != tableDescriptor.getColumnNames().length
                    - tableDescriptor.getPartitionKeys().length)
                throw new HiveTableValidationException(String.format(
                        "table in MetaStore does not have same number of columns. expected %d got %d",
                        tableDescriptor.getColumnNames().length - tableDescriptor.getPartitionKeys().length,
                        schemaList.size()));
            for (int index = 0; index < schemaList.size(); index++) {
                FieldSchema schema = schemaList.get(index);
                String expectedColumnName = tableDescriptor.getColumnNames()[index];
                String expectedColumnType = tableDescriptor.getColumnTypes()[index];
                // this could be extended to the StorageDescriptor if necessary.
                if (!schema.getName().equalsIgnoreCase(expectedColumnName))
                    throw new HiveTableValidationException(
                            String.format("hive schema mismatch: expected column name '%s', but found '%s'",
                                    expectedColumnName, schema.getName()));
                if (!schema.getType().equalsIgnoreCase(expectedColumnType))
                    throw new HiveTableValidationException(
                            String.format("hive schema mismatch: expected column type '%s', but found '%s'",
                                    expectedColumnType, schema.getType()));
            }
            List<FieldSchema> schemaPartitions = table.getPartitionKeys();
            if (schemaPartitions.size() != tableDescriptor.getPartitionKeys().length)
                throw new HiveTableValidationException(String.format(
                        "table in MetaStore does not have same number of partition columns. expected %d got %d",
                        tableDescriptor.getPartitionKeys().length, schemaPartitions.size()));
            int offset = tableDescriptor.getColumnNames().length - tableDescriptor.getPartitionKeys().length;
            for (int index = 0; index < schemaPartitions.size(); index++) {
                FieldSchema schema = schemaPartitions.get(index);
                String expectedColumnName = tableDescriptor.getColumnNames()[index + offset];
                String expectedColumnType = tableDescriptor.getColumnTypes()[index + offset];
                // this could be extended to the StorageDescriptor if necessary.
                if (!schema.getName().equalsIgnoreCase(expectedColumnName))
                    throw new HiveTableValidationException(String.format(
                            "hive partition schema mismatch: expected column name '%s', but found '%s'",
                            expectedColumnName, schema.getName()));
                if (!schema.getType().equalsIgnoreCase(expectedColumnType))
                    throw new HiveTableValidationException(String.format(
                            "hive partition schema mismatch: expected column type '%s', but found '%s'",
                            expectedColumnType, schema.getType()));
            }
        }
        return true;
    } catch (MetaException exception) {
        throw new IOException(exception);
    } catch (NoSuchObjectException exception) {
        return false;
    } catch (TException exception) {
        throw new IOException(exception);
    } finally {
        if (metaStoreClient != null)
            metaStoreClient.close();
    }
}
From source file:cn.jpush.hdfs.mr.example.BaileyBorweinPlouffe.java
License:Apache License
/** Run a map/reduce job to compute Pi. */
private static void compute(int startDigit, int nDigits, int nMaps, String workingDir, Configuration conf,
        PrintStream out) throws IOException {
    final String name = startDigit + "_" + nDigits;

    // setup working directory
    out.println("Working Directory = " + workingDir);
    out.println();

    // final FileSystem fs = FileSystem.get(conf);
    final FileSystem fs = new Path(workingDir, "part-r-00000").getFileSystem(conf);
    final Path dir = fs.makeQualified(new Path(workingDir));
    if (fs.exists(dir)) {
        throw new IOException("Working directory " + dir + " already exists. Please remove it first.");
    } else if (!fs.mkdirs(dir)) {
        throw new IOException("Cannot create working directory " + dir);
    }

    out.println("Start Digit = " + startDigit);
    out.println("Number of Digits = " + nDigits);
    out.println("Number of Maps = " + nMaps);

    // setup a job
    final Job job = createJob(name, conf);
    final Path hexfile = new Path(dir, "pi_" + name + ".hex");
    FileOutputFormat.setOutputPath(job, new Path(dir, "out"));

    // setup custom properties
    job.getConfiguration().set(WORKING_DIR_PROPERTY, dir.toString());
    job.getConfiguration().set(HEX_FILE_PROPERTY, hexfile.toString());

    job.getConfiguration().setInt(DIGIT_START_PROPERTY, startDigit);
    job.getConfiguration().setInt(DIGIT_SIZE_PROPERTY, nDigits);
    job.getConfiguration().setInt(DIGIT_PARTS_PROPERTY, nMaps);

    // start a map/reduce job
    out.println("\nStarting Job ...");
    final long startTime = System.currentTimeMillis();
    try {
        if (!job.waitForCompletion(true)) {
            out.println("Job failed.");
            System.exit(1);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        out.println("Duration is " + duration + " seconds.");
    }
    out.println("Output file: " + hexfile);
}
From source file:co.cask.cdap.internal.app.runtime.batch.distributed.MapReduceContainerHelper.java
License:Apache License
/**
 * Gets the MapReduce framework URI based on the {@code mapreduce.application.framework.path} setting.
 *
 * @param hConf the job configuration
 * @return the framework URI or {@code null} if not present or if the URI in the config is invalid.
 */
@Nullable
public static URI getFrameworkURI(Configuration hConf) {
    String framework = hConf.get(MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH);
    if (framework == null) {
        return null;
    }

    try {
        // Parse the path. It can contain '#' to represent the localized file name
        URI uri = new URI(framework);
        String linkName = uri.getFragment();

        // The following resolution logic is copied from JobSubmitter in MR.
        FileSystem fs = FileSystem.get(hConf);
        Path frameworkPath = fs.makeQualified(new Path(uri.getScheme(), uri.getAuthority(), uri.getPath()));
        FileContext fc = FileContext.getFileContext(frameworkPath.toUri(), hConf);
        frameworkPath = fc.resolvePath(frameworkPath);
        uri = frameworkPath.toUri();

        // If it doesn't have a localized name (in the URI fragment), then use the last part of the URI path as the name
        if (linkName == null) {
            linkName = uri.getPath();
            int idx = linkName.lastIndexOf('/');
            if (idx >= 0) {
                linkName = linkName.substring(idx + 1);
            }
        }
        return new URI(uri.getScheme(), uri.getAuthority(), uri.getPath(), null, linkName);
    } catch (URISyntaxException e) {
        LOG.warn("Failed to parse {} as a URI. MapReduce framework path is not used. Check the setting for {}.",
                framework, MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, e);
    } catch (IOException e) {
        LOG.warn("Failed to resolve {} URI. MapReduce framework path is not used. Check the setting for {}.",
                framework, MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, e);
    }
    return null;
}
From source file:co.cask.hydrator.action.ftp.FTPCopyAction.java
License:Apache License
@Override
public void run(ActionContext context) throws Exception {
    Path destination = new Path(config.getDestDirectory());
    FileSystem fileSystem = FileSystem.get(new Configuration());
    destination = fileSystem.makeQualified(destination);
    if (!fileSystem.exists(destination)) {
        fileSystem.mkdirs(destination);
    }

    FTPClient ftp;
    if ("ftp".equals(config.getProtocol().toLowerCase())) {
        ftp = new FTPClient();
    } else {
        ftp = new FTPSClient();
    }
    ftp.setControlKeepAliveTimeout(5);

    // UNIX type server
    FTPClientConfig ftpConfig = new FTPClientConfig();
    // Set additional parameters required for the ftp,
    // for example config.setServerTimeZoneId("Pacific/Pitcairn")
    ftp.configure(ftpConfig);

    try {
        ftp.connect(config.getHost(), config.getPort());
        ftp.enterLocalPassiveMode();
        String replyString = ftp.getReplyString();
        LOG.info("Connected to server {} and port {} with reply from connect as {}.", config.getHost(),
                config.getPort(), replyString);

        // Check the reply code for actual success
        int replyCode = ftp.getReplyCode();

        if (!FTPReply.isPositiveCompletion(replyCode)) {
            ftp.disconnect();
            throw new RuntimeException(String.format("FTP server refused connection with code %s and reply %s.",
                    replyCode, replyString));
        }

        if (!ftp.login(config.getUserName(), config.getPassword())) {
            LOG.error("login command reply code {}, {}", ftp.getReplyCode(), ftp.getReplyString());
            ftp.logout();
            throw new RuntimeException(String.format(
                    "Login to the FTP server %s and port %s failed. Please check user name and password.",
                    config.getHost(), config.getPort()));
        }

        FTPFile[] ftpFiles = ftp.listFiles(config.getSrcDirectory());
        LOG.info("listFiles command reply code: {}, {}.", ftp.getReplyCode(), ftp.getReplyString());
        // Check the reply code for the listFiles call.
        // If it is "522 Data connections must be encrypted", the data channel also needs to be encrypted.
        if (ftp.getReplyCode() == 522 && "sftp".equalsIgnoreCase(config.getProtocol())) {
            // encrypt data channel and listFiles again
            ((FTPSClient) ftp).execPROT("P");
            LOG.info("Attempting command listFiles on encrypted data channel.");
            ftpFiles = ftp.listFiles(config.getSrcDirectory());
        }

        for (FTPFile file : ftpFiles) {
            String source = config.getSrcDirectory() + "/" + file.getName();
            LOG.info("Current file {}, source {}", file.getName(), source);
            if (config.getExtractZipFiles() && file.getName().endsWith(".zip")) {
                copyZip(ftp, source, fileSystem, destination);
            } else {
                Path destinationPath = fileSystem.makeQualified(new Path(destination, file.getName()));
                LOG.debug("Downloading {} to {}", file.getName(), destinationPath.toString());
                try (OutputStream output = fileSystem.create(destinationPath)) {
                    InputStream is = ftp.retrieveFileStream(source);
                    ByteStreams.copy(is, output);
                }
            }
            if (!ftp.completePendingCommand()) {
                LOG.error("Error completing command.");
            }
        }
        ftp.logout();
    } finally {
        if (ftp.isConnected()) {
            try {
                ftp.disconnect();
            } catch (Throwable e) {
                LOG.error("Failure to disconnect the ftp connection.", e);
            }
        }
    }
}
From source file:co.cask.hydrator.action.ftp.FTPCopyAction.java
License:Apache License
private void copyZip(FTPClient ftp, String source, FileSystem fs, Path destination) throws IOException {
    InputStream is = ftp.retrieveFileStream(source);
    try (ZipInputStream zis = new ZipInputStream(new BufferedInputStream(is))) {
        ZipEntry entry;
        while ((entry = zis.getNextEntry()) != null) {
            LOG.debug("Extracting {}", entry);
            Path destinationPath = fs.makeQualified(new Path(destination, entry.getName()));
            try (OutputStream os = fs.create(destinationPath)) {
                LOG.debug("Downloading {} to {}", entry.getName(), destinationPath.toString());
                ByteStreams.copy(zis, os);
            }
        }
    }
}