List of usage examples for org.apache.hadoop.fs.FileSystem#isFile
@Deprecated public boolean isFile(Path f) throws IOException
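isFile returns true only when the path exists and names a regular file; it returns false (rather than throwing) for directories and for nonexistent paths. The method is deprecated, and the Hadoop javadoc points to getFileStatus(Path) as the replacement. Below is a minimal sketch of both forms, assuming a default Configuration; the path is hypothetical:

import java.io.FileNotFoundException;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsFileExample {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        Path p = new Path("/tmp/example.txt"); // hypothetical path

        // Deprecated convenience method: false for directories and missing paths.
        boolean viaIsFile = fs.isFile(p);

        // Recommended replacement: getFileStatus() throws FileNotFoundException
        // for a missing path, so the existence check becomes explicit.
        boolean viaStatus;
        try {
            viaStatus = fs.getFileStatus(p).isFile();
        } catch (FileNotFoundException e) {
            viaStatus = false;
        }

        System.out.println(viaIsFile + " " + viaStatus);
    }
}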
From source file: io.amient.yarn1.YarnClient.java
License: Open Source License
/**
 * Distribute all dependencies in a single jar both from Client to Master as well as Master to Container(s)
 */
public static void distributeResources(Configuration yarnConf, Properties appConf, String appName)
        throws IOException {
    final FileSystem distFs = FileSystem.get(yarnConf);
    final FileSystem localFs = FileSystem.getLocal(yarnConf);
    try {
        // distribute configuration
        final Path dstConfig = new Path(distFs.getHomeDirectory(), appName + ".configuration");
        final FSDataOutputStream fs = distFs.create(dstConfig);
        appConf.store(fs, "Yarn1 Application Config for " + appName);
        fs.close();
        log.info("Updated resource " + dstConfig);

        // distribute main jar
        final String localPath = YarnClient.class.getProtectionDomain().getCodeSource().getLocation().getFile()
                .replace(".jar/", ".jar");
        final Path src;
        final String jarName = appName + ".jar";
        if (localPath.endsWith(".jar")) {
            log.info("Distributing local jar : " + localPath);
            src = new Path(localPath);
        } else {
            try {
                String localArchive = localPath + appName + ".jar";
                localFs.delete(new Path(localArchive), false);
                log.info("Unpacking compile scope dependencies: " + localPath);
                executeShell("mvn -f " + localPath + "/../.. generate-resources");
                log.info("Preparing application main jar " + localArchive);
                executeShell("jar cMf " + localArchive + " -C " + localPath + " ./");
                src = new Path(localArchive);
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
        }

        byte[] digest;
        final MessageDigest md = MessageDigest.getInstance("MD5");
        try (InputStream is = new FileInputStream(src.toString())) {
            DigestInputStream dis = new DigestInputStream(is, md);
            byte[] buffer = new byte[8192];
            int numOfBytesRead;
            while ((numOfBytesRead = dis.read(buffer)) > 0) {
                md.update(buffer, 0, numOfBytesRead);
            }
            digest = md.digest();
        }
        log.info("Local check sum: " + Hex.encodeHexString(digest));

        final Path dst = new Path(distFs.getHomeDirectory(), jarName);
        Path remoteChecksumFile = new Path(distFs.getHomeDirectory(), jarName + ".md5");
        boolean checksumMatches = false;
        if (distFs.isFile(remoteChecksumFile)) {
            try (InputStream r = distFs.open(remoteChecksumFile)) {
                ByteArrayOutputStream buffer = new ByteArrayOutputStream();
                int nRead;
                byte[] data = new byte[1024];
                while ((nRead = r.read(data, 0, data.length)) != -1) {
                    buffer.write(data, 0, nRead);
                }
                buffer.flush();
                byte[] remoteDigest = buffer.toByteArray();
                log.info("Remote check sum: " + Hex.encodeHexString(remoteDigest));
                checksumMatches = Arrays.equals(digest, remoteDigest);
            }
        }
        if (!checksumMatches) {
            log.info("Updating resource " + dst + " ...");
            distFs.copyFromLocalFile(false, true, src, dst);
            try (FSDataOutputStream remoteChecksumStream = distFs.create(remoteChecksumFile)) {
                log.info("Updating checksum " + remoteChecksumFile + " ...");
                remoteChecksumStream.write(digest);
            }
            FileStatus scFileStatus = distFs.getFileStatus(dst);
            log.info("Updated resource " + dst + " " + scFileStatus.getLen());
        }
    } catch (NoSuchAlgorithmException e) {
        throw new IOException(e);
    }
}
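In this example isFile doubles as an existence probe: because it returns false for a nonexistent path, the remote .md5 marker is read only when it is actually present, and a missing or mismatching checksum simply triggers a fresh copyFromLocalFile plus a rewrite of the marker file. Note that the jar bytes are hashed twice (once implicitly by the DigestInputStream, once by the explicit md.update call), but since the stored checksum is produced by the same routine, the comparison remains self-consistent.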
From source file: io.transwarp.flume.sink.HDFSCompressedDataStream.java
License: Apache License
@Override
public void open(String filePath, CompressionCodec codec, CompressionType cType) throws IOException {
    Configuration conf = new Configuration();
    Path dstPath = new Path(filePath);
    FileSystem hdfs = dstPath.getFileSystem(conf);
    if (useRawLocalFileSystem) {
        if (hdfs instanceof LocalFileSystem) {
            hdfs = ((LocalFileSystem) hdfs).getRaw();
        } else {
            logger.warn("useRawLocalFileSystem is set to true but file system "
                    + "is not of type LocalFileSystem: " + hdfs.getClass().getName());
        }
    }
    boolean appending = false;
    if (conf.getBoolean("hdfs.append.support", false) && hdfs.isFile(dstPath)) {
        fsOut = hdfs.append(dstPath);
        appending = true;
    } else {
        fsOut = hdfs.create(dstPath);
    }
    if (compressor == null) {
        compressor = CodecPool.getCompressor(codec, conf);
    }
    cmpOut = codec.createOutputStream(fsOut, compressor);
    serializer = EventSerializerFactory.getInstance(serializerType, serializerContext, cmpOut);
    if (appending && !serializer.supportsReopen()) {
        cmpOut.close();
        serializer = null;
        throw new IOException("serializer (" + serializerType + ") does not support append");
    }
    registerCurrentStream(fsOut, hdfs, dstPath);
    if (appending) {
        serializer.afterReopen();
    } else {
        serializer.afterCreate();
    }
    isFinished = false;
}
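Here isFile gates the append path of this Flume sink: the stream is reopened with hdfs.append only when hdfs.append.support is enabled and the target already exists as a regular file; in every other case a new file is created.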
From source file: jp.ac.u.tokyo.m.pig.udf.load.LoadDataWithSchema.java
License: Apache License
@Override
public ResourceSchema getSchema(String aLocation, Job aJob) throws IOException {
    Configuration tConfiguration = aJob.getConfiguration();
    Path tDataPath = new Path(aLocation);
    FileSystem tFileSystem = tDataPath.getFileSystem(tConfiguration);
    Path tSchemaFilePath = tFileSystem.isFile(tDataPath)
            ? new Path(tDataPath.getParent(), StoreConstants.STORE_FILE_NAME_SCHEMA)
            : new Path(tDataPath, StoreConstants.STORE_FILE_NAME_SCHEMA);
    RowSchema tRowSchema = LoadSchemaUtil.loadSchemaFile(tFileSystem, tSchemaFilePath, mEncoding);
    ResourceSchema tResourceSchema = new ResourceSchema();
    TypeStringCasterPigToPigTypeByte tTypeCaster = TypeStringCasterPigToPigTypeByte.INSTANCE;
    List<ColumnSchema> tColumnSchemaList = tRowSchema.getColumnSchemaList();
    int tSize = tColumnSchemaList.size();
    ResourceFieldSchema[] tResourceFieldSchemas = new ResourceFieldSchema[tSize];
    int tIndex = 0;
    for (ColumnSchema tCurrentColumnSchema : tColumnSchemaList) {
        tResourceFieldSchemas[tIndex++] = new ResourceFieldSchema(new FieldSchema(
                tCurrentColumnSchema.getName(), tTypeCaster.castTypeString(tCurrentColumnSchema.getType())));
    }
    tResourceSchema.setFields(tResourceFieldSchemas);
    return tResourceSchema;
}
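In this Pig loader, isFile decides where the schema file is looked up: next to a single data file (in its parent directory), or inside the load location when it is a directory.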
From source file: net.sf.katta.node.ShardManager.java
License: Apache License
private void installShard(String shardName, String shardPath, File localShardFolder) throws KattaException {
    LOG.info("install shard '" + shardName + "' from " + shardPath);
    // TODO sg: to fix HADOOP-4422 we try to download the shard 5 times
    int maxTries = 5;
    for (int i = 0; i < maxTries; i++) {
        URI uri;
        try {
            uri = new URI(shardPath);
            FileSystem fileSystem = FileSystem.get(uri, new Configuration());
            if (_throttleSemaphore != null) {
                fileSystem = new ThrottledFileSystem(fileSystem, _throttleSemaphore);
            }
            final Path path = new Path(shardPath);
            boolean isZip = fileSystem.isFile(path) && shardPath.endsWith(".zip");
            File shardTmpFolder = new File(localShardFolder.getAbsolutePath() + "_tmp");
            try {
                FileUtil.deleteFolder(localShardFolder);
                FileUtil.deleteFolder(shardTmpFolder);
                if (isZip) {
                    FileUtil.unzip(path, shardTmpFolder, fileSystem,
                            System.getProperty("katta.spool.zip.shards", "false").equalsIgnoreCase("true"));
                } else {
                    fileSystem.copyToLocalFile(path, new Path(shardTmpFolder.getAbsolutePath()));
                }
                shardTmpFolder.renameTo(localShardFolder);
            } finally {
                // Ensure that the tmp folder is deleted on an error
                FileUtil.deleteFolder(shardTmpFolder);
            }
            // Looks like we were successful.
            if (i > 0) {
                LOG.error("Loaded shard:" + shardPath);
            }
            return;
        } catch (final URISyntaxException e) {
            throw new KattaException("Can not parse uri for path: " + shardPath, e);
        } catch (final Exception e) {
            LOG.error(String.format("Error loading shard: %s (try %d of %d)", shardPath, i, maxTries), e);
            if (i >= maxTries - 1) {
                throw new KattaException("Can not load shard: " + shardPath, e);
            }
        }
    }
}
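isFile combined with the .zip suffix distinguishes a packed shard from a shard directory: an archive is unzipped into the temporary folder, while anything else is fetched with copyToLocalFile, which copies directories recursively.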
From source file: net.team1.dev.HousingAnalysis.java
License: Apache License
/**
 * The main entry point for the map/reduce runner.
 *
 * @param args 2 args: <input dir> <output dir>
 * @throws Exception Throws IOException
 */
public static void main(String[] args) throws Exception {
    Path inputDir = new Path(args[0]);
    Path outputDir = new Path(args[1]);
    FileSystem fs = FileSystem.get(new Configuration());
    if (!fs.exists(inputDir))
        throw new IOException("The input path does not exist.");
    if (fs.isFile(inputDir))
        throw new IOException("The input path is a file.");
    if (fs.exists(outputDir))
        fs.delete(outputDir, true);

    // set job configuration
    JobConf conf = new JobConf(HousingAnalysis.class);
    conf.setJobName("housinganalysis");
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setCombinerClass(HousingReducer.class);
    conf.setReducerClass(HousingReducer.class);

    // set multiple input files
    HashMap<Path, Class<? extends Mapper>> inputMappers = getInputFilePaths(inputDir, fs);
    for (Path p : inputMappers.keySet()) {
        MultipleInputs.addInputPath(conf, p, TextInputFormat.class, inputMappers.get(p));
        LOG.info(p.getName() + ": " + inputMappers.get(p).getName());
    }

    // set output
    FileOutputFormat.setOutputPath(conf, outputDir);

    // start the job
    JobClient.runJob(conf);
}
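exists and isFile together form the input validation here: the job aborts early when the input path is missing or names a file instead of a directory, and any pre-existing output directory is deleted before the job starts.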
From source file: org.ankus.mapreduce.algorithms.clustering.kmeans.KMeansDriver.java
License: Apache License
private void setInitialClusterCenter(Configuration conf, String clusterOutputPath) throws Exception {
    /**
     * TODO:
     * Current Process
     *  - get top n data (n is defined cluster count)
     *  - set each data to initial cluster center
     *
     * Following Process is reasonable. => MR Job
     * 1. Distribution
     *  - get statistics(distribution) for all attributes
     *  - use min/max and freq for initial cluster center setting
     *    numeric => (max-min) / cluster count
     *    nominal => each value (freq sort)
     */
    FileSystem fs = FileSystem.get(conf);
    String readStr;
    String[] tokens;
    int index = 0;
    int clusterCnt = Integer.parseInt(conf.get(ArgumentsConstants.CLUSTER_COUNT, "1"));
    KMeansClusterInfoMgr[] clusters = new KMeansClusterInfoMgr[clusterCnt];

    Path inputPath = new Path(conf.get(ArgumentsConstants.INPUT_PATH, null));
    if (!fs.isFile(inputPath)) {
        boolean isFile = false;
        while (!isFile) {
            FileStatus[] status = fs.listStatus(inputPath);
            if (fs.isFile(status[0].getPath()))
                isFile = true;
            inputPath = status[0].getPath();
        }
    }

    FSDataInputStream fin = fs.open(inputPath);
    BufferedReader br = new BufferedReader(new InputStreamReader(fin, Constants.UTF8));
    while ((readStr = br.readLine()) != null) {
        clusters[index] = new KMeansClusterInfoMgr();
        clusters[index].setClusterID(index);
        tokens = readStr.split(conf.get(ArgumentsConstants.DELIMITER, "\t"));
        for (int i = 0; i < tokens.length; i++) {
            if (CommonMethods.isContainIndex(mIndexArr, i, true)
                    && !CommonMethods.isContainIndex(mExceptionIndexArr, i, false)) {
                if (CommonMethods.isContainIndex(mNominalIndexArr, i, false)) {
                    clusters[index].addAttributeValue(i, tokens[i], ConfigurationVariable.NOMINAL_ATTRIBUTE);
                } else {
                    clusters[index].addAttributeValue(i, tokens[i], ConfigurationVariable.NUMERIC_ATTRIBUTE);
                }
            }
        }
        index++;
        if (index >= clusterCnt)
            break;
    }
    br.close();
    fin.close();

    FSDataOutputStream fout = fs.create(new Path(clusterOutputPath + "/part-r-00000"), true);
    BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fout, Constants.UTF8));
    for (int i = 0; i < clusters.length; i++) {
        bw.write(clusters[i].getClusterInfoString(conf.get(ArgumentsConstants.DELIMITER, "\t"),
                mNominalDelimiter) + "\n");
    }
    bw.close();
    fout.close();
}
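When the configured input path is not a regular file, the loop descends into the first listStatus entry at each level until it reaches a file (e.g. the part-r-00000 of an earlier job) and reads its first clusterCnt lines as initial cluster centers; note that it only ever follows the first child at each level.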
From source file: org.apache.accumulo.core.client.mock.MockTableOperations.java
License: Apache License
@Override
public void importDirectory(String tableName, String dir, String failureDir, boolean setTime)
        throws IOException, AccumuloException, AccumuloSecurityException, TableNotFoundException {
    long time = System.currentTimeMillis();
    MockTable table = acu.tables.get(tableName);
    if (table == null) {
        throw new TableNotFoundException(null, tableName, "The table was not found");
    }
    Path importPath = new Path(dir);
    Path failurePath = new Path(failureDir);
    FileSystem fs = acu.getFileSystem();

    /*
     * check preconditions
     */
    // directories are directories
    if (fs.isFile(importPath)) {
        throw new IOException("Import path must be a directory.");
    }
    if (fs.isFile(failurePath)) {
        throw new IOException("Failure path must be a directory.");
    }
    // failures are writable
    Path createPath = failurePath.suffix("/.createFile");
    FSDataOutputStream createStream = null;
    try {
        createStream = fs.create(createPath);
    } catch (IOException e) {
        throw new IOException("Error path is not writable.");
    } finally {
        if (createStream != null) {
            createStream.close();
        }
    }
    fs.delete(createPath, false);
    // failures are empty
    FileStatus[] failureChildStats = fs.listStatus(failurePath);
    if (failureChildStats.length > 0) {
        throw new IOException("Error path must be empty.");
    }

    /*
     * Begin the import - iterate the files in the path
     */
    for (FileStatus importStatus : fs.listStatus(importPath)) {
        try {
            FileSKVIterator importIterator = FileOperations.getInstance().newReaderBuilder()
                    .forFile(importStatus.getPath().toString(), fs, fs.getConf())
                    .withTableConfiguration(AccumuloConfiguration.getDefaultConfiguration())
                    .seekToBeginning().build();
            while (importIterator.hasTop()) {
                Key key = importIterator.getTopKey();
                Value value = importIterator.getTopValue();
                if (setTime) {
                    key.setTimestamp(time);
                }
                Mutation mutation = new Mutation(key.getRow());
                if (!key.isDeleted()) {
                    mutation.put(key.getColumnFamily(), key.getColumnQualifier(),
                            new ColumnVisibility(key.getColumnVisibilityData().toArray()),
                            key.getTimestamp(), value);
                } else {
                    mutation.putDelete(key.getColumnFamily(), key.getColumnQualifier(),
                            new ColumnVisibility(key.getColumnVisibilityData().toArray()),
                            key.getTimestamp());
                }
                table.addMutation(mutation);
                importIterator.next();
            }
        } catch (Exception e) {
            FSDataOutputStream failureWriter = null;
            DataInputStream failureReader = null;
            try {
                failureWriter = fs.create(failurePath.suffix("/" + importStatus.getPath().getName()));
                failureReader = fs.open(importStatus.getPath());
                int read = 0;
                byte[] buffer = new byte[1024];
                while (-1 != (read = failureReader.read(buffer))) {
                    failureWriter.write(buffer, 0, read);
                }
            } finally {
                if (failureReader != null)
                    failureReader.close();
                if (failureWriter != null)
                    failureWriter.close();
            }
        }
        fs.delete(importStatus.getPath(), true);
    }
}
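Both precondition checks use isFile negatively ("must be a directory"): since isFile is also false for a nonexistent path, a missing directory passes this check and only surfaces later, e.g. when listStatus is called.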
From source file: org.apache.accumulo.core.client.mock.MockTableOperationsImpl.java
License: Apache License
@Override
public void importDirectory(String tableName, String dir, String failureDir, boolean setTime)
        throws IOException, AccumuloException, AccumuloSecurityException, TableNotFoundException {
    long time = System.currentTimeMillis();
    MockTable table = acu.tables.get(tableName);
    if (table == null) {
        throw new TableNotFoundException(null, tableName, "The table was not found");
    }
    Path importPath = new Path(dir);
    Path failurePath = new Path(failureDir);
    FileSystem fs = acu.getFileSystem();

    /*
     * check preconditions
     */
    // directories are directories
    if (fs.isFile(importPath)) {
        throw new IOException("Import path must be a directory.");
    }
    if (fs.isFile(failurePath)) {
        throw new IOException("Failure path must be a directory.");
    }
    // failures are writable
    Path createPath = failurePath.suffix("/.createFile");
    FSDataOutputStream createStream = null;
    try {
        createStream = fs.create(createPath);
    } catch (IOException e) {
        throw new IOException("Error path is not writable.");
    } finally {
        if (createStream != null) {
            createStream.close();
        }
    }
    fs.delete(createPath, false);
    // failures are empty
    FileStatus[] failureChildStats = fs.listStatus(failurePath);
    if (failureChildStats.length > 0) {
        throw new IOException("Error path must be empty.");
    }

    /*
     * Begin the import - iterate the files in the path
     */
    for (FileStatus importStatus : fs.listStatus(importPath)) {
        try {
            FileSKVIterator importIterator = FileOperations.getInstance().openReader(
                    importStatus.getPath().toString(), true, fs, fs.getConf(),
                    AccumuloConfiguration.getDefaultConfiguration());
            while (importIterator.hasTop()) {
                Key key = importIterator.getTopKey();
                Value value = importIterator.getTopValue();
                if (setTime) {
                    key.setTimestamp(time);
                }
                Mutation mutation = new Mutation(key.getRow());
                if (!key.isDeleted()) {
                    mutation.put(key.getColumnFamily(), key.getColumnQualifier(),
                            new ColumnVisibility(key.getColumnVisibilityData().toArray()),
                            key.getTimestamp(), value);
                } else {
                    mutation.putDelete(key.getColumnFamily(), key.getColumnQualifier(),
                            new ColumnVisibility(key.getColumnVisibilityData().toArray()),
                            key.getTimestamp());
                }
                table.addMutation(mutation);
                importIterator.next();
            }
        } catch (Exception e) {
            FSDataOutputStream failureWriter = null;
            DataInputStream failureReader = null;
            try {
                failureWriter = fs.create(failurePath.suffix("/" + importStatus.getPath().getName()));
                failureReader = fs.open(importStatus.getPath());
                int read = 0;
                byte[] buffer = new byte[1024];
                while (-1 != (read = failureReader.read(buffer))) {
                    failureWriter.write(buffer, 0, read);
                }
            } finally {
                if (failureReader != null)
                    failureReader.close();
                if (failureWriter != null)
                    failureWriter.close();
            }
        }
        fs.delete(importStatus.getPath(), true);
    }
}
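This older variant performs the same isFile preconditions; it differs from the previous example only in constructing the reader via FileOperations.getInstance().openReader(...) instead of the newer builder API.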
From source file: org.apache.ambari.fast_hdfs_resource.Resource.java
License: Apache License
public static void checkResourceParameters(Resource resource, FileSystem dfs)
        throws IllegalArgumentException, IOException {
    ArrayList<String> actionsAvailable = new ArrayList<String>();
    actionsAvailable.add("create");
    actionsAvailable.add("delete");
    ArrayList<String> typesAvailable = new ArrayList<String>();
    typesAvailable.add("file");
    typesAvailable.add("directory");

    if (resource.getTarget() == null)
        throw new IllegalArgumentException("Path to resource in HadoopFs must be filled.");
    if (resource.getAction() == null || !actionsAvailable.contains(resource.getAction()))
        throw new IllegalArgumentException("Action is not supported.");
    if (resource.getType() == null || !typesAvailable.contains(resource.getType()))
        throw new IllegalArgumentException("Type is not supported.");

    // Check consistency for ("type":"file" == file in hadoop)
    if (dfs.isFile(new Path(resource.getTarget())) && !"file".equals(resource.getType()))
        throw new IllegalArgumentException("Cannot create a directory " + resource.getTarget()
                + " because file is present on the given path.");
    // Check consistency for ("type":"directory" == directory in hadoop)
    else if (dfs.isDirectory(new Path(resource.getTarget())) && !"directory".equals(resource.getType()))
        throw new IllegalArgumentException("Cannot create a file " + resource.getTarget()
                + " because directory is present on the given path.");

    if (resource.getSource() != null) {
        File source = new File(resource.getSource());
        if (source.isFile() && !"file".equals(resource.getType()))
            throw new IllegalArgumentException("Cannot create a directory " + resource.getTarget()
                    + " because source " + resource.getSource() + " is a file");
        else if (source.isDirectory() && !"directory".equals(resource.getType()))
            throw new IllegalArgumentException("Cannot create a file " + resource.getTarget()
                    + " because source " + resource.getSource() + " is a directory");
    }
}
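isFile and isDirectory enforce consistency between the declared resource type and whatever already exists at the target path (and, when a source is given, the type of the local source). Both convenience methods are deprecated on FileSystem in the same way; getFileStatus(path).isFile() and .isDirectory() are the non-deprecated equivalents.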
From source file: org.apache.avro.tool.Util.java
License: Apache License
/**
 * If pathname is a file, this method returns a list with a single absolute Path to that file;
 * if pathname is a directory, this method returns a list of Paths to all the files within
 * this directory.
 * Only files inside that directory are included, no subdirectories or files in subdirectories
 * will be added.
 * The List is sorted alphabetically.
 * @param fileOrDirName filename or directoryname
 * @return A Path List
 * @throws IOException
 */
static List<Path> getFiles(String fileOrDirName) throws IOException {
    List<Path> pathList = new ArrayList<Path>();
    Path path = new Path(fileOrDirName);
    FileSystem fs = path.getFileSystem(new Configuration());
    if (fs.isFile(path)) {
        pathList.add(path);
    } else if (fs.getFileStatus(path).isDir()) {
        for (FileStatus status : fs.listStatus(path)) {
            if (!status.isDir()) {
                pathList.add(status.getPath());
            }
        }
    }
    Collections.sort(pathList);
    return pathList;
}
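isFile selects the single-file branch here; the directory branch goes through getFileStatus(path).isDir(), a FileStatus method that newer Hadoop versions deprecate in favor of isDirectory().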