List of usage examples for org.apache.hadoop.fs FileSystem isFile
@Deprecated public boolean isFile(Path f) throws IOException
From source file:com.cloudera.impala.analysis.CreateTableLikeFileStmt.java
License:Apache License
/** * Reads the first block from the given HDFS file and returns the Parquet schema. * Throws Analysis exception for any failure, such as failing to read the file * or failing to parse the contents.//from ww w. j a v a 2s . c o m */ private static parquet.schema.MessageType loadParquetSchema(Path pathToFile) throws AnalysisException { try { FileSystem fs = pathToFile.getFileSystem(FileSystemUtil.getConfiguration()); if (!fs.isFile(pathToFile)) { throw new AnalysisException("Cannot infer schema, path is not a file: " + pathToFile); } } catch (IOException e) { throw new AnalysisException("Failed to connect to HDFS:" + e); } ParquetMetadata readFooter = null; try { readFooter = ParquetFileReader.readFooter(FileSystemUtil.getConfiguration(), pathToFile); } catch (FileNotFoundException e) { throw new AnalysisException("File not found: " + e); } catch (IOException e) { throw new AnalysisException("Failed to open HDFS file as a parquet file: " + e); } catch (RuntimeException e) { // Parquet throws a generic RuntimeException when reading a non-parquet file if (e.toString().contains("is not a Parquet file")) { throw new AnalysisException("File is not a parquet file: " + pathToFile); } // otherwise, who knows what we caught, throw it back up throw e; } return readFooter.getFileMetaData().getSchema(); }
From source file:com.dalabs.droop.util.password.FilePasswordLoader.java
License:Apache License
/** * Verify that given path leads to a file that we can read. * * @param fs Associated FileSystem/*from ww w .j ava 2s. c o m*/ * @param path Path * @throws IOException */ protected void verifyPath(FileSystem fs, Path path) throws IOException { if (!fs.exists(path)) { throw new IOException("The provided password file " + path + " does not exist!"); } if (!fs.isFile(path)) { throw new IOException("The provided password file " + path + " is a directory!"); } }
From source file:com.dasasian.chok.node.ShardManager.java
License:Apache License
private void installShard(String shardName, String shardPath, File localShardFolder) throws ChokException { LOG.info("install shard '" + shardName + "' from " + shardPath); // TODO sg: to fix HADOOP-4422 we try to download the shard 5 times int maxTries = 5; for (int i = 0; i < maxTries; i++) { URI uri;//www .j av a 2 s .co m try { uri = new URI(shardPath); FileSystem fileSystem = FileSystem.get(uri, new Configuration()); if (throttleSemaphore != null) { fileSystem = new ThrottledFileSystem(fileSystem, throttleSemaphore); } final Path path = new Path(shardPath); boolean isZip = fileSystem.isFile(path) && shardPath.endsWith(".zip"); File shardTmpFolder = new File(localShardFolder.getAbsolutePath() + "_tmp"); try { FileUtil.deleteFolder(localShardFolder); FileUtil.deleteFolder(shardTmpFolder); if (isZip) { FileUtil.unzip(path, shardTmpFolder, fileSystem, "true".equalsIgnoreCase(System.getProperty("chok.spool.zip.shards", "false"))); } else { fileSystem.copyToLocalFile(path, new Path(shardTmpFolder.getAbsolutePath())); } shardTmpFolder.renameTo(localShardFolder); } finally { // Ensure that the tmp folder is deleted on an error FileUtil.deleteFolder(shardTmpFolder); } // Looks like we were successful. if (i > 0) { LOG.error("Loaded shard:" + shardPath); } return; } catch (final URISyntaxException e) { throw new ChokException("Can not parse uri for path: " + shardPath, e); } catch (final Exception e) { LOG.error(String.format("Error loading shard: %s (try %d of %d)", shardPath, i, maxTries), e); if (i >= maxTries - 1) { throw new ChokException("Can not load shard: " + shardPath, e); } } } }
From source file:com.datasayer.meerkat.MeerJobRunner.java
License:Apache License
@SuppressWarnings("unchecked") @Override/*from w ww. jav a 2s . com*/ public void bsp(final BSPPeer<Writable, Writable, Writable, Writable, Writable> peer) throws IOException, SyncException, InterruptedException { while (true) { try { long currentTime = System.currentTimeMillis(); FileSystem fs = FileSystem.get(conf); if (!fs.isFile(logPath)) { System.out.println("can not read input file"); return; } RandomAccessFile file = new RandomAccessFile(logPath.toString(), "r"); long fileLength = file.length(); if (fileLength > filePointer) { file.seek(filePointer); String line = null; while (file.length() > file.getFilePointer()) { line = file.readLine(); line = new String(line.getBytes("8859_1"), "utf-8"); guardMeer.observe(line); } filePointer = file.getFilePointer(); } else { // nothing to do } file.close(); long timeDiff = currentTime - this.lastAggregatedTime; if (timeDiff >= this.aggregationInterval) { peer.sync(); if (peer.getPeerName().equals(masterName)) { bossMeer.masterCompute(new Iterator<Writable>() { private final int producedMessages = peer.getNumCurrentMessages(); private int consumedMessages = 0; @Override public boolean hasNext() { return producedMessages > consumedMessages; } @Override public Writable next() throws NoSuchElementException { if (consumedMessages >= producedMessages) { throw new NoSuchElementException(); } try { consumedMessages++; return peer.getCurrentMessage(); } catch (IOException e) { throw new NoSuchElementException(); } } @Override public void remove() { // BSPPeer.getCurrentMessage originally deletes a message. // Thus, it doesn't need to throw exception. // throw new UnsupportedOperationException(); } }, signalMeer); this.lastAggregatedTime = currentTime; } } } catch (IOException e) { e.printStackTrace(); } } }
From source file:com.ebay.erl.mobius.core.builder.AbstractDatasetBuilder.java
License:Apache License
/** * Add the <code>paths</code> to the underline dataset. A boolean * flag <code>validatePathExistance</code> to specify if Mobius * needs to verify the specified <code>paths</code> exist or not. * <p>//from w w w . ja v a2s . c o m * * If <code>validatePathExistance</code> is true, and one of the * <code>paths</code> doesn't exist, <code>IOException</code> will * be thrown. * <p> * * If a path exists and it's a folder, {@link #checkTouchFile(FileSystem, Path)} * will be called to see if a touch file exists under that folder or not. * The default implementation of <code>checkTouchFile</code> always return * true, which means the dataset builder doesn't check touch file by default. * If this is a need to check touch file, the subclass should override that * function, and when the funciton return false, <code>IOException</code> * will be thrown here for that specific path. */ protected ACTUAL_BUILDER_IMPL addInputPath(boolean validatePathExistance, Path... paths) throws IOException { if (paths == null || paths.length == 0) { throw new IllegalArgumentException("Please specify at least one path"); } FileSystem fs = FileSystem.get(this.mobiusJob.getConf()); for (Path aPath : paths) { FileStatus[] fileStatus = null; try { fileStatus = fs.globStatus(aPath); } catch (NullPointerException e) { LOGGER.warn("FileSystem list globStatus thrown NPE", e); } if (fileStatus == null) { if (validatePathExistance) { throw new FileNotFoundException(aPath.toString() + " doesn't exist on file system."); } else { // no need to validate, as the input // for this dataset is coming from // the output of the other dataset. 
this.getDataset().addInputs(aPath); } } else { // file(s) exists, add inputs for (FileStatus aFileStatus : fileStatus) { Path p = aFileStatus.getPath(); if (!fs.isFile(p)) { if (!this.checkTouchFile(fs, p)) { throw new IllegalStateException( "No touch file under " + p.toString() + ", this dataset is not ready."); } else { this.getDataset().addInputs(p); } } else { this.getDataset().addInputs(p); } } } } return (ACTUAL_BUILDER_IMPL) this; }
From source file:com.elex.dmp.lda.InMemoryCollapsedVariationalBayes0.java
License:Apache License
/**
 * Reads every vector stored under the given path into a single row matrix.
 *
 * <p>The path may be a single sequence file or a directory. For a directory,
 * the entries accepted by {@code PathFilters.logsCRCFilter()} are read. The
 * column count and the sequential-access flag are taken from the first vector.
 *
 * @param vectorPathString file or directory holding the vectors
 * @param conf             Hadoop configuration used to resolve and read the path
 * @return a matrix with one row per vector read
 * @throws IOException if the path cannot be read or contains no vectors
 */
private static Matrix loadVectors(String vectorPathString, Configuration conf) throws IOException {
  Path vectorPath = new Path(vectorPathString);
  FileSystem fs = vectorPath.getFileSystem(conf);

  List<Path> subPaths = Lists.newArrayList();
  if (fs.isFile(vectorPath)) {
    subPaths.add(vectorPath);
  } else {
    for (FileStatus fileStatus : fs.listStatus(vectorPath, PathFilters.logsCRCFilter())) {
      subPaths.add(fileStatus.getPath());
    }
  }

  List<Vector> vectorList = Lists.newArrayList();
  for (Path subPath : subPaths) {
    for (Pair<IntWritable, VectorWritable> record : new SequenceFileIterable<IntWritable, VectorWritable>(
        subPath, true, conf)) {
      vectorList.add(record.getSecond().get());
    }
  }

  // The matrix shape is derived from the first vector, so an empty input
  // must be rejected here with a message that names the offending path.
  if (vectorList.isEmpty()) {
    throw new IOException("No vectors found under " + vectorPath);
  }

  Vector first = vectorList.get(0);
  int numRows = vectorList.size();
  int numCols = first.size();
  return new SparseRowMatrix(numRows, numCols, vectorList.toArray(new Vector[vectorList.size()]), true,
      first.isSequentialAccess());
}
From source file:com.facebook.hiveio.common.FileSystems.java
License:Apache License
/** * Move a file or directory from source to destination, recursively copying * subdirectories./* w w w . ja v a 2s . c om*/ * * @param fs FileSystem * @param file path to copy (file or directory) * @param src path to source directory * @param dest path to destination directory * @throws IOException I/O problems */ public static void move(FileSystem fs, Path file, Path src, Path dest) throws IOException { Path destFilePath = pathInDestination(file, src, dest); if (fs.isFile(file)) { if (fs.exists(destFilePath)) { if (!fs.delete(destFilePath, true)) { throw new IllegalArgumentException("Could not remove existing file " + destFilePath); } } if (!fs.rename(file, destFilePath)) { throw new IllegalArgumentException("Could not move " + file + " to " + destFilePath); } } else if (fs.getFileStatus(file).isDir()) { FileStatus[] statuses = fs.listStatus(file); fs.mkdirs(destFilePath); if (statuses != null) { for (FileStatus status : statuses) { move(fs, status.getPath(), src, dest); } } } }
From source file:com.github.seqware.queryengine.plugins.hbasemr.MRHBasePluginRunner.java
License:Open Source License
public File handleFileResult(Path path) { FileSystem fs = null; try {/*from w ww. j ava2 s . c om*/ Path outputPartPath = new Path(path, "part-r-00000"); // copy file from HDFS to local temporary file Logger.getLogger(FeaturesByFilterPlugin.class.getName()) .info("Source file is " + outputPartPath.toString()); Configuration conf = new Configuration(); HBaseStorage.configureHBaseConfig(conf); HBaseConfiguration.addHbaseResources(conf); fs = FileSystem.get(conf); File createTempFile = File.createTempFile("vcf", "out"); createTempFile.delete(); Path outPath = new Path(createTempFile.toURI()); FileSystem localSystem = FileSystem.get(new Configuration()); Logger.getLogger(FeaturesByFilterPlugin.class.getName()) .info("Destination file is " + outPath.toString()); if (!fs.exists(outputPartPath)) { Logger.getLogger(FeaturesByFilterPlugin.class.getName()).fatal("Input file not found"); } if (!fs.isFile(outputPartPath)) { Logger.getLogger(FeaturesByFilterPlugin.class.getName()).fatal("Input should be a file"); } if (localSystem.exists(outPath)) { Logger.getLogger(FeaturesByFilterPlugin.class.getName()).fatal("Output already exists"); } // doesn't quite work yet, no time to finish before poster, check results manually on hdfs FileUtil.copy(fs, outputPartPath, localSystem, outPath, true, true, conf); return new File(outPath.toUri()); } catch (IOException ex) { Logger.getLogger(VCFDumperPlugin.class.getName()).fatal(null, ex); } finally { if (fs != null) { try { fs.delete(path, true); } catch (IOException ex) { Logger.getLogger(VCFDumperPlugin.class.getName()) .warn("IOException when clearing after text output", ex); } } } return null; }
From source file:com.huayu.metis.flume.sink.hdfs.HDFSSequenceFile.java
License:Apache License
@Override public void open(String filePath) throws IOException { Configuration conf = new Configuration(); Path dstPath = new Path(filePath); FileSystem fileSystem = dstPath.getFileSystem(conf); //2.2Hadoop, dfs.append.support ??? if (fileSystem.exists(dstPath) && fileSystem.isFile(dstPath)) { outStream = fileSystem.append(dstPath); } else {/*w w w .ja v a 2s .c o m*/ outStream = fileSystem.create(dstPath); } writer = SequenceFile.createWriter(conf, SequenceFile.Writer.stream(outStream), SequenceFile.Writer.keyClass(serializer.getKeyClass()), SequenceFile.Writer.valueClass(serializer.getValueClass())); registerCurrentStream(outStream, fileSystem, dstPath); }
From source file:com.ibm.bi.dml.runtime.io.WriterMatrixMarket.java
License:Open Source License
/** * //w w w. j av a2s . c om * @param srcFileName * @param fileName * @param rlen * @param clen * @param nnz * @throws IOException */ public void mergeTextcellToMatrixMarket(String srcFileName, String fileName, long rlen, long clen, long nnz) throws IOException { Configuration conf = new Configuration(ConfigurationManager.getCachedJobConf()); Path src = new Path(srcFileName); Path merge = new Path(fileName); FileSystem hdfs = FileSystem.get(conf); if (hdfs.exists(merge)) { hdfs.delete(merge, true); } OutputStream out = hdfs.create(merge, true); // write out the header first StringBuilder sb = new StringBuilder(); sb.append("%%MatrixMarket matrix coordinate real general\n"); // output number of rows, number of columns and number of nnz sb.append(rlen + " " + clen + " " + nnz + "\n"); out.write(sb.toString().getBytes()); // if the source is a directory if (hdfs.getFileStatus(src).isDirectory()) { try { FileStatus[] contents = hdfs.listStatus(src); for (int i = 0; i < contents.length; i++) { if (!contents[i].isDirectory()) { InputStream in = hdfs.open(contents[i].getPath()); try { IOUtils.copyBytes(in, out, conf, false); } finally { IOUtilFunctions.closeSilently(in); } } } } finally { IOUtilFunctions.closeSilently(out); } } else if (hdfs.isFile(src)) { InputStream in = null; try { in = hdfs.open(src); IOUtils.copyBytes(in, out, conf, true); } finally { IOUtilFunctions.closeSilently(in); IOUtilFunctions.closeSilently(out); } } else { throw new IOException(src.toString() + ": No such file or directory"); } }