Usage examples for org.apache.hadoop.fs.Path.getParent()
public Path getParent()
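Returns the parent of the path, or null if the path is the root. Before the project examples, here is a minimal sketch of this behavior; the class name and paths are illustrative only, not taken from any of the projects listed below.

import org.apache.hadoop.fs.Path;

public class GetParentExample {
    public static void main(String[] args) {
        Path part = new Path("/user/hadoop/output/part-00000");

        // The parent of a file path is its containing directory.
        Path outputDir = part.getParent();
        System.out.println(outputDir);                 // /user/hadoop/output

        // Calls can be chained to walk up the directory tree.
        System.out.println(outputDir.getParent());     // /user/hadoop

        // At the root there is no parent, so getParent() returns null.
        System.out.println(new Path("/").getParent()); // null
    }
}

Most of the examples below use getParent() for one of two things: locating sibling files next to a known path (renaming outputs in place, creating an archive beside its source) or creating the containing directory of a destination before writing to it.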
From source file:kogiri.mapreduce.preprocess.common.helpers.KmerIndexHelper.java
License:Open Source License
public static Path[] getKmerIndexPartFilePath(Configuration conf, Path inputPath) throws IOException {
    List<Path> inputFiles = new ArrayList<Path>();
    KmerIndexPartPathFilter filter = new KmerIndexPartPathFilter();
    Path indexDir = inputPath.getParent();
    FileSystem fs = indexDir.getFileSystem(conf);
    if (fs.exists(indexDir)) {
        FileStatus status = fs.getFileStatus(indexDir);
        if (status.isDir()) {
            // check child
            FileStatus[] entries = fs.listStatus(indexDir);
            for (FileStatus entry : entries) {
                if (entry.isDir()) {
                    if (filter.accept(entry.getPath())) {
                        if (isSameKmerIndex(inputPath, entry.getPath())) {
                            inputFiles.add(entry.getPath());
                        }
                    }
                }
            }
        }
    }
    return inputFiles.toArray(new Path[0]);
}
From source file:kogiri.mapreduce.preprocess.indexing.stage1.ReadIndexBuilder.java
License:Open Source License
private void commit(Path outputPath, Configuration conf, NamedOutputs namedOutputs) throws IOException {
    FileSystem fs = outputPath.getFileSystem(conf);
    FileStatus status = fs.getFileStatus(outputPath);
    if (status.isDir()) {
        FileStatus[] entries = fs.listStatus(outputPath);
        for (FileStatus entry : entries) {
            Path entryPath = entry.getPath();
            // remove unnecessary outputs
            if (MapReduceHelper.isLogFiles(entryPath)) {
                fs.delete(entryPath, true);
            } else if (MapReduceHelper.isPartialOutputFiles(entryPath)) {
                fs.delete(entryPath, true);
            } else if (KmerHistogramHelper.isKmerHistogramFile(entryPath)) {
                // not necessary
            } else {
                // rename outputs
                NamedOutputRecord namedOutput = namedOutputs.getRecordFromMROutput(entryPath.getName());
                if (namedOutput != null) {
                    Path toPath = new Path(entryPath.getParent(),
                            ReadIndexHelper.makeReadIndexFileName(namedOutput.getFilename()));
                    LOG.info("output : " + entryPath.toString());
                    LOG.info("renamed to : " + toPath.toString());
                    fs.rename(entryPath, toPath);
                }
            }
        }
    } else {
        throw new IOException("path not found : " + outputPath.toString());
    }
}
From source file:kogiri.mapreduce.readfrequency.kmermatch.KmerMatcher.java
License:Open Source License
private void commit(Path outputPath, Configuration conf) throws IOException {
    FileSystem fs = outputPath.getFileSystem(conf);
    FileStatus status = fs.getFileStatus(outputPath);
    if (status.isDir()) {
        FileStatus[] entries = fs.listStatus(outputPath);
        for (FileStatus entry : entries) {
            Path entryPath = entry.getPath();
            // remove unnecessary outputs
            if (MapReduceHelper.isLogFiles(entryPath)) {
                fs.delete(entryPath, true);
            } else if (MapReduceHelper.isPartialOutputFiles(entryPath)) {
                // rename outputs
                int mapreduceID = MapReduceHelper.getMapReduceID(entryPath);
                String newName = KmerMatchHelper.makeKmerMatchResultFileName(mapreduceID);
                Path toPath = new Path(entryPath.getParent(), newName);
                LOG.info("output : " + entryPath.toString());
                LOG.info("renamed to : " + toPath.toString());
                fs.rename(entryPath, toPath);
            } else {
                // let it be
            }
        }
    } else {
        throw new IOException("path not found : " + outputPath.toString());
    }
}
From source file:madgik.exareme.worker.arm.storage.client.cluster.ClusterArmStorageClient.java
/**
 * @param src
 * @param dest
 * @throws ArmStorageClientException
 */
@Override
public void put(String src, String dest) throws ArmStorageClientException {
    // connected ?
    if (this.fs == null)
        throw new ArmStorageClientException("Not connected!");
    // validate parameters
    if (src == null || src.isEmpty())
        throw new ArmStorageClientException("No valid src file path!");
    if (dest == null || dest.isEmpty())
        throw new ArmStorageClientException("No valid dest file path!");
    // src file exist?
    File srcFile = new File(src);
    if (!srcFile.exists() || !srcFile.isFile() || srcFile.length() == 0)
        throw new ArmStorageClientException("src file does not exist!");
    // copy !
    FSDataOutputStream out = null;
    InputStream in = null;
    try {
        in = new FileInputStream(srcFile);
        Path destPath = new Path(dest);
        fs.mkdirs(destPath.getParent());
        if (this.fs.exists(destPath))
            throw new ArmStorageClientException("dest file already exists!");
        //long blocksize = ArmStorageClientUtils.roundFileLength(srcFile.length());
        long currentBlockSize = blocksize == -1
                ? ArmStorageClientUtils.roundFileLength(srcFile.length())
                : blocksize;
        out = this.fs.create(destPath, true, buffersize, (short) replication, currentBlockSize);
        IOUtils.copyBytes(in, out, buffersize);
    } catch (IOException ex) {
        throw new ArmStorageClientException("Put failure : ", ex);
    } finally {
        IOUtils.closeStream(in);
        IOUtils.closeStream(out);
    }
}
From source file:madgik.exareme.worker.arm.storage.client.cluster.HDFSArmStorageClient.java
/**
 * @param src
 * @param dest
 * @throws ArmStorageClientException
 */
@Override
public void put(String src, String dest) throws ArmStorageClientException {
    // connected ?
    if (this.fs == null)
        throw new ArmStorageClientException("Not connected.");
    // validate parameters
    if (src == null || src.isEmpty())
        throw new ArmStorageClientException("No valid src file path!");
    if (dest == null || dest.isEmpty())
        throw new ArmStorageClientException("No valid dest file path!");
    // src file exist?
    File srcFile = new File(src);
    if (!srcFile.exists() || !srcFile.isFile() || srcFile.length() == 0)
        throw new ArmStorageClientException("src file does not exist!");
    // copy !
    FSDataOutputStream out = null;
    InputStream in = null;
    try {
        in = new FileInputStream(srcFile);
        Path destPath = new Path(dest);
        fs.mkdirs(destPath.getParent());
        if (this.fs.exists(destPath))
            throw new ArmStorageClientException("dest file already exists!");
        //long blocksize = ArmStorageClientUtils.roundFileLength(srcFile.length());
        long currentBlockSize = blocksize == -1
                ? ArmStorageClientUtils.roundFileLength(srcFile.length())
                : blocksize;
        out = this.fs.create(destPath, true, buffersize, (short) replication, currentBlockSize);
        IOUtils.copyBytes(in, out, buffersize);
    } catch (IOException ex) {
        throw new ArmStorageClientException("Put failure : ", ex);
    } finally {
        IOUtils.closeStream(in);
        IOUtils.closeStream(out);
    }
}
From source file:name.abhijitsarkar.hadoop.io.IOUtils.java
License:Open Source License
/**
 * @param inputURI
 *            The file from which MapFile is created
 * @param conf
 *            Job configuration
 * @return MapFile URI
 * @throws Exception
 *             If fails to create the MapFile
 */
@SuppressWarnings("resource")
public static URI createMapFile(URI inputURI, final Configuration conf) throws Exception {
    LOGGER.debug("Attempting to create MapFile from input URI: {}.", inputURI);

    inputURI = uncompressFile(inputURI, conf);

    final Path inputPath = new Path(inputURI);
    final FileSystem fs = inputPath.getFileSystem(conf);

    final InputStream data = new FileInputStream(inputURI.getPath());
    final BufferedReader reader = new BufferedReader(new InputStreamReader(data, StandardCharsets.UTF_8));
    String line = null;

    final URI mapFileURI = new Path(inputPath.getParent(), "map_file").toUri();
    LOGGER.debug("MapFile URI: {}.", mapFileURI);

    /*
     * For this method to be truly generic, caller should pass in the key and value classes
     */
    final MapFile.Writer writer = new MapFile.Writer(conf, fs, mapFileURI.toString(), Text.class, Text.class);

    String[] tokens = null;
    String key = null;
    String value = null;

    while ((line = reader.readLine()) != null) {
        tokens = line.split("\\s", 2);

        /*
         * For this method to be truly generic, caller should pass in a token parser
         */
        if (tokens == null || tokens.length < 2) {
            LOGGER.error("Don't know how to parse line: {}.", line);
            throw new RuntimeException("Don't know how to parse line: " + line);
        }

        key = tokens[0];
        value = tokens[1];

        writer.append(new Text(key), new Text(value));
    }

    closeStreams(writer, reader, data);

    return mapFileURI;
}
From source file:name.abhijitsarkar.hadoop.io.IOUtils.java
License:Open Source License
/**
 * @param uncompressedURI
 *            The file to be archived
 * @param codecName
 *            The codec to be used for archiving
 * @param conf
 *            Job configuration
 * @return The archive URI
 * @throws Exception
 *             If fails to create the archive
 */
public static URI compressFile(final URI uncompressedURI, final String codecName, final Configuration conf)
        throws Exception {
    /*
     * Hadoop 1.1.2 does not have a CompressionCodecFactory#getCodecByName method. Instantiating GzipCodec by
     * using new operator throws NPE (probably Hadoop bug).
     */
    final CompressionCodec codec = getCodecByClassName(GzipCodec.class.getName(), conf);

    final Path uncompressedPath = new Path(uncompressedURI);

    String archiveName = addExtension(uncompressedPath.getName(), codec.getDefaultExtension(), true);

    final Path archivePath = new Path(uncompressedPath.getParent(), archiveName);

    LOGGER.debug("uncompressedURI: {}.", uncompressedURI);
    LOGGER.debug("archiveURI: {}", archivePath.toString());

    OutputStream outputStream = null;
    InputStream inputStream = null;
    CompressionOutputStream out = null;

    try {
        outputStream = new FileOutputStream(archivePath.toUri().getPath());
        inputStream = new FileInputStream(uncompressedURI.getPath());

        out = codec.createOutputStream(outputStream);

        org.apache.hadoop.io.IOUtils.copyBytes(inputStream, out, conf, false);

        out.finish();
    } catch (IOException e) {
        throw e;
    } finally {
        closeStreams(inputStream, outputStream, out);
    }

    return archivePath.toUri();
}
From source file:org.apache.accumulo.core.client.mock.MockTableOperationsTest.java
License:Apache License
private ImportTestFilesAndData prepareTestFiles() throws Throwable {
    Configuration defaultConf = new Configuration();
    Path tempFile = new Path("target/accumulo-test/import/sample.rf");
    Path failures = new Path("target/accumulo-test/failures/");
    FileSystem fs = FileSystem.get(new URI("file:///"), defaultConf);
    fs.deleteOnExit(tempFile);
    fs.deleteOnExit(failures);
    fs.delete(failures, true);
    fs.delete(tempFile, true);
    fs.mkdirs(failures);
    fs.mkdirs(tempFile.getParent());
    FileSKVWriter writer = FileOperations.getInstance().newWriterBuilder()
            .forFile(tempFile.toString(), fs, defaultConf)
            .withTableConfiguration(AccumuloConfiguration.getDefaultConfiguration()).build();
    writer.startDefaultLocalityGroup();
    List<Pair<Key, Value>> keyVals = new ArrayList<>();
    for (int i = 0; i < 5; i++) {
        keyVals.add(new Pair<>(new Key("a" + i, "b" + i, "c" + i, new ColumnVisibility(""), 1000L + i),
                new Value(Integer.toString(i).getBytes())));
    }
    for (Pair<Key, Value> keyVal : keyVals) {
        writer.append(keyVal.getFirst(), keyVal.getSecond());
    }
    writer.close();
    ImportTestFilesAndData files = new ImportTestFilesAndData();
    files.failurePath = failures;
    files.importPath = tempFile.getParent();
    files.keyVals = keyVals;
    return files;
}
From source file:org.apache.accumulo.master.tableOps.BulkImport.java
License:Apache License
private String prepareBulkImport(Master master, final VolumeManager fs, String dir, String tableId)
        throws Exception {
    final Path bulkDir = createNewBulkDir(fs, tableId);

    MetadataTableUtil.addBulkLoadInProgressFlag(master,
            "/" + bulkDir.getParent().getName() + "/" + bulkDir.getName());

    Path dirPath = new Path(dir);
    FileStatus[] mapFiles = fs.listStatus(dirPath);

    final UniqueNameAllocator namer = UniqueNameAllocator.getInstance();

    int workerCount = master.getConfiguration().getCount(Property.MASTER_BULK_RENAME_THREADS);
    SimpleThreadPool workers = new SimpleThreadPool(workerCount, "bulk move");
    List<Future<Exception>> results = new ArrayList<>();

    for (FileStatus file : mapFiles) {
        final FileStatus fileStatus = file;
        results.add(workers.submit(new Callable<Exception>() {
            @Override
            public Exception call() throws Exception {
                try {
                    String sa[] = fileStatus.getPath().getName().split("\\.");
                    String extension = "";
                    if (sa.length > 1) {
                        extension = sa[sa.length - 1];

                        if (!FileOperations.getValidExtensions().contains(extension)) {
                            log.warn(fileStatus.getPath() + " does not have a valid extension, ignoring");
                            return null;
                        }
                    } else {
                        // assume it is a map file
                        extension = Constants.MAPFILE_EXTENSION;
                    }

                    if (extension.equals(Constants.MAPFILE_EXTENSION)) {
                        if (!fileStatus.isDirectory()) {
                            log.warn(fileStatus.getPath() + " is not a map file, ignoring");
                            return null;
                        }

                        if (fileStatus.getPath().getName().equals("_logs")) {
                            log.info(fileStatus.getPath()
                                    + " is probably a log directory from a map/reduce task, skipping");
                            return null;
                        }
                        try {
                            FileStatus dataStatus = fs
                                    .getFileStatus(new Path(fileStatus.getPath(), MapFile.DATA_FILE_NAME));
                            if (dataStatus.isDirectory()) {
                                log.warn(fileStatus.getPath() + " is not a map file, ignoring");
                                return null;
                            }
                        } catch (FileNotFoundException fnfe) {
                            log.warn(fileStatus.getPath() + " is not a map file, ignoring");
                            return null;
                        }
                    }

                    String newName = "I" + namer.getNextName() + "." + extension;
                    Path newPath = new Path(bulkDir, newName);
                    try {
                        fs.rename(fileStatus.getPath(), newPath);
                        log.debug("Moved " + fileStatus.getPath() + " to " + newPath);
                    } catch (IOException E1) {
                        log.error("Could not move: {} {}", fileStatus.getPath().toString(), E1.getMessage());
                    }
                } catch (Exception ex) {
                    return ex;
                }
                return null;
            }
        }));
    }
    workers.shutdown();
    while (!workers.awaitTermination(1000L, TimeUnit.MILLISECONDS)) {
    }

    for (Future<Exception> ex : results) {
        if (ex.get() != null) {
            throw ex.get();
        }
    }

    return bulkDir.toString();
}
From source file:org.apache.accumulo.master.tableOps.bulkVer1.BulkImport.java
License:Apache License
private String prepareBulkImport(Master master, final VolumeManager fs, String dir, Table.ID tableId)
        throws Exception {
    final Path bulkDir = createNewBulkDir(fs, tableId);

    MetadataTableUtil.addBulkLoadInProgressFlag(master,
            "/" + bulkDir.getParent().getName() + "/" + bulkDir.getName());

    Path dirPath = new Path(dir);
    FileStatus[] mapFiles = fs.listStatus(dirPath);

    final UniqueNameAllocator namer = UniqueNameAllocator.getInstance();

    int workerCount = master.getConfiguration().getCount(Property.MASTER_BULK_RENAME_THREADS);
    SimpleThreadPool workers = new SimpleThreadPool(workerCount, "bulk move");
    List<Future<Exception>> results = new ArrayList<>();

    for (FileStatus file : mapFiles) {
        final FileStatus fileStatus = file;
        results.add(workers.submit(() -> {
            try {
                String sa[] = fileStatus.getPath().getName().split("\\.");
                String extension = "";
                if (sa.length > 1) {
                    extension = sa[sa.length - 1];

                    if (!FileOperations.getValidExtensions().contains(extension)) {
                        log.warn("{} does not have a valid extension, ignoring", fileStatus.getPath());
                        return null;
                    }
                } else {
                    // assume it is a map file
                    extension = Constants.MAPFILE_EXTENSION;
                }

                if (extension.equals(Constants.MAPFILE_EXTENSION)) {
                    if (!fileStatus.isDirectory()) {
                        log.warn("{} is not a map file, ignoring", fileStatus.getPath());
                        return null;
                    }

                    if (fileStatus.getPath().getName().equals("_logs")) {
                        log.info("{} is probably a log directory from a map/reduce task, skipping",
                                fileStatus.getPath());
                        return null;
                    }
                    try {
                        FileStatus dataStatus = fs
                                .getFileStatus(new Path(fileStatus.getPath(), MapFile.DATA_FILE_NAME));
                        if (dataStatus.isDirectory()) {
                            log.warn("{} is not a map file, ignoring", fileStatus.getPath());
                            return null;
                        }
                    } catch (FileNotFoundException fnfe) {
                        log.warn("{} is not a map file, ignoring", fileStatus.getPath());
                        return null;
                    }
                }

                String newName = "I" + namer.getNextName() + "." + extension;
                Path newPath = new Path(bulkDir, newName);
                try {
                    fs.rename(fileStatus.getPath(), newPath);
                    log.debug("Moved {} to {}", fileStatus.getPath(), newPath);
                } catch (IOException E1) {
                    log.error("Could not move: {} {}", fileStatus.getPath(), E1.getMessage());
                }
            } catch (Exception ex) {
                return ex;
            }
            return null;
        }));
    }
    workers.shutdown();
    while (!workers.awaitTermination(1000L, TimeUnit.MILLISECONDS)) {
    }

    for (Future<Exception> ex : results) {
        if (ex.get() != null) {
            throw ex.get();
        }
    }

    return bulkDir.toString();
}