Usage examples for org.apache.hadoop.fs.Path#getName()
public String getName()
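getName() returns the final component of the path, i.e. everything after the last slash. Below is a minimal sketch of the behavior; the paths are made-up examples, not drawn from the sources on this page.

import org.apache.hadoop.fs.Path;

public class PathGetNameDemo {
    public static void main(String[] args) {
        // getName() keeps only the last path component
        System.out.println(new Path("/user/data/part-00000.gz").getName());          // part-00000.gz
        System.out.println(new Path("hdfs://nn:8020/tmp/job_1/_SUCCESS").getName()); // _SUCCESS

        // Typical use, as in the examples below: name-based filtering of hidden files
        Path p = new Path("/tmp/.staging");
        System.out.println(p.getName().startsWith("_") || p.getName().startsWith(".")); // true
    }
}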
From source file:com.redsqirl.workflow.server.datatype.MapRedDir.java
License:Open Source License
public String isPathValid(String path, List<String> shouldNotHaveExt, List<String> shouldHaveExt,
        boolean fileExtension) throws RemoteException {
    String error = null;
    HdfsFileChecker hCh = new HdfsFileChecker(path);
    // Check the extension of the path itself
    if (shouldHaveExt != null && !shouldHaveExt.isEmpty()) {
        boolean found = false;
        for (String extCur : shouldHaveExt) {
            found |= path.endsWith(extCur);
        }
        if (!found) {
            error = LanguageManagerWF.getText("mapredtexttype.shouldhaveextcompresssile",
                    new Object[] { path, shouldHaveExt });
        }
    } else if (shouldNotHaveExt != null && !shouldNotHaveExt.isEmpty()) {
        boolean found = false;
        for (String extCur : shouldNotHaveExt) {
            found |= path.endsWith(extCur);
        }
        if (found) {
            error = LanguageManagerWF.getText("mapredtexttype.shouldnothaveextcompresssile",
                    new Object[] { path, shouldNotHaveExt });
        }
    }
    if (!hCh.isInitialized() || hCh.isFile()) {
        error = LanguageManagerWF.getText("mapredtexttype.dirisfile");
    } else if (isPathExist()) {
        FileSystem fs;
        try {
            fs = NameNodeVar.getFS();
            hCh.setPath(new Path(path).getParent());
            if (!hCh.isDirectory()) {
                error = LanguageManagerWF.getText("mapredtexttype.nodir",
                        new String[] { hCh.getPath().toString() });
            }
            FileStatus[] stat = null;
            if (error == null) {
                try {
                    // List the directory content, skipping hidden files ('_' or '.' prefix)
                    stat = fs.listStatus(new Path(path), new PathFilter() {
                        @Override
                        public boolean accept(Path arg0) {
                            return !arg0.getName().startsWith("_") && !arg0.getName().startsWith(".");
                        }
                    });
                } catch (Exception e) {
                    stat = null;
                    error = LanguageManagerWF.getText("mapredtexttype.notmrdir", new Object[] { path });
                }
            }
            if (stat != null) {
                for (int i = 0; i < stat.length && error == null; ++i) {
                    if (stat[i].isDir()) {
                        error = LanguageManagerWF.getText("mapredtexttype.notmrdir", new Object[] { path });
                    } else {
                        if (fileExtension) {
                            // Check the extension of every file in the directory
                            if (shouldHaveExt != null && !shouldHaveExt.isEmpty()) {
                                boolean found = false;
                                for (String extCur : shouldHaveExt) {
                                    found |= stat[i].getPath().getName().endsWith(extCur);
                                }
                                if (!found) {
                                    error = LanguageManagerWF.getText(
                                            "mapredtexttype.shouldhaveextcompresssile",
                                            new Object[] { path, shouldHaveExt });
                                }
                            } else if (shouldNotHaveExt != null && !shouldNotHaveExt.isEmpty()) {
                                boolean found = false;
                                for (String extCur : shouldNotHaveExt) {
                                    found |= stat[i].getPath().getName().endsWith(extCur);
                                }
                                if (found) {
                                    error = LanguageManagerWF.getText(
                                            "mapredtexttype.shouldnothaveextcompresssile",
                                            new Object[] { path, shouldNotHaveExt });
                                }
                            }
                        }
                        try {
                            // Make sure the file is readable as a MapReduce output
                            hdfsInt.select(stat[i].getPath().toString(), "", 1);
                        } catch (Exception e) {
                            error = LanguageManagerWF.getText("mapredtexttype.notmrdir");
                            logger.error(error, e);
                        }
                    }
                }
            }
        } catch (IOException e) {
            error = LanguageManagerWF.getText("unexpectedexception", new Object[] { e.getMessage() });
            logger.error(error, e);
        }
    }
    return error;
}
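The anonymous PathFilter above, which rejects names beginning with "_" or ".", follows Hadoop's hidden-file convention: files such as _SUCCESS, _logs and .crc checksums are job metadata rather than output, and FileInputFormat skips such names by default using the same test.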
From source file:com.redsqirl.workflow.server.datatype.MapRedDir.java
License:Open Source License
public List<String> selectLine(int maxToRead) throws RemoteException {
    List<String> ans = null;
    if (isPathExist()) {
        try {
            FileSystem fs = NameNodeVar.getFS();
            FileStatus[] stat = fs.listStatus(new Path(getPath()), new PathFilter() {
                @Override
                public boolean accept(Path arg0) {
                    return !arg0.getName().startsWith("_") && !arg0.getName().startsWith(".");
                }
            });
            if (stat != null && stat.length > 0) {
                ans = new ArrayList<String>(maxToRead);
                // Sort the files by size, biggest first
                SortedSet<Map.Entry<FileStatus, Long>> filesSortedBySize = new TreeSet<Map.Entry<FileStatus, Long>>(
                        new Comparator<Map.Entry<FileStatus, Long>>() {
                            @Override
                            public int compare(Map.Entry<FileStatus, Long> e1, Map.Entry<FileStatus, Long> e2) {
                                return -e1.getValue().compareTo(e2.getValue());
                            }
                        });
                for (int k = 0; k < stat.length; ++k) {
                    filesSortedBySize
                            .add(new AbstractMap.SimpleEntry<FileStatus, Long>(stat[k], stat[k].getLen()));
                }
                // Read the biggest files first, up to NB_FILE_TO_READ_MAX files
                Iterator<Map.Entry<FileStatus, Long>> fileIt = filesSortedBySize.iterator();
                int k = 0;
                while (fileIt.hasNext() && ans.size() < maxToRead && k < NB_FILE_TO_READ_MAX) {
                    Map.Entry<FileStatus, Long> cur = fileIt.next();
                    FileStatus file = cur.getKey();
                    logger.debug("Number of line already read: " + ans.size());
                    ans.addAll(hdfsInt.select(file.getPath().toString(), ",", maxToRead - ans.size()));
                    ++k;
                }
                logger.debug("Number of line read in " + getPath() + ": " + ans.size());
            }
        } catch (IOException e) {
            String error = "Unexpected error: " + e.getMessage();
            logger.error(error, e);
            ans = null;
        } catch (Exception e) {
            logger.error(e, e);
            ans = null;
        }
    }
    return ans;
}
From source file:com.redsqirl.workflow.server.OozieManager.java
License:Open Source License
/**
 * Clean the directory where the Job details are stored
 *
 * @param nameWf
 * @throws RemoteException
 */
public void cleanJobDirectory(final String nameWf) throws RemoteException {
    Path hdfsWfPath = new Path(WorkflowPrefManager.getHDFSPathJobs());
    FileSystem fs = null;
    int numberToKeep = WorkflowPrefManager.getNbOozieDirToKeep();
    try {
        fs = NameNodeVar.getFS();
        FileStatus[] children = fs.listStatus(hdfsWfPath, new PathFilter() {
            @Override
            public boolean accept(Path arg0) {
                return arg0.getName().startsWith(nameWf + "_");
            }
        });
        Arrays.sort(children, 0, children.length, new Comparator<FileStatus>() {
            @Override
            public int compare(FileStatus arg0, FileStatus arg1) {
                return (int) ((arg0.getModificationTime() - arg1.getModificationTime()) / 10000);
            }
        });
        for (int i = 0; i < children.length - numberToKeep; ++i) {
            fs.delete(children[i].getPath(), true);
        }
    } catch (Exception e1) {
        logger.error(e1);
    }
}
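Two details rely on getName() here: the filter keeps only entries whose name starts with nameWf + "_", so unrelated workflows are untouched, and the comparator divides the millisecond timestamps by 10000 before the narrowing cast to int, which coarsens them to roughly 10-second buckets and makes the cast far less likely to overflow for widely spaced modification times.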
From source file:com.redsqirl.workflow.server.OozieManager.java
License:Open Source License
/**
 * Get a name for a directory to store all the job files and configuration.
 *
 * @param df the workflow to run
 * @return The name for a directory to store all the job files and configuration
 * @throws RemoteException
 */
protected String buildFileName(DataFlow df) throws RemoteException {
    final String nameWf = df.getName();
    if (nameWf == null) {
        logger.warn("The workflow to run has no name");
        df.setName(RandomString.getRandomName(8));
    }
    String ans = null;
    Path hdfsWfPath = new Path(WorkflowPrefManager.getHDFSPathJobs());
    FileSystem fs = null;
    int number = -1;
    try {
        fs = NameNodeVar.getFS();
        // Keep only directories named <nameWf>_<number>
        FileStatus[] children = fs.listStatus(hdfsWfPath, new PathFilter() {
            @Override
            public boolean accept(Path arg0) {
                if (arg0.getName().startsWith(nameWf)) {
                    try {
                        @SuppressWarnings("unused")
                        int i = Integer.valueOf(arg0.getName().substring(nameWf.length() + 1));
                        return true;
                    } catch (Exception e) {
                    }
                }
                return false;
            }
        });
        // Find the highest suffix already in use
        if (children != null && children.length > 0) {
            for (FileStatus child : children) {
                number = Math.max(number,
                        Integer.valueOf(child.getPath().getName().substring(nameWf.length() + 1)));
            }
        }
    } catch (Exception e) {
        logger.error(e, e);
    }
    ans = nameWf + "_" + (number + 1);
    return ans;
}
From source file:com.redsqirl.workflow.server.Workflow.java
License:Open Source License
/**
 * Clean the backup directory
 *
 * @throws IOException
 */
public void cleanUpBackup() throws IOException {
    String path = WorkflowPrefManager.getBackupPath();
    int nbBackup = WorkflowPrefManager.getNbBackup();
    FileSystem fs = NameNodeVar.getFS();
    // Backup files end with a 14-digit timestamp and a .rs or .srs extension
    FileStatus[] fsA = fs.listStatus(new Path(path), new PathFilter() {
        @Override
        public boolean accept(Path arg0) {
            return arg0.getName().matches(".*[0-9]{14}(.rs|.srs)$");
        }
    });
    logger.debug("Backup directory: " + fsA.length + " files, " + nbBackup + " to keep, "
            + Math.max(0, fsA.length - nbBackup) + " to remove");
    if (fsA.length > nbBackup) {
        int numberToRemove = fsA.length - nbBackup;
        Map<Path, Long> pathToRemove = new HashMap<Path, Long>();
        Path pathMin = null;
        Long min = Long.MAX_VALUE;
        for (FileStatus stat : fsA) {
            if (pathToRemove.size() < numberToRemove) {
                pathToRemove.put(stat.getPath(), stat.getModificationTime());
            } else if (min > stat.getModificationTime()) {
                pathToRemove.remove(pathMin);
                pathToRemove.put(stat.getPath(), stat.getModificationTime());
            }
            if (min > stat.getModificationTime()) {
                min = stat.getModificationTime();
                pathMin = stat.getPath();
            }
        }
        for (Path pathDel : pathToRemove.keySet()) {
            fs.delete(pathDel, false);
        }
    }
}
From source file:com.renren.hadoop.util.LuceneIndexFileNameFilter.java
License:Apache License
public boolean accept(Path path) {
    return luceneFilter.accept(null, path.getName());
}
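A small adapter: judging by the (null, name) call shape, luceneFilter is a java.io.FilenameFilter, which operates on plain file names, so the Hadoop Path is first reduced to its final component with getName(); the directory argument can be null because only the name matters here.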
From source file:com.ricemap.spateDB.core.GridRecordWriter.java
License:Apache License
/**
 * Close the given cell, freeing all memory reserved by it.
 * Once a cell is closed, we should not write more data to it.
 *
 * @throws IOException
 */
protected void closeCellBackground(final Path intermediateCellPath, final Path finalCellPath,
        final OutputStream intermediateCellStream, final OutputStream masterFile, final Prism cellMbr)
        throws IOException {
    Thread closingThread = new Thread() {
        @Override
        public void run() {
            try {
                Path finalfinalCellPath = flushAllEntries(intermediateCellPath, intermediateCellStream,
                        finalCellPath);
                // Write a line to the master file including the cell file name and its MBR
                if (masterFile != null) {
                    Partition partition = new Partition(finalfinalCellPath.getName(), cellMbr);
                    Text line = partition.toText(new Text());
                    masterFile.write(line.getBytes(), 0, line.getLength());
                    masterFile.write(NEW_LINE);
                }
            } catch (IOException e) {
                throw new RuntimeException("Error closing thread", e);
            }
        }
    };
    closingThreads.add(closingThread);
    // Remove previously terminated threads
    while (!closingThreads.isEmpty() && closingThreads.get(0).getState() == Thread.State.TERMINATED) {
        closingThreads.remove(0);
    }
    // Start the first thread (if it exists and has not started yet)
    if (!closingThreads.isEmpty() && closingThreads.get(0).getState() == Thread.State.NEW)
        closingThreads.get(0).start();
}
From source file:com.ricemap.spateDB.core.SpatialSite.java
License:Apache License
public static GlobalIndex<Partition> getGlobalIndex(FileSystem fs, Path dir) throws IOException {
    // Retrieve the master file (the only file with the name _master in it)
    FileStatus[] masterFiles = fs.listStatus(dir, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().contains("_master");
        }
    });
    // Check if the given directory is indexed
    if (masterFiles.length == 0)
        return null;
    if (masterFiles.length > 1)
        throw new RuntimeException("Found more than one master file in " + dir);
    Path masterFile = masterFiles[0].getPath();
    ShapeRecordReader<Partition> reader = new ShapeRecordReader<Partition>(fs.open(masterFile), 0,
            fs.getFileStatus(masterFile).getLen());
    CellInfo dummy = new CellInfo();
    Partition partition = new Partition();
    ArrayList<Partition> partitions = new ArrayList<Partition>();
    while (reader.next(dummy, partition)) {
        partitions.add(partition.clone());
    }
    GlobalIndex<Partition> globalIndex = new GlobalIndex<Partition>();
    globalIndex.bulkLoad(partitions.toArray(new Partition[partitions.size()]));
    // The master file's suffix encodes the index type
    globalIndex.setCompact(masterFile.getName().endsWith("rtree") || masterFile.getName().endsWith("r+tree"));
    globalIndex.setReplicated(masterFile.getName().endsWith("r+tree") || masterFile.getName().endsWith("grid"));
    return globalIndex;
}
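Note how the master file's name doubles as metadata: rather than opening a separate descriptor, getGlobalIndex derives the index properties from the name's suffix, with "rtree" and "r+tree" marking a compact index and "r+tree" and "grid" a replicated one.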
From source file:com.ricemap.spateDB.core.SpatialSite.java
License:Apache License
public static void setCells(JobConf job, CellInfo[] cellsInfo) throws IOException {
    Path tempFile;
    FileSystem fs = FileSystem.get(job);
    do {
        tempFile = new Path(job.getJobName() + "_" + (int) (Math.random() * 1000000) + ".cells");
    } while (fs.exists(tempFile));
    FSDataOutputStream out = fs.create(tempFile);
    out.writeInt(cellsInfo.length);
    for (CellInfo cell : cellsInfo) {
        cell.write(out);
    }
    out.close();
    fs.deleteOnExit(tempFile);
    DistributedCache.addCacheFile(tempFile.toUri(), job);
    job.set(OUTPUT_CELLS, tempFile.getName());
    LOG.info("Partitioning file into " + cellsInfo.length + " cells");
}
From source file:com.ricemap.spateDB.core.SpatialSite.java
License:Apache License
public static CellInfo[] getCells(JobConf job) throws IOException {
    CellInfo[] cells = null;
    String cells_file = job.get(OUTPUT_CELLS);
    if (cells_file != null) {
        Path[] cacheFiles = DistributedCache.getLocalCacheFiles(job);
        for (Path cacheFile : cacheFiles) {
            if (cacheFile.getName().contains(cells_file)) {
                FSDataInputStream in = FileSystem.getLocal(job).open(cacheFile);
                int cellCount = in.readInt();
                cells = new CellInfo[cellCount];
                for (int i = 0; i < cellCount; i++) {
                    cells[i] = new CellInfo();
                    cells[i].readFields(in);
                }
                in.close();
            }
        }
    }
    return cells;
}
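setCells and getCells cooperate purely through the file name: setCells stores only tempFile.getName() (not the full path) under OUTPUT_CELLS, and each task rediscovers the cell file among its localized DistributedCache copies by matching that name against cacheFile.getName().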