List of usage examples for org.apache.hadoop.fs.Path.getParent()
public Path getParent()
From source file:com.cloudera.recordbreaker.analyzer.FSAnalyzer.java
License:Open Source License
/** * <code>addFileMetadata</code> stores the pathname, size, owner, etc. *//*from w w w . ja va 2s. c o m*/ void addFileMetadata(final FileStatus fstatus, final long crawlId) { // Compute strings to represent file metadata Path insertFile = fstatus.getPath(); final boolean isDir = fstatus.isDir(); FsPermission fsp = fstatus.getPermission(); final String permissions = (isDir ? "d" : "-") + fsp.getUserAction().SYMBOL + fsp.getGroupAction().SYMBOL + fsp.getOtherAction().SYMBOL; // Compute formal pathname representation String fnameString = null; String parentPathString = null; if (isDir && insertFile.getParent() == null) { parentPathString = ""; fnameString = insertFile.toString(); } else { fnameString = insertFile.getName(); parentPathString = insertFile.getParent().toString(); // REMIND --- mjc --- If we want to modify the Files table s.t. it does // not contain the filesystem prefix, then this would be the place to do it. if (!parentPathString.endsWith("/")) { parentPathString = parentPathString + "/"; } } final String parentPath = parentPathString; final String fName = fnameString; final long fileId = dbQueue.execute(new SQLiteJob<Long>() { protected Long job(SQLiteConnection db) throws SQLiteException { SQLiteStatement stmt = db.prepare("INSERT into Files VALUES(null, ?, ?, ?, ?, ?, ?, ?, ?, ?)"); try { stmt.bind(1, isDir ? "True" : "False").bind(2, crawlId).bind(3, fName) .bind(4, fstatus.getOwner()).bind(5, fstatus.getGroup()).bind(6, permissions) .bind(7, fstatus.getLen()) .bind(8, fileDateFormat.format(new Date(fstatus.getModificationTime()))) .bind(9, parentPath); stmt.step(); return db.getLastInsertId(); } finally { stmt.dispose(); } } }).complete(); }
From source file:com.cloudera.recordbreaker.analyzer.FSAnalyzer.java
License:Open Source License
/** * Get the parents for the given directory from a given crawl */// w w w .java 2s . co m public List<FileSummary> getDirParents(final long crawlid, final String targetDirStr) { return dbQueue.execute(new SQLiteJob<List<FileSummary>>() { protected List<FileSummary> job(SQLiteConnection db) throws SQLiteException { List<FileSummary> output = new ArrayList<FileSummary>(); SQLiteStatement stmt = db.prepare( "select fid, path, fname from Files WHERE crawlid = ? AND length(?) > length(path||fname) AND isDir = 'True' AND replace(?, path||fname, '') LIKE '/%'"); try { Path targetDir = new Path(targetDirStr); if (targetDir.getParent() != null) { stmt.bind(1, crawlid).bind(2, targetDir.toString()).bind(3, targetDir.toString()); while (stmt.step()) { //Path p = new Path(stmt.columnString(0) + stmt.columnString(1)); output.add(new FileSummary(FSAnalyzer.this, stmt.columnLong(0))); } } } finally { stmt.dispose(); } return output; } }).complete(); }
From source file:com.cloudera.sqoop.lib.TestBlobRef.java
License:Apache License
private void doExternalTest(final byte[] data, final String filename) throws IOException { Configuration conf = new Configuration(); if (!BaseSqoopTestCase.isOnPhysicalCluster()) { conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS); }// ww w.j ava2 s . c o m FileSystem fs = FileSystem.get(conf); String tmpDir = System.getProperty("test.build.data", "/tmp/"); Path tmpPath = new Path(tmpDir); Path blobFile = new Path(tmpPath, filename); // make any necessary parent dirs. Path blobParent = blobFile.getParent(); if (!fs.exists(blobParent)) { fs.mkdirs(blobParent); } LobFile.Writer lw = LobFile.create(blobFile, conf, false); try { long off = lw.tell(); long len = data.length; OutputStream os = lw.writeBlobRecord(len); os.write(data, 0, data.length); os.close(); lw.close(); String refString = "externalLob(lf," + filename + "," + off + "," + len + ")"; BlobRef blob = BlobRef.parse(refString); assertTrue(blob.isExternal()); assertEquals(refString, blob.toString()); InputStream is = blob.getDataStream(conf, tmpPath); assertNotNull(is); byte[] buf = new byte[4096]; int bytes = is.read(buf, 0, 4096); is.close(); assertEquals(data.length, bytes); for (int i = 0; i < bytes; i++) { assertEquals(data[i], buf[i]); } } finally { fs.delete(blobFile, false); } }
From source file:com.cloudera.sqoop.lib.TestClobRef.java
License:Apache License
private void doExternalTest(final String data, final String filename) throws IOException { Configuration conf = new Configuration(); if (!BaseSqoopTestCase.isOnPhysicalCluster()) { conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS); }// w ww .jav a 2s .c o m FileSystem fs = FileSystem.get(conf); String tmpDir = System.getProperty("test.build.data", "/tmp/"); Path tmpPath = new Path(tmpDir); Path clobFile = new Path(tmpPath, filename); // make any necessary parent dirs. Path clobParent = clobFile.getParent(); if (!fs.exists(clobParent)) { fs.mkdirs(clobParent); } LobFile.Writer lw = LobFile.create(clobFile, conf, true); try { long off = lw.tell(); long len = data.length(); Writer w = lw.writeClobRecord(len); w.append(data); w.close(); lw.close(); String refString = "externalLob(lf," + filename + "," + off + "," + len + ")"; ClobRef clob = ClobRef.parse(refString); assertTrue(clob.isExternal()); assertEquals(refString, clob.toString()); Reader r = clob.getDataStream(conf, tmpPath); assertNotNull(r); char[] buf = new char[4096]; int chars = r.read(buf, 0, 4096); r.close(); String str = new String(buf, 0, chars); assertEquals(data, str); } finally { fs.delete(clobFile, false); } }
From source file:com.collective.celos.ci.testing.fixtures.deploy.HdfsInputDeployer.java
License:Apache License
/**
 * Copies every fixture file collected for this test run into the filesystem
 * of the CI context, below the run's HDFS prefix combined with this
 * deployer's path.
 *
 * @param testRun the test run supplying the filesystem, the HDFS prefix and
 *                the input for the fixture object creator
 * @throws Exception if the fixture cannot be created or a file cannot be written
 */
@Override
public void deploy(TestRun testRun) throws Exception {
    FileSystem fileSystem = testRun.getCiContext().getFileSystem();

    CollectFilesAndPathsProcessor pathToFile = new CollectFilesAndPathsProcessor();
    TreeObjectProcessor.process(fixObjectCreator.create(testRun), pathToFile);

    Path pathPrefixed = new Path(Util.augmentHdfsPath(testRun.getHdfsPrefix(), path.toString()));
    for (java.nio.file.Path childPath : pathToFile.pathToFiles.keySet()) {
        Path pathTo = new Path(pathPrefixed, childPath.toString());
        fileSystem.mkdirs(pathTo.getParent());

        // try-with-resources guarantees the stream is closed even when the copy
        // or the flush fails, and keeps the first failure as the primary
        // exception instead of letting a later flush/close failure mask it.
        try (FSDataOutputStream outputStream = fileSystem.create(pathTo)) {
            // NOTE(review): the source returned by getContent() is not closed here;
            // confirm whether the fixture object owns that resource.
            IOUtils.copy(pathToFile.pathToFiles.get(childPath).getContent(), outputStream);
            outputStream.flush();
        }
    }
}
From source file:com.collective.celos.ci.testing.fixtures.deploy.hive.HiveTableDeployer.java
License:Apache License
/**
 * Writes the rows of a fixture table to a new, randomly named tab-separated
 * file in the ".hive" directory below the test run's HDFS prefix, then opens
 * up the permissions of both that directory and the file.
 *
 * @param fixTable table whose rows are written, one line per row, with cells
 *                 in the order of {@code fixTable.getColumnNames()}
 * @param testRun  test run supplying the HDFS prefix and the filesystem
 * @return path of the file that was written
 * @throws Exception if the file cannot be created, written or have its
 *                   permissions changed
 */
private Path createTempHdfsFileForInsertion(FixTable fixTable, TestRun testRun) throws Exception {
    Path pathToParent = new Path(testRun.getHdfsPrefix(), ".hive");
    Path pathTo = new Path(pathToParent, UUID.randomUUID().toString());

    FileSystem fileSystem = testRun.getCiContext().getFileSystem();
    fileSystem.mkdirs(pathTo.getParent());
    FSDataOutputStream outputStream = fileSystem.create(pathTo);

    CSVWriter writer = new CSVWriter(new OutputStreamWriter(outputStream), '\t', CSVWriter.NO_QUOTE_CHARACTER);
    try {
        for (FixTable.FixRow fixRow : fixTable.getRows()) {
            List<String> rowData = Lists.newArrayList();
            for (String colName : fixTable.getColumnNames()) {
                rowData.add(fixRow.getCells().get(colName));
            }
            String[] dataArray = rowData.toArray(new String[rowData.size()]);
            writer.writeNext(dataArray);
        }
    } finally {
        // Close in finally so the writer (and the stream it wraps) is released
        // even when writing a row fails part-way through.
        writer.close();
    }

    fileSystem.setPermission(pathToParent, new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
    fileSystem.setPermission(pathTo, new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
    return pathTo;
}
From source file:com.datasalt.pangool.utils.test.AbstractHadoopTestLibrary.java
License:Apache License
/**
 * Trashes every registered input location and every registered output
 * location. An output whose path contains "-0000" is trashed via its parent
 * path instead of the path itself.
 *
 * @throws IOException if trashing a location fails
 */
public void cleanUp() throws IOException {
    for (String inputLocation : inputs.keySet()) {
        trash(inputLocation);
    }

    for (String outputLocation : outputs.keySet()) {
        Path outputPath = new Path(outputLocation);
        Path toTrash = outputPath.toString().contains("-0000") ? outputPath.getParent() : outputPath;
        trash(toTrash.toString());
    }
}
From source file:com.datatorrent.lib.io.fs.FileStitcher.java
License:Apache License
protected void mergeBlocks(T stitchedFileMetaData) throws IOException { //when writing to tmp files there can be vagrant tmp files which we have to clean final Path dst = new Path(filePath, stitchedFileMetaData.getStitchedFileRelativePath()); PathFilter tempFileFilter = new PathFilter() { @Override/* w ww .j ava 2s. c om*/ public boolean accept(Path path) { return path.getName().startsWith(dst.getName()) && path.getName().endsWith(PART_FILE_EXTENTION); } }; if (outputFS.exists(dst.getParent())) { FileStatus[] statuses = outputFS.listStatus(dst.getParent(), tempFileFilter); for (FileStatus status : statuses) { String statusName = status.getPath().getName(); LOG.debug("deleting vagrant file {}", statusName); outputFS.delete(status.getPath(), true); } } tempOutFilePath = new Path(filePath, stitchedFileMetaData.getStitchedFileRelativePath() + '.' + System.currentTimeMillis() + PART_FILE_EXTENTION); try { writeTempOutputFile(stitchedFileMetaData); moveToFinalFile(stitchedFileMetaData); } catch (BlockNotFoundException e) { LOG.warn("Block file {} not found. Assuming recovery mode for file {}. ", e.getBlockPath(), stitchedFileMetaData.getStitchedFileRelativePath()); //Remove temp output file outputFS.delete(tempOutFilePath, false); } }
From source file:com.datatorrent.lib.io.fs.FileStitcher.java
License:Apache License
/**
 * Moves the temp output file to its final path. Scheme and authority are
 * stripped from both paths, the destination's parent directory is created if
 * absent, and an existing file at the destination is deleted before the rename.
 *
 * @param tempOutFilePath
 *          Temporary output file
 * @param destination
 *          Final path the temp file is renamed to
 * @throws IOException
 *           if a filesystem operation fails
 * @throws RuntimeException
 *           if the rename reports failure
 */
protected void moveToFinalFile(Path tempOutFilePath, Path destination) throws IOException {
    Path src = Path.getPathWithoutSchemeAndAuthority(tempOutFilePath);
    Path dst = Path.getPathWithoutSchemeAndAuthority(destination);

    Path dstDir = dst.getParent();
    if (!outputFS.exists(dstDir)) {
        outputFS.mkdirs(dstDir);
    }
    if (outputFS.exists(dst)) {
        outputFS.delete(dst, false);
    }

    if (!outputFS.rename(src, dst)) {
        throw new RuntimeException("Unable to move file from " + src + " to " + dst);
    }
    LOG.debug("File {} moved successfully to destination folder.", dst);
}
From source file:com.digitalpebble.behemoth.mahout.util.Mahout2LibSVM.java
License:Apache License
public int run(String[] args) throws Exception { Options options = new Options(); // automatically generate the help statement HelpFormatter formatter = new HelpFormatter(); // create the parser CommandLineParser parser = new GnuParser(); options.addOption("h", "help", false, "print this message"); options.addOption("v", "vector", true, "input vector sequencefile"); options.addOption("l", "label", true, "input vector sequencefile"); options.addOption("o", "output", true, "output Behemoth corpus"); // parse the command line arguments CommandLine line = null;//from w w w .j a va 2 s. c o m try { line = parser.parse(options, args); if (line.hasOption("help")) { formatter.printHelp("CorpusGenerator", options); return 0; } if (!line.hasOption("v") | !line.hasOption("o") | !line.hasOption("l")) { formatter.printHelp("CorpusGenerator", options); return -1; } } catch (ParseException e) { formatter.printHelp("CorpusGenerator", options); } Path vectorPath = new Path(line.getOptionValue("v")); Path labelPath = new Path(line.getOptionValue("l")); String output = line.getOptionValue("o"); Path tempOutput = new Path(vectorPath.getParent(), "temp-" + System.currentTimeMillis()); // extracts the string representations from the vectors int retVal = vectorToString(vectorPath, tempOutput); if (retVal != 0) { HadoopUtil.delete(getConf(), tempOutput); return retVal; } Path tempOutput2 = new Path(vectorPath.getParent(), "temp-" + System.currentTimeMillis()); retVal = convert(tempOutput, labelPath, tempOutput2); // delete the temp output HadoopUtil.delete(getConf(), tempOutput); if (retVal != 0) { HadoopUtil.delete(getConf(), tempOutput2); return retVal; } // convert tempOutput to standard file BufferedWriter bow = new BufferedWriter(new FileWriter(new File(output))); // the label dictionary is not dumped to text int labelMaxIndex = 0; Map<String, Integer> labelIndex = new HashMap<String, Integer>(); Configuration conf = getConf(); FileSystem fs = FileSystem.get(conf); FileStatus[] fss = 
fs.listStatus(tempOutput2); try { for (FileStatus status : fss) { Path path = status.getPath(); // skips the _log or _SUCCESS files if (!path.getName().startsWith("part-") && !path.getName().equals(tempOutput2.getName())) continue; SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf); // read the key + values in that file Text key = new Text(); Text value = new Text(); while (reader.next(key, value)) { String label = key.toString(); // replace the label by its index Integer indexLabel = labelIndex.get(label); if (indexLabel == null) { indexLabel = new Integer(labelMaxIndex); labelIndex.put(label, indexLabel); labelMaxIndex++; } String val = value.toString(); bow.append(indexLabel.toString()).append(val).append("\n"); } reader.close(); } bow.flush(); } catch (Exception e) { e.printStackTrace(); return -1; } finally { bow.close(); fs.delete(tempOutput2, true); } return 0; }