Example usage for org.apache.hadoop.fs Path getParent

List of usage examples for org.apache.hadoop.fs Path getParent

Introduction

This page collects example usages of org.apache.hadoop.fs Path getParent.

Prototype

public Path getParent() 

Document

Returns the parent of a path or null if at root.
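A minimal sketch of this behavior (the namenode host and file paths below are placeholders chosen for illustration, not taken from the examples that follow):

import org.apache.hadoop.fs.Path;

public class GetParentDemo {
    public static void main(String[] args) {
        // The parent of a nested path is its containing directory;
        // scheme and authority are preserved.
        Path file = new Path("hdfs://namenode:8020/user/alice/data.txt");
        System.out.println(file.getParent()); // hdfs://namenode:8020/user/alice

        // At the root there is no parent, so getParent() returns null.
        // Callers should null-check before dereferencing, as the examples below do.
        Path root = new Path("/");
        System.out.println(root.getParent()); // null
    }
}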

Usage

From source file:com.cloudera.recordbreaker.analyzer.FSAnalyzer.java

License:Open Source License

/**
 * <code>addFileMetadata</code> stores the pathname, size, owner, etc.
 */
void addFileMetadata(final FileStatus fstatus, final long crawlId) {
    // Compute strings to represent file metadata
    Path insertFile = fstatus.getPath();
    final boolean isDir = fstatus.isDir();
    FsPermission fsp = fstatus.getPermission();
    final String permissions = (isDir ? "d" : "-") + fsp.getUserAction().SYMBOL + fsp.getGroupAction().SYMBOL
            + fsp.getOtherAction().SYMBOL;

    // Compute formal pathname representation
    String fnameString = null;
    String parentPathString = null;
    if (isDir && insertFile.getParent() == null) {
        parentPathString = "";
        fnameString = insertFile.toString();
    } else {
        fnameString = insertFile.getName();
        parentPathString = insertFile.getParent().toString();

        // REMIND --- mjc --- If we want to modify the Files table s.t. it does
        // not contain the filesystem prefix, then this would be the place to do it.

        if (!parentPathString.endsWith("/")) {
            parentPathString = parentPathString + "/";
        }
    }
    final String parentPath = parentPathString;
    final String fName = fnameString;
    final long fileId = dbQueue.execute(new SQLiteJob<Long>() {
        protected Long job(SQLiteConnection db) throws SQLiteException {
            SQLiteStatement stmt = db.prepare("INSERT into Files VALUES(null, ?, ?, ?, ?, ?, ?, ?, ?, ?)");
            try {
                stmt.bind(1, isDir ? "True" : "False").bind(2, crawlId).bind(3, fName)
                        .bind(4, fstatus.getOwner()).bind(5, fstatus.getGroup()).bind(6, permissions)
                        .bind(7, fstatus.getLen())
                        .bind(8, fileDateFormat.format(new Date(fstatus.getModificationTime())))
                        .bind(9, parentPath);
                stmt.step();
                return db.getLastInsertId();
            } finally {
                stmt.dispose();
            }
        }
    }).complete();
}

From source file:com.cloudera.recordbreaker.analyzer.FSAnalyzer.java

License:Open Source License

/**
 * Get the parents for the given directory from a given crawl
 */
public List<FileSummary> getDirParents(final long crawlid, final String targetDirStr) {
    return dbQueue.execute(new SQLiteJob<List<FileSummary>>() {
        protected List<FileSummary> job(SQLiteConnection db) throws SQLiteException {
            List<FileSummary> output = new ArrayList<FileSummary>();
            SQLiteStatement stmt = db.prepare(
                    "select fid, path, fname from Files WHERE crawlid = ? AND length(?) > length(path||fname) AND isDir = 'True' AND replace(?, path||fname, '') LIKE '/%'");
            try {
                Path targetDir = new Path(targetDirStr);
                if (targetDir.getParent() != null) {
                    stmt.bind(1, crawlid).bind(2, targetDir.toString()).bind(3, targetDir.toString());
                    while (stmt.step()) {
                        //Path p = new Path(stmt.columnString(0) + stmt.columnString(1));
                        output.add(new FileSummary(FSAnalyzer.this, stmt.columnLong(0)));
                    }
                }
            } finally {
                stmt.dispose();
            }
            return output;
        }
    }).complete();
}

From source file:com.cloudera.sqoop.lib.TestBlobRef.java

License:Apache License

private void doExternalTest(final byte[] data, final String filename) throws IOException {

    Configuration conf = new Configuration();
    if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
        conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
    }
    FileSystem fs = FileSystem.get(conf);
    String tmpDir = System.getProperty("test.build.data", "/tmp/");

    Path tmpPath = new Path(tmpDir);
    Path blobFile = new Path(tmpPath, filename);

    // make any necessary parent dirs.
    Path blobParent = blobFile.getParent();
    if (!fs.exists(blobParent)) {
        fs.mkdirs(blobParent);
    }

    LobFile.Writer lw = LobFile.create(blobFile, conf, false);
    try {
        long off = lw.tell();
        long len = data.length;
        OutputStream os = lw.writeBlobRecord(len);
        os.write(data, 0, data.length);
        os.close();
        lw.close();

        String refString = "externalLob(lf," + filename + "," + off + "," + len + ")";
        BlobRef blob = BlobRef.parse(refString);
        assertTrue(blob.isExternal());
        assertEquals(refString, blob.toString());
        InputStream is = blob.getDataStream(conf, tmpPath);
        assertNotNull(is);

        byte[] buf = new byte[4096];
        int bytes = is.read(buf, 0, 4096);
        is.close();

        assertEquals(data.length, bytes);
        for (int i = 0; i < bytes; i++) {
            assertEquals(data[i], buf[i]);
        }
    } finally {
        fs.delete(blobFile, false);
    }
}

From source file:com.cloudera.sqoop.lib.TestClobRef.java

License:Apache License

private void doExternalTest(final String data, final String filename) throws IOException {

    Configuration conf = new Configuration();
    if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
        conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
    }
    FileSystem fs = FileSystem.get(conf);
    String tmpDir = System.getProperty("test.build.data", "/tmp/");

    Path tmpPath = new Path(tmpDir);
    Path clobFile = new Path(tmpPath, filename);

    // make any necessary parent dirs.
    Path clobParent = clobFile.getParent();
    if (!fs.exists(clobParent)) {
        fs.mkdirs(clobParent);
    }

    LobFile.Writer lw = LobFile.create(clobFile, conf, true);
    try {
        long off = lw.tell();
        long len = data.length();
        Writer w = lw.writeClobRecord(len);
        w.append(data);
        w.close();
        lw.close();

        String refString = "externalLob(lf," + filename + "," + off + "," + len + ")";
        ClobRef clob = ClobRef.parse(refString);
        assertTrue(clob.isExternal());
        assertEquals(refString, clob.toString());
        Reader r = clob.getDataStream(conf, tmpPath);
        assertNotNull(r);

        char[] buf = new char[4096];
        int chars = r.read(buf, 0, 4096);
        r.close();

        String str = new String(buf, 0, chars);
        assertEquals(data, str);
    } finally {
        fs.delete(clobFile, false);
    }
}

From source file:com.collective.celos.ci.testing.fixtures.deploy.HdfsInputDeployer.java

License:Apache License

@Override
public void deploy(TestRun testRun) throws Exception {
    FileSystem fileSystem = testRun.getCiContext().getFileSystem();

    CollectFilesAndPathsProcessor pathToFile = new CollectFilesAndPathsProcessor();
    TreeObjectProcessor.process(fixObjectCreator.create(testRun), pathToFile);

    Path pathPrefixed = new Path(Util.augmentHdfsPath(testRun.getHdfsPrefix(), path.toString()));
    for (java.nio.file.Path childPath : pathToFile.pathToFiles.keySet()) {
        Path pathTo = new Path(pathPrefixed, childPath.toString());
        fileSystem.mkdirs(pathTo.getParent());

        FSDataOutputStream outputStream = fileSystem.create(pathTo);
        try {
            IOUtils.copy(pathToFile.pathToFiles.get(childPath).getContent(), outputStream);
        } finally {
            outputStream.flush();
            outputStream.close();
        }

    }
}

From source file:com.collective.celos.ci.testing.fixtures.deploy.hive.HiveTableDeployer.java

License:Apache License

private Path createTempHdfsFileForInsertion(FixTable fixTable, TestRun testRun) throws Exception {

    Path pathToParent = new Path(testRun.getHdfsPrefix(), ".hive");
    Path pathTo = new Path(pathToParent, UUID.randomUUID().toString());
    FileSystem fileSystem = testRun.getCiContext().getFileSystem();
    fileSystem.mkdirs(pathTo.getParent());
    FSDataOutputStream outputStream = fileSystem.create(pathTo);

    CSVWriter writer = new CSVWriter(new OutputStreamWriter(outputStream), '\t', CSVWriter.NO_QUOTE_CHARACTER);

    for (FixTable.FixRow fixRow : fixTable.getRows()) {
        List<String> rowData = Lists.newArrayList();
        for (String colName : fixTable.getColumnNames()) {
            rowData.add(fixRow.getCells().get(colName));
        }
        String[] dataArray = rowData.toArray(new String[rowData.size()]);
        writer.writeNext(dataArray);
    }

    writer.close();

    fileSystem.setPermission(pathToParent, new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
    fileSystem.setPermission(pathTo, new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
    return pathTo;
}

From source file:com.datasalt.pangool.utils.test.AbstractHadoopTestLibrary.java

License:Apache License

public void cleanUp() throws IOException {
    for (Map.Entry<String, Object> entry : inputs.entrySet()) {
        trash(entry.getKey());
    }
    for (Map.Entry<String, List<Pair<Object, Object>>> entry : outputs.entrySet()) {
        Path p = new Path(entry.getKey());
        if (p.toString().contains("-0000")) {
            p = p.getParent();
        }
        trash(p.toString());
    }
}

From source file:com.datatorrent.lib.io.fs.FileStitcher.java

License:Apache License

protected void mergeBlocks(T stitchedFileMetaData) throws IOException {
    //when writing to tmp files there can be vagrant tmp files which we have to clean
    final Path dst = new Path(filePath, stitchedFileMetaData.getStitchedFileRelativePath());
    PathFilter tempFileFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(dst.getName()) && path.getName().endsWith(PART_FILE_EXTENTION);
        }
    };
    if (outputFS.exists(dst.getParent())) {
        FileStatus[] statuses = outputFS.listStatus(dst.getParent(), tempFileFilter);
        for (FileStatus status : statuses) {
            String statusName = status.getPath().getName();
            LOG.debug("deleting vagrant file {}", statusName);
            outputFS.delete(status.getPath(), true);
        }
    }
    tempOutFilePath = new Path(filePath, stitchedFileMetaData.getStitchedFileRelativePath() + '.'
            + System.currentTimeMillis() + PART_FILE_EXTENTION);
    try {
        writeTempOutputFile(stitchedFileMetaData);
        moveToFinalFile(stitchedFileMetaData);
    } catch (BlockNotFoundException e) {
        LOG.warn("Block file {} not found. Assuming recovery mode for file {}. ", e.getBlockPath(),
                stitchedFileMetaData.getStitchedFileRelativePath());
        //Remove temp output file
        outputFS.delete(tempOutFilePath, false);
    }
}

From source file:com.datatorrent.lib.io.fs.FileStitcher.java

License:Apache License

/**
 * Moving temp output file to final file
 *
 * @param tempOutFilePath
 *          Temporary output file
 * @param destination
 *          Destination directory path
 * @throws IOException
 */
protected void moveToFinalFile(Path tempOutFilePath, Path destination) throws IOException {
    Path src = Path.getPathWithoutSchemeAndAuthority(tempOutFilePath);
    Path dst = Path.getPathWithoutSchemeAndAuthority(destination);

    boolean moveSuccessful = false;
    if (!outputFS.exists(dst.getParent())) {
        outputFS.mkdirs(dst.getParent());
    }
    if (outputFS.exists(dst)) {
        outputFS.delete(dst, false);
    }
    moveSuccessful = outputFS.rename(src, dst);

    if (moveSuccessful) {
        LOG.debug("File {} moved successfully to destination folder.", dst);
    } else {
        throw new RuntimeException("Unable to move file from " + src + " to " + dst);
    }
}

From source file:com.digitalpebble.behemoth.mahout.util.Mahout2LibSVM.java

License:Apache License

public int run(String[] args) throws Exception {

    Options options = new Options();
    // automatically generate the help statement
    HelpFormatter formatter = new HelpFormatter();
    // create the parser
    CommandLineParser parser = new GnuParser();

    options.addOption("h", "help", false, "print this message");
    options.addOption("v", "vector", true, "input vector sequencefile");
    options.addOption("l", "label", true, "input vector sequencefile");
    options.addOption("o", "output", true, "output Behemoth corpus");

    // parse the command line arguments
    CommandLine line = null;
    try {
        line = parser.parse(options, args);
        if (line.hasOption("help")) {
            formatter.printHelp("CorpusGenerator", options);
            return 0;
        }
        if (!line.hasOption("v") | !line.hasOption("o") | !line.hasOption("l")) {
            formatter.printHelp("CorpusGenerator", options);
            return -1;
        }
    } catch (ParseException e) {
        formatter.printHelp("CorpusGenerator", options);
        return -1;
    }

    Path vectorPath = new Path(line.getOptionValue("v"));
    Path labelPath = new Path(line.getOptionValue("l"));
    String output = line.getOptionValue("o");

    Path tempOutput = new Path(vectorPath.getParent(), "temp-" + System.currentTimeMillis());

    // extracts the string representations from the vectors
    int retVal = vectorToString(vectorPath, tempOutput);
    if (retVal != 0) {
        HadoopUtil.delete(getConf(), tempOutput);
        return retVal;
    }

    Path tempOutput2 = new Path(vectorPath.getParent(), "temp-" + System.currentTimeMillis());

    retVal = convert(tempOutput, labelPath, tempOutput2);

    // delete the temp output
    HadoopUtil.delete(getConf(), tempOutput);

    if (retVal != 0) {
        HadoopUtil.delete(getConf(), tempOutput2);
        return retVal;
    }

    // convert tempOutput to standard file
    BufferedWriter bow = new BufferedWriter(new FileWriter(new File(output)));

    // the label dictionary is not dumped to text
    int labelMaxIndex = 0;
    Map<String, Integer> labelIndex = new HashMap<String, Integer>();

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    FileStatus[] fss = fs.listStatus(tempOutput2);
    try {
        for (FileStatus status : fss) {
            Path path = status.getPath();
            // skips the _log or _SUCCESS files
            if (!path.getName().startsWith("part-") && !path.getName().equals(tempOutput2.getName()))
                continue;
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
            // read the key + values in that file
            Text key = new Text();
            Text value = new Text();
            while (reader.next(key, value)) {
                String label = key.toString();
                // replace the label by its index
                Integer indexLabel = labelIndex.get(label);
                if (indexLabel == null) {
                    indexLabel = new Integer(labelMaxIndex);
                    labelIndex.put(label, indexLabel);
                    labelMaxIndex++;
                }
                String val = value.toString();
                bow.append(indexLabel.toString()).append(val).append("\n");
            }
            reader.close();
        }
        bow.flush();
    } catch (Exception e) {
        e.printStackTrace();
        return -1;
    } finally {
        bow.close();
        fs.delete(tempOutput2, true);
    }
    return 0;
}