Example usage for org.apache.hadoop.fs Path toString

List of usage examples for org.apache.hadoop.fs Path toString

Introduction

On this page you can find example usages of org.apache.hadoop.fs Path toString.

Prototype

@Override
    public String toString() 

Source Link

Usage

From source file:com.ikanow.aleph2.data_import_manager.batch_enrichment.actors.FolderWatcherActor.java

License:Apache License

/**
 * Visits every entry of {@code _bucket_paths} and passes it, together with the
 * full name produced by {@code createFullName}, to
 * {@code checkAndScheduleBucketAgent}.
 * <p>
 * {@code detectBucketPaths()} is invoked first (presumably it refreshes
 * {@code _bucket_paths} - confirm against its implementation). Nothing is
 * scheduled when {@code _bucket_paths} is {@code null}. Any exception raised
 * along the way is logged and not propagated.
 */
protected void traverseFolders() {
    logger.debug("traverseFolders for path: " + _bucket_paths);
    try {
        detectBucketPaths();
        if (_bucket_paths == null) {
            return;
        }
        logger.debug("traverseFolders for path: " + _bucket_paths);
        for (Path bucketPath : _bucket_paths) {
            final String bucketLocation = bucketPath.toString();
            final String fullName = createFullName(bucketLocation, dataPath.toString());
            // create or send message to BatchBucketActors
            checkAndScheduleBucketAgent(bucketLocation, fullName);
        }
    } catch (Exception e) {
        logger.error("traverseFolders Caught Exception:", e);
    }
}

From source file:com.ikanow.aleph2.data_import_manager.harvest.actors.TestDataBucketChangeActor.java

License:Apache License

/**
 * Builds the three shared-library beans used as test fixtures.
 *
 * @param path1 location stored as the path name of the "test_tech_id_harvest" bean
 * @param path2 location stored as the path name of the "test_module_id" bean
 * @return a fixed-size list holding, in order: the harvest technology bean, the
 *         module bean, and a "failtest" bean whose path name is a hard-coded
 *         location ("/not_exist/here.fghgjhgjhg")
 */
protected List<SharedLibraryBean> createSharedLibraryBeans(Path path1, Path path2) {
    final SharedLibraryBean harvestTechnology = BeanTemplateUtils.build(SharedLibraryBean.class)
            .with(SharedLibraryBean::_id, "test_tech_id_harvest")
            .with(SharedLibraryBean::path_name, path1.toString())
            .with(SharedLibraryBean::misc_entry_point,
                    "com.ikanow.aleph2.test.example.ExampleHarvestTechnology")
            .done()
            .get();

    final SharedLibraryBean module = BeanTemplateUtils.build(SharedLibraryBean.class)
            .with(SharedLibraryBean::_id, "test_module_id")
            .with(SharedLibraryBean::path_name, path2.toString())
            .done()
            .get();

    final SharedLibraryBean missingLibrary = BeanTemplateUtils.build(SharedLibraryBean.class)
            .with(SharedLibraryBean::_id, "failtest")
            .with(SharedLibraryBean::path_name, "/not_exist/here.fghgjhgjhg")
            .done()
            .get();

    return Arrays.asList(harvestTechnology, module, missingLibrary);
}

From source file:com.ikanow.infinit.e.processing.custom.utils.HadoopUtils.java

License:Open Source License

/**
 * Reads the "part-*" text files found under a job's path and converts every line into a
 * BSON object.
 * <p>
 * Each line is split on its first tab character: a line of the form key-TAB-value yields an
 * object with a "key" and a "value" field, a line without a tab yields an object holding
 * only "value". Empty files are skipped.
 *
 * @param cmr    the job whose path is resolved through {@code HadoopUtils.getPathForJob}
 * @param nLimit not used by this implementation - every line of every matching file is returned
 * @param fields not used by this implementation
 * @return the converted lines, in the order the files and their lines were read
 * @throws IOException if the file system cannot be obtained or a file cannot be read
 * @throws SAXException declared by the signature; presumably raised while loading the
 *         configuration - TODO confirm
 * @throws ParserConfigurationException declared by the signature; see {@code SAXException}
 */
public static BasicDBList getBsonFromTextFiles(CustomMapReduceJobPojo cmr, int nLimit, String fields)
        throws IOException, SAXException, ParserConfigurationException {

    BasicDBList dbl = new BasicDBList();

    PropertiesManager props = new PropertiesManager();
    Configuration conf = getConfiguration(props);

    Path pathDir = HadoopUtils.getPathForJob(cmr, conf, false);
    FileSystem fs = FileSystem.get(conf);

    FileStatus[] files = fs.globStatus(new Path(pathDir.toString() + "/part-*"));
    for (FileStatus file : files) {
        if (file.getLen() <= 0) {
            continue;
        }
        // try-with-resources: the stream is released even when a read fails part-way through.
        // The charset is explicit so decoding does not depend on the platform default
        // (assumes the part files are UTF-8, as Hadoop text output normally is).
        try (FSDataInputStream in = fs.open(file.getPath());
                BufferedReader bin = new BufferedReader(
                        new InputStreamReader(in, java.nio.charset.StandardCharsets.UTF_8))) {
            for (String line = bin.readLine(); line != null; line = bin.readLine()) {
                // limit of 2: only the first tab separates key from value
                String[] keyValue = line.split("\t", 2);
                BasicDBObject dbo = new BasicDBObject();
                if (keyValue.length > 1) {
                    dbo.put("key", keyValue[0]);
                    dbo.put("value", keyValue[1]);
                } else {
                    dbo.put("value", keyValue[0]);
                }
                dbl.add(dbo);
            }
        }
    }
    return dbl;
}

From source file:com.indeed.imhotep.builder.tsv.TsvConverter.java

License:Apache License

/**
 * Verifies that {@code path} exists on the file system returned by {@code getFS(path)}.
 * <p>
 * Any failure while resolving the file system or probing the path is treated as
 * "does not exist"; the original failure is attached as the cause of the thrown
 * exception so that it is not lost.
 *
 * @param path the path to check
 * @throws RuntimeException if the path does not exist or its existence could not be
 *         determined; the message contains the qualified path when it could be computed,
 *         otherwise the path as given
 */
private void checkPathExists(Path path) {
    boolean exists;
    Path qualifiedPath = path;
    Exception lookupFailure = null;
    try {
        final FileSystem fs = getFS(path);
        qualifiedPath = path.makeQualified(fs);
        exists = fs.exists(path);
    } catch (Exception e) {
        // Remember why the lookup failed instead of silently discarding it.
        lookupFailure = e;
        exists = false;
    }
    if (!exists) {
        // lookupFailure is null when the path is simply absent; a null cause is permitted.
        throw new RuntimeException("The provided path doesn't exist " + qualifiedPath.toString()
                + "\nFor hdfs files use 'hdfs:' prefix like hdfs:/tmp/file.tsv"
                + "\nFor local files use 'file://' prefix like file:/tmp/file.tsv", lookupFailure);
    }
}

From source file:com.indeed.imhotep.builder.tsv.TsvConverter.java

License:Apache License

/**
 * Moves an input file into its index directory under {@code processedParentDirPath}
 * (the 'processed' or 'failed' dir).
 * <p>
 * A file already present at the destination is deleted first. The move itself is skipped
 * when {@code toIndexPath} and {@code processedParentDirPath} have the same string form.
 * Failures are logged as warnings and never propagated.
 *
 * @param fileToIndex            the input file; its {@code fsPath} is the move source
 * @param processedParentDirPath parent directory under which the destination is resolved
 *                               via {@code getIndexDirUnderPath}
 */
private void moveToProcessed(FileToIndex fileToIndex, Path processedParentDirPath) {
    Path processedIndexDirPath = null;
    try {
        processedIndexDirPath = getIndexDirUnderPath(fileToIndex, processedParentDirPath);
        final Path processedFilePath = new Path(processedIndexDirPath, fileToIndex.fsPath.getName());
        // NOTE(review): this delete runs even when the move below is skipped - confirm intended.
        if (inputFS.exists(processedFilePath)) {
            log.info("Deleting older processed TSV: " + processedFilePath);
            inputFS.delete(processedFilePath, false);
        }
        final boolean moveRequired = !toIndexPath.toString().equals(processedParentDirPath.toString());
        // rename() can report failure through its return value instead of throwing,
        // so the result must be checked or a failed move goes unnoticed.
        if (moveRequired && !inputFS.rename(fileToIndex.fsPath, processedFilePath)) {
            log.warn("Failed to move processed file " + fileToIndex.fsPath + " to " + processedFilePath);
        }
    } catch (IOException e) {
        log.warn("Failed to move processed file " + fileToIndex.fsPath + " to " + processedIndexDirPath, e);
    }
}

From source file:com.indeed.imhotep.iql.cache.HDFSQueryCache.java

License:Apache License

/**
 * Opens a stream for writing a new cache entry.
 * <p>
 * Data is written to a temporary file whose name is derived from the final file name plus a
 * time-based suffix. Closing the returned stream closes that file and renames it to
 * {@code cachePath/cachedFileName}. If the rename reports failure, the temporary file is
 * deleted so that it does not linger in the cache directory.
 *
 * @param cachedFileName name of the cache entry, resolved against {@code cachePath}
 * @return a stream whose {@code close()} publishes the entry; closing it again is a no-op
 * @throws IllegalStateException if the cache is disabled
 * @throws IOException if the cache path or the temporary file cannot be prepared
 */
@Override
public OutputStream getOutputStream(String cachedFileName) throws IOException {
    if (!enabled) {
        throw new IllegalStateException("Can't send data to HDFS cache as it is disabled");
    }
    makeSurePathExists(cachePath);
    final Path filePath = new Path(cachePath, cachedFileName);
    final Path tempPath = new Path(filePath.toString() + "." + (System.currentTimeMillis() % 100000) + ".tmp");

    final FSDataOutputStream fileOut = hdfs.create(tempPath);
    // Wrap the returned OutputStream so that we can finish when it is closed
    return new OutputStream() {
        private boolean closed = false;

        @Override
        public void write(int b) throws IOException {
            fileOut.write(b);
        }

        @Override
        public void write(byte[] b) throws IOException {
            fileOut.write(b);
        }

        @Override
        public void write(byte[] b, int off, int len) throws IOException {
            fileOut.write(b, off, len);
        }

        @Override
        public void flush() throws IOException {
            fileOut.flush();
        }

        @Override
        public void close() throws IOException {
            if (closed) {
                return;
            }
            closed = true;
            fileOut.close();

            // Move to the final file location. rename() can signal failure through its
            // return value; in that case nothing references the temp file any more,
            // so remove it rather than leaving an orphan behind.
            if (!hdfs.rename(tempPath, filePath)) {
                hdfs.delete(tempPath, false);
            }
        }
    };
}

From source file:com.inmobi.conduit.AbstractService.java

License:Apache License

/**
 * Fills the gap between the last added partition of {@code table} and the first path that is
 * waiting to be registered.
 * <p>
 * Starting from the last added time, one location per minute is computed with
 * {@code getFinalPath} and added to the table's pending set for as long as
 * {@code isMissingPartitions} reports a gap. Nothing happens when the pending set is empty.
 * <p>
 * If a location cannot be computed ({@code getFinalPath} returns {@code null}) the gap
 * filling stops for this invocation; previously the loop could never terminate in that case
 * because the time cursor was not advanced.
 *
 * @param stream the stream the partitions belong to
 * @param table  the table whose pending paths are examined
 */
protected void findDiffBetweenLastAddedAndFirstPath(String stream, String table) {
    Set<Path> listOfPathsTobeRegistered = pathsToBeregisteredPerTable.get(table);
    synchronized (listOfPathsTobeRegistered) {
        if (listOfPathsTobeRegistered.isEmpty()) {
            return;
        }
        long lastAddedTime = lastAddedPartitionMap.get(table);
        // get the first path (first in the set's iteration order)
        Path firstPathInList = listOfPathsTobeRegistered.iterator().next();
        Date timeFromPath = getTimeStampFromHCatPartition(firstPathInList.toString(), stream);
        LOG.info("Find the missing partitions between " + getLogDateString(lastAddedTime) + " and "
                + timeFromPath + " for table " + table);
        while (isMissingPartitions(timeFromPath.getTime(), lastAddedTime)) {
            long nextPathPartTime = lastAddedTime + MILLISECONDS_IN_MINUTE;
            Path nextPathTobeAdded = getFinalPath(nextPathPartTime, stream);
            if (nextPathTobeAdded == null) {
                // Without a path the cursor cannot advance; bail out instead of spinning forever.
                LOG.warn("Not able to find the partition location for "
                        + getLogDateString(nextPathPartTime) + " of stream " + stream
                        + ". Hence not adding further missing partitions in this run");
                break;
            }
            LOG.info("Add the missing partition location " + nextPathTobeAdded
                    + " to the list for registering");
            listOfPathsTobeRegistered.add(nextPathTobeAdded);
            lastAddedTime = nextPathPartTime;
        }
        pathsToBeregisteredPerTable.put(table, listOfPathsTobeRegistered);
    }
}

From source file:com.inmobi.conduit.AbstractService.java

License:Apache License

/**
 * Registers the pending partition paths of every stream in {@code streamsToProcess}.
 * <p>
 * Does nothing when HCat is disabled or a stop was requested. A stream is skipped when HCat
 * is not enabled for it, when {@code preparePartitionsTobeRegistered} fails, when the last
 * added partition is marked {@code FAILED_GET_PARTITIONS}, or when its table has fewer than
 * two pending paths. Skipping one stream never prevents the remaining streams from being
 * processed.
 * <p>
 * For each remaining table all pending paths except the last one are handed to
 * {@code addPartitions}; they are removed from the in-memory set only when that call
 * reports success.
 *
 * @throws InterruptedException declared by the signature; presumably raised by one of the
 *         helper calls - TODO confirm which
 * @throws ParseException       declared by the signature; see above
 * @throws HiveException        declared by the signature; see above
 */
protected void registerPartitions() throws InterruptedException, ParseException, HiveException {
    // return immediately if hcat is not enabled or if user issues a stop command
    if (!Conduit.isHCatEnabled() || stopped) {
        LOG.info("Hcat is not enabled or stop is issued. Hence not registering any partitions");
        return;
    }
    for (String stream : streamsToProcess) {
        if (!isStreamHCatEnabled(stream)) {
            LOG.info("Hcat is not enabled for " + stream + " stream." + " Hence not registering partitions");
            continue;
        }
        String tableName = getTableName(stream);
        /*
         * If it is not able to find the diff between the last added partition
         * and first path in the partition list then it should not register
         * partitions until it finds the diff
         */
        if (!preparePartitionsTobeRegistered(stream)) {
            LOG.info("Not registering the partitions as part of this run as"
                    + " it was not able to find the last added partition"
                    + " or diff betweeen last added and first path in the list");
            continue;
        }
        if (lastAddedPartitionMap.get(tableName) == FAILED_GET_PARTITIONS) {
            LOG.warn("Failed to get partitions for stream from server hence"
                    + " not registering new partiotions");
            continue;
        }
        Set<Path> partitionsTobeRegistered = pathsToBeregisteredPerTable.get(tableName);
        LOG.info("partitions to be registered : " + partitionsTobeRegistered + " for " + tableName + " table");
        synchronized (partitionsTobeRegistered) {
            if (partitionsTobeRegistered.isEmpty() || partitionsTobeRegistered.size() == 1) {
                LOG.info("No partitions to be registered for table " + tableName);
                // Only this table has nothing to do; the other streams must still be handled,
                // so move on to the next one instead of leaving the method.
                continue;
            }
            AddPartitionDesc addPd = new AddPartitionDesc(Conduit.getHcatDBName(), tableName, true);
            int numOfPartitionsTobeRegistered = partitionsTobeRegistered.size() - 1;
            Date updateWithLastAddedTime = null;
            // Register all the partitions in the list except the last one
            Iterator<Path> pathIt = partitionsTobeRegistered.iterator();
            for (int count = 0; count < numOfPartitionsTobeRegistered; count++) {
                // Walk the set in iteration order; nothing is removed until
                // addPartitions has succeeded.
                Path pathToBeregistered = pathIt.next();

                Date partitionDate = getTimeStampFromHCatPartition(pathToBeregistered.toString(), stream);

                addPd.addPartition(getPartSpecFromPartTime(partitionDate.getTime()),
                        pathToBeregistered.toString());
                updateWithLastAddedTime = partitionDate;
            }
            /* Add all partitions to the table and remove registered paths from
             * the in memory set if all partitions were added successfully
             */
            if (addPartitions(stream, tableName, addPd, updateWithLastAddedTime)) {
                Iterator<Path> it = partitionsTobeRegistered.iterator();
                for (int removed = 0; removed < numOfPartitionsTobeRegistered; removed++) {
                    Path registeredPath = it.next();
                    LOG.debug("Remove partition path " + registeredPath + "from the partitionMap");
                    it.remove();
                }
            }
        }
    }
}

From source file:com.inmobi.conduit.AbstractService.java

License:Apache License

/**
 * Lists the entries directly under {@code path} whose path string contains "part".
 * <p>
 * NOTE(review): the filter looks at the complete path string, not only the file name, so
 * "part" appearing in any parent component also matches - confirm that this is intended.
 *
 * @param path directory to list
 * @param fs   file system used for the listing
 * @return the matching paths; empty when nothing matches or when the listing fails
 *         (the failure is logged, not propagated)
 */
private List<Path> listPartFiles(Path path, FileSystem fs) {
    final PathFilter partFilter = new PathFilter() {
        public boolean accept(Path candidate) {
            return candidate.toString().contains("part");
        }
    };
    List<Path> partFiles = new LinkedList<Path>();
    try {
        FileStatus[] listing = fs.listStatus(path, partFilter);
        for (int i = 0; i < listing.length; i++) {
            partFiles.add(listing[i].getPath());
        }
    } catch (IOException e) {
        LOG.error(e.getMessage(), e);
    }
    return partFiles;
}