Usage examples for org.apache.hadoop.fs.Path#toString()
@Override
public String toString()
From source file:com.ikanow.aleph2.data_import_manager.batch_enrichment.actors.FolderWatcherActor.java
License:Apache License
protected void traverseFolders() { logger.debug("traverseFolders for path: " + _bucket_paths); try {/*from w w w .j a v a 2s . c om*/ detectBucketPaths(); if (_bucket_paths != null) { logger.debug("traverseFolders for path: " + _bucket_paths); for (Path path : _bucket_paths) { String bucketPathStr = path.toString(); String dataPathStr = dataPath.toString(); String bucketFullName = createFullName(bucketPathStr, dataPathStr); // create or send message to BatchBucketActors checkAndScheduleBucketAgent(bucketPathStr, bucketFullName); } } } catch (Exception e) { logger.error("traverseFolders Caught Exception:", e); } }
From source file:com.ikanow.aleph2.data_import_manager.harvest.actors.TestDataBucketChangeActor.java
License:Apache License
protected List<SharedLibraryBean> createSharedLibraryBeans(Path path1, Path path2) { final SharedLibraryBean lib_element = BeanTemplateUtils.build(SharedLibraryBean.class) .with(SharedLibraryBean::_id, "test_tech_id_harvest") .with(SharedLibraryBean::path_name, path1.toString()).with(SharedLibraryBean::misc_entry_point, "com.ikanow.aleph2.test.example.ExampleHarvestTechnology") .done().get();// w ww. j av a 2 s . c o m final SharedLibraryBean lib_element2 = BeanTemplateUtils.build(SharedLibraryBean.class) .with(SharedLibraryBean::_id, "test_module_id").with(SharedLibraryBean::path_name, path2.toString()) .done().get(); final SharedLibraryBean lib_element3 = BeanTemplateUtils.build(SharedLibraryBean.class) .with(SharedLibraryBean::_id, "failtest") .with(SharedLibraryBean::path_name, "/not_exist/here.fghgjhgjhg").done().get(); return Arrays.asList(lib_element, lib_element2, lib_element3); }
From source file:com.ikanow.infinit.e.processing.custom.utils.HadoopUtils.java
License:Open Source License
/**
 * Reads every non-empty {@code part-*} file in the job's output directory and converts
 * each line into a {@link BasicDBObject}.
 * <p>
 * A line is split on the first tab: when a tab is present the left part is stored under
 * "key" and the right part under "value"; otherwise the whole line is stored under "value".
 * Lines are decoded as UTF-8.
 *
 * @param cmr    job whose output directory is resolved through {@code HadoopUtils.getPathForJob}
 * @param nLimit not applied by this method
 *               (NOTE(review): the name suggests a record limit - confirm whether callers rely on it)
 * @param fields not applied by this method
 * @return the converted lines, in file/line order; empty when no part file matches
 * @throws IOException if a part file cannot be listed, opened or read
 * @throws SAXException declared for callers; presumably raised while loading the configuration - confirm
 * @throws ParserConfigurationException declared for callers; presumably raised while loading the configuration - confirm
 */
public static BasicDBList getBsonFromTextFiles(CustomMapReduceJobPojo cmr, int nLimit, String fields)
        throws IOException, SAXException, ParserConfigurationException {
    BasicDBList dbl = new BasicDBList();

    PropertiesManager props = new PropertiesManager();
    Configuration conf = getConfiguration(props);

    Path pathDir = HadoopUtils.getPathForJob(cmr, conf, false);
    FileSystem fs = FileSystem.get(conf);

    FileStatus[] files = fs.globStatus(new Path(pathDir.toString() + "/part-*"));
    if (null == files) {
        // globStatus may report "nothing there" as null rather than an empty array
        return dbl;
    }
    for (FileStatus file : files) {
        if (file.getLen() <= 0) {
            continue;
        }
        FSDataInputStream in = fs.open(file.getPath());
        try {
            // Explicit charset: the platform default would corrupt non-ASCII keys/values
            BufferedReader bin = new BufferedReader(new InputStreamReader(in, "UTF-8"));
            String s;
            while (null != (s = bin.readLine())) {
                String[] keyValue = s.split("\t", 2);
                BasicDBObject dbo = new BasicDBObject();
                if (keyValue.length > 1) {
                    dbo.put("key", keyValue[0]);
                    dbo.put("value", keyValue[1]);
                } else {
                    dbo.put("value", keyValue[0]);
                }
                dbl.add(dbo);
            }
        } finally {
            // Always release the stream, also when a read fails half-way through
            in.close();
        }
    }
    return dbl;
}
From source file:com.indeed.imhotep.builder.tsv.TsvConverter.java
License:Apache License
private void checkPathExists(Path path) { boolean exists; FileSystem fs;//from w w w .ja va 2 s. c o m Path qualifiedPath = path; try { fs = getFS(path); qualifiedPath = path.makeQualified(fs); exists = fs.exists(path); } catch (Exception e) { exists = false; } if (!exists) { throw new RuntimeException("The provided path doesn't exist " + qualifiedPath.toString() + "\nFor hdfs files use 'hdfs:' prefix like hdfs:/tmp/file.tsv" + "\nFor local files use 'file://' prefix like file:/tmp/file.tsv"); } }
From source file:com.indeed.imhotep.builder.tsv.TsvConverter.java
License:Apache License
private void moveToProcessed(FileToIndex fileToIndex, Path processedParentDirPath) { // move the input file to the 'processed' or 'failed' dir Path processedIndexDirPath = null; try {//from w ww . java2s .c o m processedIndexDirPath = getIndexDirUnderPath(fileToIndex, processedParentDirPath); final Path processedFilePath = new Path(processedIndexDirPath, fileToIndex.fsPath.getName()); if (inputFS.exists(processedFilePath)) { log.info("Deleting older processed TSV: " + processedFilePath); inputFS.delete(processedFilePath, false); } if (!toIndexPath.toString().equals(processedParentDirPath.toString())) { inputFS.rename(fileToIndex.fsPath, processedFilePath); } } catch (IOException e) { log.warn("Failed to move processed file " + fileToIndex.fsPath + " to " + processedIndexDirPath, e); } }
From source file:com.indeed.imhotep.iql.cache.HDFSQueryCache.java
License:Apache License
@Override public OutputStream getOutputStream(String cachedFileName) throws IOException { if (!enabled) { throw new IllegalStateException("Can't send data to HDFS cache as it is disabled"); }/*from ww w. j ava 2 s.com*/ makeSurePathExists(cachePath); final Path filePath = new Path(cachePath, cachedFileName); final Path tempPath = new Path(filePath.toString() + "." + (System.currentTimeMillis() % 100000) + ".tmp"); final FSDataOutputStream fileOut = hdfs.create(tempPath); // Wrap the returned OutputStream so that we can finish when it is closed return new OutputStream() { private boolean closed = false; @Override public void write(byte[] b) throws IOException { fileOut.write(b); } @Override public void write(byte[] b, int off, int len) throws IOException { fileOut.write(b, off, len); } @Override public void flush() throws IOException { fileOut.flush(); } @Override public void write(int b) throws IOException { fileOut.write(b); } @Override public void close() throws IOException { if (closed) { return; } closed = true; fileOut.close(); // Move to the final file location hdfs.rename(tempPath, filePath); } }; }
From source file:com.inmobi.conduit.AbstractService.java
License:Apache License
protected void findDiffBetweenLastAddedAndFirstPath(String stream, String table) { Set<Path> listOfPathsTobeRegistered = pathsToBeregisteredPerTable.get(table); synchronized (listOfPathsTobeRegistered) { if (listOfPathsTobeRegistered.isEmpty()) { return; } else {/*from ww w .ja va2 s . com*/ long lastAddedTime = lastAddedPartitionMap.get(table); Iterator<Path> it = listOfPathsTobeRegistered.iterator(); // get the first path Path firstPathInList = it.next(); Date timeFromPath = getTimeStampFromHCatPartition(firstPathInList.toString(), stream); LOG.info("Find the missing partitions between " + getLogDateString(lastAddedTime) + " and " + timeFromPath + " for table " + table); while (isMissingPartitions(timeFromPath.getTime(), lastAddedTime)) { long nextPathPartTime = lastAddedTime + MILLISECONDS_IN_MINUTE; Path nextPathTobeAdded = getFinalPath(nextPathPartTime, stream); LOG.info("Add the missing partition location " + nextPathTobeAdded + " to the list for registering"); if (nextPathTobeAdded != null) { listOfPathsTobeRegistered.add(nextPathTobeAdded); lastAddedTime = nextPathPartTime; } } pathsToBeregisteredPerTable.put(table, listOfPathsTobeRegistered); } } }
From source file:com.inmobi.conduit.AbstractService.java
License:Apache License
protected void registerPartitions() throws InterruptedException, ParseException, HiveException { // return immediately if hcat is not enabled or if user issues a stop command if (!Conduit.isHCatEnabled() || stopped) { LOG.info("Hcat is not enabled or stop is issued. Hence not registering any partitions"); return;//from ww w .j a va 2s.c om } for (String stream : streamsToProcess) { if (!isStreamHCatEnabled(stream)) { LOG.info("Hcat is not enabled for " + stream + " stream." + " Hence not registering partitions"); continue; } String tableName = getTableName(stream); /* * If it is not able to find the diff between the last added partition * and first path in the partition list then it should not register * partitions until it finds the diff */ if (!preparePartitionsTobeRegistered(stream)) { LOG.info("Not registering the partitions as part of this run as" + " it was not able to find the last added partition" + " or diff betweeen last added and first path in the list"); continue; } if (lastAddedPartitionMap.get(tableName) == FAILED_GET_PARTITIONS) { LOG.warn("Failed to get partitions for stream from server hence" + " not registering new partiotions"); continue; } Set<Path> partitionsTobeRegistered = pathsToBeregisteredPerTable.get(tableName); LOG.info("partitions to be registered : " + partitionsTobeRegistered + " for " + tableName + " table"); synchronized (partitionsTobeRegistered) { if (partitionsTobeRegistered.isEmpty() || partitionsTobeRegistered.size() == 1) { LOG.info("No partitions to be registered for table " + tableName); return; } AddPartitionDesc addPd = new AddPartitionDesc(Conduit.getHcatDBName(), tableName, true); int numOfPartitionsTobeRegistered = partitionsTobeRegistered.size() - 1; Date updateWithLastAddedTime = null; int count = 0; Iterator<Path> pathIt = partitionsTobeRegistered.iterator(); // Register all the partitions in the list except the last one while (count++ < numOfPartitionsTobeRegistered) { /* always retrieve first element from the list 
as we remove the * element once it is added to partition. then second element will * be the first one */ Path pathToBeregistered = pathIt.next(); Date partitionDate = getTimeStampFromHCatPartition(pathToBeregistered.toString(), stream); addPd.addPartition(getPartSpecFromPartTime(partitionDate.getTime()), pathToBeregistered.toString()); updateWithLastAddedTime = partitionDate; } /* Add all partitions to the table and remove registered paths from * the in memory set if all partitions were added successfully */ if (addPartitions(stream, tableName, addPd, updateWithLastAddedTime)) { Iterator<Path> it = partitionsTobeRegistered.iterator(); while (numOfPartitionsTobeRegistered-- > 0) { LOG.debug("Remove partition path " + it.next() + "from the partitionMap"); it.remove(); } } } } }
From source file:com.inmobi.conduit.AbstractService.java
License:Apache License
private List<Path> listPartFiles(Path path, FileSystem fs) { List<Path> matches = new LinkedList<Path>(); try {//from w w w . j a v a2s . c om FileStatus[] statuses = fs.listStatus(path, new PathFilter() { public boolean accept(Path path) { return path.toString().contains("part"); } }); for (FileStatus status : statuses) { matches.add(status.getPath()); } } catch (IOException e) { LOG.error(e.getMessage(), e); } return matches; }