List of usage examples for org.apache.hadoop.fs Path getName
public String getName()
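Path.getName() returns only the final component of a path, with the scheme, authority, and parent directories stripped. A minimal, self-contained sketch (the HDFS URI below is hypothetical):

import org.apache.hadoop.fs.Path;

public class PathGetNameDemo {
    public static void main(String[] args) {
        // getName() returns only the final component of the path, without parent directories.
        Path file = new Path("hdfs://namenode:8020/user/data/part-r-00019.csv");
        System.out.println(file.getName());             // part-r-00019.csv
        System.out.println(file.getParent().getName()); // data
        // For the root path the name is the empty string.
        System.out.println(new Path("/").getName());    // prints an empty line
    }
}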
From source file:com.ibm.stocator.fs.ObjectStoreFileSystem.java
License:Open Source License
/**
 * {@inheritDoc}
 *
 * create path of the form dataroot/objectname
 * Each object name is modified to contain task-id prefix.
 * Thus for example, create
 * dataroot/objectname/_temporary/0/_temporary/attempt_201603131849_0000_m_000019_0/
 * part-r-00019-a08dcbab-8a34-4d80-a51c-368a71db90aa.csv
 * will be transformed to
 * PUT dataroot/object
 * /201603131849_0000_m_000019_0-part-r-00019-a08dcbab-8a34-4d80-a51c-368a71db90aa.csv
 *
 * @param f
 * @param permission
 * @param overwrite
 * @param bufferSize
 * @param replication
 * @param blockSize
 * @param progress
 * @return FSDataOutputStream to write data in
 * @throws IOException
 */
public FSDataOutputStream create(Path f, FsPermission permission, boolean overwrite, int bufferSize,
        short replication, long blockSize, Progressable progress) throws IOException {
    LOG.debug("Create method: {}", f.toString());
    String objNameModified = "";
    // check if request is dataroot/objectname/_SUCCESS
    if (f.getName().equals(Constants.HADOOP_SUCCESS)) {
        objNameModified = getObjectNameRoot(f, HADOOP_TEMPORARY, false);
    } else {
        objNameModified = getObjectNameRoot(f, HADOOP_TEMPORARY, true);
    }
    FSDataOutputStream outStream = storageClient.createObject(objNameModified, "binary/octet-stream",
            null, statistics);
    return outStream;
}
From source file:com.ibm.stocator.fs.ObjectStoreFileSystem.java
License:Open Source License
@Override
public boolean delete(Path f, boolean recursive) throws IOException {
    LOG.debug("delete method: {}. recursive {}", f.toString(), recursive);
    String objNameModified = getObjectNameRoot(f, HADOOP_TEMPORARY, true);
    LOG.debug("Modified object name {}", objNameModified);
    if (objNameModified.contains(HADOOP_TEMPORARY)) {
        return true;
    }
    Path pathToObj = new Path(objNameModified);
    if (f.getName().startsWith(HADOOP_ATTEMPT)) {
        FileStatus[] fsList = storageClient.list(hostNameScheme, pathToObj.getParent(), true);
        if (fsList.length > 0) {
            for (FileStatus fs : fsList) {
                if (fs.getPath().getName().endsWith(f.getName())) {
                    storageClient.delete(hostNameScheme, fs.getPath(), recursive);
                }
            }
        }
    } else {
        FileStatus[] fsList = storageClient.list(hostNameScheme, pathToObj, true);
        if (fsList.length > 0) {
            for (FileStatus fs : fsList) {
                storageClient.delete(hostNameScheme, fs.getPath(), recursive);
            }
        }
    }
    return true;
}
From source file:com.ibm.stocator.fs.ObjectStoreFileSystem.java
License:Open Source License
/**
 * Extract object name from path. If addTaskIdCompositeName=true then
 * schema://tone1.lvm/aa/bb/cc/one3.txt/_temporary/0/_temporary/
 * attempt_201610052038_0001_m_000007_15/part-00007 will extract
 * aa/bb/cc/201610052038_0001_m_000007_15-one3.txt
 * otherwise object name will be aa/bb/cc/one3.txt
 *
 * @param fullPath path to extract from
 * @param boundary boundary to search in a path
 * @param addTaskIdCompositeName if true will add task-id to the object name
 * @return new object name
 * @throws IOException if object name is missing
 */
private String getObjectName(Path fullPath, String boundary, boolean addTaskIdCompositeName)
        throws IOException {
    String path = fullPath.toString();
    String noPrefix = path.substring(hostNameScheme.length());
    int npIdx = noPrefix.indexOf(boundary);
    String objectName = "";
    if (npIdx >= 0) {
        if (npIdx == 0 || npIdx == 1 && noPrefix.startsWith("/")) {
            // no object name present
            // schema://tone1.lvm/_temporary/0/_temporary/attempt_201610038_0001_m_000007_15/part-0007
            // schema://tone1.lvm_temporary/0/_temporary/attempt_201610038_0001_m_000007_15/part-0007
            throw new IOException("Object name is missing");
        } else {
            // path matches pattern in javadoc
            objectName = noPrefix.substring(0, npIdx - 1);
            if (addTaskIdCompositeName) {
                String taskAttempt = Utils.extractTaskID(path);
                String objName = fullPath.getName();
                if (taskAttempt != null && !objName.startsWith(HADOOP_ATTEMPT)) {
                    objName = fullPath.getName() + "-" + taskAttempt;
                }
                objectName = objectName + "/" + objName;
            }
        }
        return objectName;
    }
    return noPrefix;
}
From source file:com.ibm.stocator.fs.swift.SwiftAPIClient.java
License:Open Source License
/**
 * Merge between two paths
 *
 * @param hostName
 * @param p path
 * @param objectName
 * @return merged path
 */
private String getMergedPath(String hostName, Path p, String objectName) {
    if ((p.getParent() != null) && (p.getName() != null) && (p.getParent().toString().equals(hostName))) {
        if (objectName.equals(p.getName())) {
            return p.toString();
        }
        if (objectName.startsWith(p.getName())) {
            return p.getParent() + objectName;
        }
        return p.toString();
    }
    return hostName + objectName;
}
From source file:com.ibm.streamsx.hdfs.client.RegexExcludePathFilter.java
License:Open Source License
@Override
public boolean accept(Path path) {
    return path.getName().matches(regex);
}
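A PathFilter like this is typically applied when listing a directory or selecting job input paths. As a hedged sketch (the directory, pattern, and class name below are illustrative and not part of the original source), the same getName()-based matching can be used inline with FileSystem.listStatus:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class ListMatchingFiles {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // Keep only entries whose final path component matches the regex.
        PathFilter csvOnly = path -> path.getName().matches(".*\\.csv");
        for (FileStatus status : fs.listStatus(new Path("/user/data"), csvOnly)) {
            System.out.println(status.getPath().getName());
        }
    }
}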
From source file:com.iflytek.spider.crawl.GeneratorSmart.java
License:Apache License
/**
 * Generate fetchlists in one or more segments. Whether to filter URLs or not
 * is read from the crawl.generate.filter property in the configuration files.
 * If the property is not found, the URLs are filtered. Same for the
 * normalisation.
 *
 * @param dbDir Crawl database directory
 * @param segments Segments directory
 * @param numLists Number of reduce tasks
 * @param curTime Current time in milliseconds
 *
 * @return Path to generated segment or null if no entries were selected
 *
 * @throws IOException When an I/O error occurs
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public Path[] generate(Path dbDir, Path segments, int numLists, long curTime, boolean force)
        throws IOException, InterruptedException, ClassNotFoundException {
    //getConf().set("mapred.temp.dir", "d:/tmp");
    Path tempDir = new Path(
            getConf().get("mapred.temp.dir", ".") + "/generate-temp-" + System.currentTimeMillis());

    Path lock = new Path(dbDir, CrawlDb.LOCK_NAME);
    FileSystem fs = FileSystem.get(getConf());
    LockUtil.createLockFile(fs, lock, force);

    LOG.info("Generator: Selecting best-scoring urls due for fetch.");
    LOG.info("Generator: starting");

    Job job = AvroJob.getAvroJob(getConf());
    if (numLists == -1) { // for politeness make
        numLists = job.getNumReduceTasks(); // a partition per fetch task
    }
    if ("local".equals(job.getConfiguration().get("mapred.job.tracker")) && numLists != 1) {
        // override
        LOG.info("Generator: jobtracker is 'local', generating exactly one partition.");
        numLists = 1;
    }
    LOG.info("Generator: with " + numLists + " partition.");
    job.getConfiguration().setLong(GENERATOR_CUR_TIME, curTime);
    // record real generation time
    long generateTime = System.currentTimeMillis();
    job.getConfiguration().setLong(Spider.GENERATE_TIME_KEY, generateTime);

    FileInputFormat.addInputPath(job, new Path(dbDir, CrawlDb.CURRENT_NAME));
    job.setInputFormatClass(AvroPairInputFormat.class);

    job.setMapperClass(SelectorMapper.class);
    job.setReducerClass(SelectorReducer.class);

    FileOutputFormat.setOutputPath(job, tempDir);
    //job.setOutputFormatClass(AvroPairOutputFormat.class);
    job.setOutputFormatClass(GeneratorOutputFormat.class);
    job.setOutputKeyClass(Float.class);
    job.setOutputValueClass(SelectorEntry.class);
    // AvroMultipleOutputs.addNamedOutput(job, "seq",
    // AvroPairOutputFormat.class, Float.class, SelectorEntry.class);
    try {
        job.waitForCompletion(true);
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }

    // read the subdirectories generated in the temp
    // output and turn them into segments
    List<Path> generatedSegments = new ArrayList<Path>();

    FileStatus[] status = fs.listStatus(tempDir);
    try {
        for (FileStatus stat : status) {
            Path subfetchlist = stat.getPath();
            if (!subfetchlist.getName().startsWith("fetchlist-"))
                continue;
            // start a new partition job for this segment
            Path newSeg = partitionSegment(fs, segments, subfetchlist, numLists);
            fs.createNewFile(new Path(newSeg, "generatored"));
            generatedSegments.add(newSeg);
        }
    } catch (Exception e) {
        LOG.warn("Generator: exception while partitioning segments, exiting ...");
        fs.delete(tempDir, true);
        return null;
    }

    if (generatedSegments.size() == 0) {
        LOG.warn("Generator: 0 records selected for fetching, exiting ...");
        LockUtil.removeLockFile(fs, lock);
        fs.delete(tempDir, true);
        return null;
    }

    if (getConf().getBoolean(GENERATE_UPDATE_CRAWLDB, false)) {
        // update the db from tempDir
        Path tempDir2 = new Path(
                getConf().get("mapred.temp.dir", ".") + "/generate-temp-" + System.currentTimeMillis());

        job = AvroJob.getAvroJob(getConf());
        job.setJobName("generate: updatedb " + dbDir);
        job.getConfiguration().setLong(Spider.GENERATE_TIME_KEY, generateTime);
        for (Path segmpaths : generatedSegments) {
            Path subGenDir = new Path(segmpaths, CrawlDatum.GENERATE_DIR_NAME);
            FileInputFormat.addInputPath(job, subGenDir);
        }
        FileInputFormat.addInputPath(job, new Path(dbDir, CrawlDb.CURRENT_NAME));
        job.setInputFormatClass(AvroPairInputFormat.class);
        job.setMapperClass(CrawlDbUpdateMapper.class);
        // job.setReducerClass(CrawlDbUpdater.class);
        job.setOutputFormatClass(AvroMapOutputFormat.class);
        job.setOutputKeyClass(String.class);
        job.setOutputValueClass(CrawlDatum.class);
        FileOutputFormat.setOutputPath(job, tempDir2);
        try {
            job.waitForCompletion(true);
            CrawlDb.install(job, dbDir);
        } catch (IOException e) {
            LockUtil.removeLockFile(fs, lock);
            fs.delete(tempDir, true);
            fs.delete(tempDir2, true);
            throw e;
        }
        fs.delete(tempDir2, true);
    }

    LockUtil.removeLockFile(fs, lock);
    fs.delete(tempDir, true);

    if (LOG.isInfoEnabled()) {
        LOG.info("Generator: done.");
    }
    Path[] patharray = new Path[generatedSegments.size()];
    return generatedSegments.toArray(patharray);
}
From source file:com.iflytek.spider.parse.ParseSegment.java
License:Apache License
public void parse(Path segment) throws IOException, InterruptedException, ClassNotFoundException {

    if (LOG.isInfoEnabled()) {
        LOG.info("Parse: starting");
        LOG.info("Parse: segment: " + segment);
    }

    Job job = AvroJob.getAvroJob(getConf());
    job.setJobName("parse " + segment);

    FileInputFormat.addInputPath(job, new Path(segment, Content.DIR_NAME));
    job.getConfiguration().set(Spider.SEGMENT_NAME_KEY, segment.getName());

    job.setInputFormatClass(AvroPairInputFormat.class);
    job.setMapperClass(ParseMapper.class);

    FileOutputFormat.setOutputPath(job, segment);
    job.setOutputFormatClass(ParseOutputFormat.class);
    job.setOutputKeyClass(String.class);
    job.setOutputValueClass(UnionData.class);

    job.waitForCompletion(true);
    if (LOG.isInfoEnabled()) {
        LOG.info("Parse: done");
    }
}
From source file:com.ikanow.aleph2.analytics.hadoop.assets.BeFileInputReader.java
License:Open Source License
/** For input files (pure enrichment, not when used for analytics), deletes or archives the files following completion */
private void archiveOrDeleteFile() {
    try {
        final Path currentPath = _fileSplit.getPath(_currFile);
        // First check - only want to do anything if this is an internal job:
        if (!currentPath.toString().contains(IStorageService.TO_IMPORT_DATA_SUFFIX)) {
            return; // (not your file to modify....)
        }

        final boolean storage_enabled = Optional.ofNullable(_dataBucket.data_schema())
                .map(ds -> ds.storage_schema()).map(ss -> Optional.ofNullable(ss.enabled()).orElse(true))
                .orElse(false);

        final boolean archive_enabled = storage_enabled
                && Optionals.of(() -> _dataBucket.data_schema().storage_schema().raw())
                        .map(raw -> Optional.ofNullable(raw.enabled()).orElse(true)).orElse(false);

        if (archive_enabled) {
            Path newPath = createArchivePath(currentPath);
            _fs.mkdirs(newPath);

            @SuppressWarnings("unused")
            final boolean success = _fs.rename(currentPath,
                    Path.mergePaths(newPath, new Path("/" + currentPath.getName())));
        } else {
            _fs.delete(currentPath, false);
        }
    } catch (Exception e) {
        logger.error(ErrorUtils.getLongForm(HadoopErrorUtils.EXCEPTION_CAUGHT, e));
        // We're just going to move on if we can't delete the file, it's
        // probably a permissions error
    }
}
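The rename target above is built with Path.mergePaths, which appends the second path's component to the first while keeping the first path's scheme and authority. A hedged illustration with hypothetical paths:

import org.apache.hadoop.fs.Path;

public class MergePathsDemo {
    public static void main(String[] args) {
        // Hypothetical archive directory and input file, for illustration only.
        Path archiveDir = new Path("hdfs://namenode/archive/2016");
        Path current = new Path("hdfs://namenode/import/file1.json");
        // The leading "/" makes the file name a child of the archive directory.
        Path target = Path.mergePaths(archiveDir, new Path("/" + current.getName()));
        System.out.println(target); // hdfs://namenode/archive/2016/file1.json
    }
}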
From source file:com.ikanow.infinit.e.data_model.custom.InfiniteFileInputReader.java
License:Apache License
private static Path createNewName(Path subFile, String replacement)
        throws MalformedURLException, UnsupportedEncodingException, URISyntaxException {
    String path = subFile.toUri().toString(); // (currently the entire string)
    String name = subFile.getName();
    int startOfName = path.lastIndexOf(name);
    return new Path(replacement.replace("$name", name).replace("$path", path.substring(0, startOfName - 1)));
}
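A hedged walk-through of the substitution above, using a hypothetical input file and replacement template (neither appears in the original source):

import org.apache.hadoop.fs.Path;

public class CreateNewNameDemo {
    public static void main(String[] args) {
        Path subFile = new Path("hdfs://namenode/data/in/file1.json");
        String name = subFile.getName();          // "file1.json"
        String path = subFile.toUri().toString(); // "hdfs://namenode/data/in/file1.json"
        int startOfName = path.lastIndexOf(name);
        // With a template of "$path/processed/$name", the parent directory
        // (without its trailing slash) replaces $path and the file name replaces $name.
        String replacement = "$path/processed/$name";
        Path newPath = new Path(
                replacement.replace("$name", name).replace("$path", path.substring(0, startOfName - 1)));
        System.out.println(newPath); // hdfs://namenode/data/in/processed/file1.json
    }
}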
From source file:com.inclouds.hbase.utils.TableLocality.java
License:Open Source License
private static void reassignRegions(Map<String, List<Path>> serverMap) throws IOException {
    LOG.info("Reassigning regions");
    //admin.disableTable(table.getBytes());
    Collection<ServerName> servers = admin.getClusterStatus().getServers();
    for (String server : serverMap.keySet()) {
        ServerName sn = find(server, servers);
        List<Path> regions = serverMap.get(server);
        for (Path p : regions) {
            LOG.info("Moving " + p.getName() + " to " + sn);
            admin.move(p.getName().getBytes(), sn.toString().getBytes());
        }
    }
    //admin.enableTable(table.getBytes());
}