Example usage for org.apache.hadoop.fs Path getName

List of usage examples for org.apache.hadoop.fs Path getName

Introduction

On this page you can find example usage for org.apache.hadoop.fs Path getName.

Prototype

public String getName() 

Document

Returns the final component of this path.
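
For orientation, a minimal sketch of what getName() returns; the paths and class name here are illustrative, not taken from the examples below:

import org.apache.hadoop.fs.Path;

public class GetNameDemo {
    public static void main(String[] args) {
        // getName() returns only the final path component, with no directory part.
        System.out.println(new Path("/user/hive/data/part-00000.csv").getName()); // part-00000.csv
        System.out.println(new Path("hdfs://namenode:8020/tmp/job/_SUCCESS").getName()); // _SUCCESS
        System.out.println(new Path("relative/dir").getName()); // dir
    }
}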

Usage

From source file:com.ibm.stocator.fs.ObjectStoreFileSystem.java

License:Open Source License

/**
 * {@inheritDoc}
 * Creates a path of the form dataroot/objectname.
 * Each object name is modified to contain a task-id prefix.
 * Thus, for example, a create of
 * dataroot/objectname/_temporary/0/_temporary/attempt_201603131849_0000_m_000019_0/
 * part-r-00019-a08dcbab-8a34-4d80-a51c-368a71db90aa.csv
 * will be transformed to
 * PUT dataroot/object
 * /201603131849_0000_m_000019_0-part-r-00019-a08dcbab-8a34-4d80-a51c-368a71db90aa.csv
 *
 * @param f the file to create
 * @param permission the permission to set
 * @param overwrite whether an existing file may be overwritten
 * @param bufferSize the size of the buffer to be used
 * @param replication the required block replication for the file
 * @param blockSize the block size
 * @param progress the progress reporter
 * @return FSDataOutputStream to write data to
 * @throws IOException on failure
 */
public FSDataOutputStream create(Path f, FsPermission permission, boolean overwrite, int bufferSize,
        short replication, long blockSize, Progressable progress) throws IOException {
    LOG.debug("Create method: {}", f.toString());
    // The job-level _SUCCESS marker must not receive a task-id prefix.
    final String objNameModified;
    if (f.getName().equals(Constants.HADOOP_SUCCESS)) {
        objNameModified = getObjectNameRoot(f, HADOOP_TEMPORARY, false);
    } else {
        objNameModified = getObjectNameRoot(f, HADOOP_TEMPORARY, true);
    }
    return storageClient.createObject(objNameModified, "binary/octet-stream", null, statistics);
}
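
The special case exists because _SUCCESS is Hadoop's job-level completion marker, written once per job rather than per task attempt, so it must keep its exact name instead of receiving a task-id prefix.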

From source file:com.ibm.stocator.fs.ObjectStoreFileSystem.java

License:Open Source License

@Override
public boolean delete(Path f, boolean recursive) throws IOException {
    LOG.debug("delete method: {}. recursive {}", f.toString(), recursive);
    String objNameModified = getObjectNameRoot(f, HADOOP_TEMPORARY, true);
    LOG.debug("Modified object name {}", objNameModified);
    if (objNameModified.contains(HADOOP_TEMPORARY)) {
        return true;
    }
    Path pathToObj = new Path(objNameModified);
    if (f.getName().startsWith(HADOOP_ATTEMPT)) {
        FileStatus[] fsList = storageClient.list(hostNameScheme, pathToObj.getParent(), true);
        if (fsList.length > 0) {
            for (FileStatus fs : fsList) {
                if (fs.getPath().getName().endsWith(f.getName())) {
                    storageClient.delete(hostNameScheme, fs.getPath(), recursive);
                }
            }
        }
    } else {
        FileStatus[] fsList = storageClient.list(hostNameScheme, pathToObj, true);
        if (fsList.length > 0) {
            for (FileStatus fs : fsList) {
                storageClient.delete(hostNameScheme, fs.getPath(), recursive);
            }
        }
    }
    return true;
}
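
Two uses of getName() drive this delete(): a modified name that still contains the temporary marker is acknowledged as deleted without contacting the object store, and for task-attempt paths the parent listing is filtered by matching each candidate's final component against the requested name before deletion.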

From source file:com.ibm.stocator.fs.ObjectStoreFileSystem.java

License:Open Source License

/**
 * Extract the object name from a path. If addTaskIdCompositeName=true then
 * schema://tone1.lvm/aa/bb/cc/one3.txt/_temporary/0/_temporary/
 * attempt_201610052038_0001_m_000007_15/part-00007 yields
 * aa/bb/cc/201610052038_0001_m_000007_15-one3.txt;
 * otherwise the object name will be aa/bb/cc/one3.txt.
 *
 * @param fullPath the path to extract from
 * @param boundary the boundary to search for in the path
 * @param addTaskIdCompositeName if true, add the task-id to the object name
 * @return the new object name
 * @throws IOException if the object name is missing
 */
private String getObjectName(Path fullPath, String boundary, boolean addTaskIdCompositeName)
        throws IOException {
    String path = fullPath.toString();
    String noPrefix = path.substring(hostNameScheme.length());
    int npIdx = noPrefix.indexOf(boundary);
    String objectName = "";
    if (npIdx >= 0) {
        if (npIdx == 0 || (npIdx == 1 && noPrefix.startsWith("/"))) {
            //no object name present
            //schema://tone1.lvm/_temporary/0/_temporary/attempt_201610038_0001_m_000007_15/part-0007
            //schema://tone1.lvm_temporary/0/_temporary/attempt_201610038_0001_m_000007_15/part-0007
            throw new IOException("Object name is missing");
        } else {
            //path matches pattern in javadoc
            objectName = noPrefix.substring(0, npIdx - 1);
            if (addTaskIdCompositeName) {
                String taskAttempt = Utils.extractTaskID(path);
                String objName = fullPath.getName();
                if (taskAttempt != null && !objName.startsWith(HADOOP_ATTEMPT)) {
                    objName = fullPath.getName() + "-" + taskAttempt;
                }
                objectName = objectName + "/" + objName;
            }
        }
        return objectName;
    }
    return noPrefix;
}
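
The guard on npIdx catches paths that begin at the boundary itself (npIdx of 0, or 1 behind a leading slash, as in the two commented examples), since such paths carry no object name; otherwise everything before the boundary, minus the trailing slash, becomes the object name.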

From source file:com.ibm.stocator.fs.swift.SwiftAPIClient.java

License:Open Source License

/**
 * Merge two paths.
 *
 * @param hostName the host name prefix
 * @param p the path to merge
 * @param objectName the object name
 * @return the merged path
 */
private String getMergedPath(String hostName, Path p, String objectName) {
    if ((p.getParent() != null) && (p.getName() != null) && (p.getParent().toString().equals(hostName))) {
        if (objectName.equals(p.getName())) {
            return p.toString();
        }
        if (objectName.startsWith(p.getName())) {
            return p.getParent() + objectName;
        }
        return p.toString();
    }
    return hostName + objectName;
}
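
The merge only applies when p sits directly under hostName: an objectName equal to p's final component leaves the path unchanged, one that merely extends it is recombined with the parent, and anything else falls back to p itself or to hostName + objectName.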

From source file:com.ibm.streamsx.hdfs.client.RegexExcludePathFilter.java

License:Open Source License

@Override
public boolean accept(Path path) {
    return path.getName().matches(regex);
}
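
A sketch of how such a filter is typically wired up; the directory, regex, and class name below are illustrative assumptions, not taken from the source project:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class FilterDemo {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // Only the final path component is tested, so directory names
        // never affect the match.
        PathFilter csvOnly = path -> path.getName().matches(".*\\.csv");
        for (FileStatus status : fs.listStatus(new Path("/data/input"), csvOnly)) {
            System.out.println(status.getPath());
        }
    }
}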

From source file:com.iflytek.spider.crawl.GeneratorSmart.java

License:Apache License

/**
 * Generate fetchlists in one or more segments. Whether to filter URLs or not
 * is read from the crawl.generate.filter property in the configuration files.
 * If the property is not found, the URLs are filtered. The same applies to
 * normalisation.
 * 
 * @param dbDir
 *          Crawl database directory
 * @param segments
 *          Segments directory
 * @param numLists
 *          Number of reduce tasks
 * @param curTime
 *          Current time in milliseconds
 * @param force
 *          Whether to overwrite an existing lock file
 * 
 * @return Path to generated segment or null if no entries were selected
 * 
 * @throws IOException
 *           When an I/O error occurs
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public Path[] generate(Path dbDir, Path segments, int numLists, long curTime, boolean force)
        throws IOException, InterruptedException, ClassNotFoundException {
    //getConf().set("mapred.temp.dir", "d:/tmp");
    Path tempDir = new Path(
            getConf().get("mapred.temp.dir", ".") + "/generate-temp-" + System.currentTimeMillis());

    Path lock = new Path(dbDir, CrawlDb.LOCK_NAME);
    FileSystem fs = FileSystem.get(getConf());
    LockUtil.createLockFile(fs, lock, force);

    LOG.info("Generator: Selecting best-scoring urls due for fetch.");
    LOG.info("Generator: starting");

    Job job = AvroJob.getAvroJob(getConf());
    if (numLists == -1) { // for politeness make
        numLists = job.getNumReduceTasks(); // a partition per fetch task
    }
    if ("local".equals(job.getConfiguration().get("mapred.job.tracker")) && numLists != 1) {
        // override
        LOG.info("Generator: jobtracker is 'local', generating exactly one partition.");
        numLists = 1;
    }
    LOG.info("Generator: with " + numLists + " partition.");
    job.getConfiguration().setLong(GENERATOR_CUR_TIME, curTime);
    // record real generation time
    long generateTime = System.currentTimeMillis();
    job.getConfiguration().setLong(Spider.GENERATE_TIME_KEY, generateTime);

    FileInputFormat.addInputPath(job, new Path(dbDir, CrawlDb.CURRENT_NAME));
    job.setInputFormatClass(AvroPairInputFormat.class);

    job.setMapperClass(SelectorMapper.class);
    job.setReducerClass(SelectorReducer.class);

    FileOutputFormat.setOutputPath(job, tempDir);
    //job.setOutputFormatClass(AvroPairOutputFormat.class);
    job.setOutputFormatClass(GeneratorOutputFormat.class);
    job.setOutputKeyClass(Float.class);
    job.setOutputValueClass(SelectorEntry.class);
    // AvroMultipleOutputs.addNamedOutput(job, "seq",
    // AvroPairOutputFormat.class, Float.class, SelectorEntry.class);
    try {
        job.waitForCompletion(true);
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }

    // read the subdirectories generated in the temp
    // output and turn them into segments
    List<Path> generatedSegments = new ArrayList<Path>();

    FileStatus[] status = fs.listStatus(tempDir);
    try {
        for (FileStatus stat : status) {
            Path subfetchlist = stat.getPath();
            if (!subfetchlist.getName().startsWith("fetchlist-"))
                continue;
            // start a new partition job for this segment
            Path newSeg = partitionSegment(fs, segments, subfetchlist, numLists);

            fs.createNewFile(new Path(newSeg, "generatored"));
            generatedSegments.add(newSeg);
        }
    } catch (Exception e) {
        LOG.warn("Generator: exception while partitioning segments, exiting ...");
        fs.delete(tempDir, true);
        return null;
    }

    if (generatedSegments.size() == 0) {
        LOG.warn("Generator: 0 records selected for fetching, exiting ...");
        LockUtil.removeLockFile(fs, lock);
        fs.delete(tempDir, true);
        return null;
    }

    if (getConf().getBoolean(GENERATE_UPDATE_CRAWLDB, false)) {
        // update the db from tempDir
        Path tempDir2 = new Path(
                getConf().get("mapred.temp.dir", ".") + "/generate-temp-" + System.currentTimeMillis());

        job = AvroJob.getAvroJob(getConf());
        job.setJobName("generate: updatedb " + dbDir);
        job.getConfiguration().setLong(Spider.GENERATE_TIME_KEY, generateTime);
        for (Path segmpaths : generatedSegments) {
            Path subGenDir = new Path(segmpaths, CrawlDatum.GENERATE_DIR_NAME);
            FileInputFormat.addInputPath(job, subGenDir);
        }
        FileInputFormat.addInputPath(job, new Path(dbDir, CrawlDb.CURRENT_NAME));
        job.setInputFormatClass(AvroPairInputFormat.class);
        job.setMapperClass(CrawlDbUpdateMapper.class);
        // job.setReducerClass(CrawlDbUpdater.class);
        job.setOutputFormatClass(AvroMapOutputFormat.class);
        job.setOutputKeyClass(String.class);
        job.setOutputValueClass(CrawlDatum.class);
        FileOutputFormat.setOutputPath(job, tempDir2);
        try {
            job.waitForCompletion(true);
            CrawlDb.install(job, dbDir);
        } catch (IOException e) {
            LockUtil.removeLockFile(fs, lock);
            fs.delete(tempDir, true);
            fs.delete(tempDir2, true);
            throw e;
        }
        fs.delete(tempDir2, true);
    }

    LockUtil.removeLockFile(fs, lock);
    fs.delete(tempDir, true);

    if (LOG.isInfoEnabled()) {
        LOG.info("Generator: done.");
    }
    Path[] patharray = new Path[generatedSegments.size()];
    return generatedSegments.toArray(patharray);
}
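
getName() appears near the end of this driver: only children of the temp directory whose final component starts with fetchlist- are partitioned into segments, so any unrelated output in the same directory is skipped.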

From source file:com.iflytek.spider.parse.ParseSegment.java

License:Apache License

public void parse(Path segment) throws IOException, InterruptedException, ClassNotFoundException {

    if (LOG.isInfoEnabled()) {
        LOG.info("Parse: starting");
        LOG.info("Parse: segment: " + segment);
    }

    Job job = AvroJob.getAvroJob(getConf());
    job.setJobName("parse " + segment);

    FileInputFormat.addInputPath(job, new Path(segment, Content.DIR_NAME));
    job.getConfiguration().set(Spider.SEGMENT_NAME_KEY, segment.getName());

    job.setInputFormatClass(AvroPairInputFormat.class);
    job.setMapperClass(ParseMapper.class);

    FileOutputFormat.setOutputPath(job, segment);
    job.setOutputFormatClass(ParseOutputFormat.class);
    job.setOutputKeyClass(String.class);
    job.setOutputValueClass(UnionData.class);

    job.waitForCompletion(true);
    if (LOG.isInfoEnabled()) {
        LOG.info("Parse: done");
    }
}
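
Here getName() supplies just the segment's directory name for the SEGMENT_NAME_KEY setting, while the full segment path is still used for the job's input and output locations.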

From source file:com.ikanow.aleph2.analytics.hadoop.assets.BeFileInputReader.java

License:Open Source License

/** For input files (pure enrichment, not when used for analytics), deletes or
 * archives the files following completion.
 */
private void archiveOrDeleteFile() {
    try {
        final Path currentPath = _fileSplit.getPath(_currFile);
        // First check: only do anything if this is an internal job:
        if (!currentPath.toString().contains(IStorageService.TO_IMPORT_DATA_SUFFIX)) {
            return; // (not your file to modify....)
        }

        final boolean storage_enabled = Optional.ofNullable(_dataBucket.data_schema())
                .map(ds -> ds.storage_schema()).map(ss -> Optional.ofNullable(ss.enabled()).orElse(true))
                .orElse(false);

        final boolean archive_enabled = storage_enabled
                && Optionals.of(() -> _dataBucket.data_schema().storage_schema().raw())
                        .map(raw -> Optional.ofNullable(raw.enabled()).orElse(true)).orElse(false);

        if (archive_enabled) {
            Path newPath = createArchivePath(currentPath);
            _fs.mkdirs(newPath);

            @SuppressWarnings("unused")
            final boolean success = _fs.rename(currentPath,
                    Path.mergePaths(newPath, new Path("/" + currentPath.getName())));
        } else {
            _fs.delete(currentPath, false);
        }
    } catch (Exception e) {
        logger.error(ErrorUtils.getLongForm(HadoopErrorUtils.EXCEPTION_CAUGHT, e));
        // We're just going to move on if we can't delete the file, it's
        // probably a permissions error
    }
}
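
The rename above relies on Path.mergePaths to keep only the file name under the archive directory; a minimal sketch of that idiom, with illustrative paths:

import org.apache.hadoop.fs.Path;

public class MergePathsDemo {
    public static void main(String[] args) {
        Path current = new Path("hdfs://nn/data/to_import/file.json");
        Path archiveDir = new Path("hdfs://nn/archive/2016-03-13");
        // mergePaths appends the second path's components to the first;
        // "/" + getName() carries over only the file name, not its directories.
        Path target = Path.mergePaths(archiveDir, new Path("/" + current.getName()));
        System.out.println(target); // hdfs://nn/archive/2016-03-13/file.json
    }
}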

From source file:com.ikanow.infinit.e.data_model.custom.InfiniteFileInputReader.java

License:Apache License

private static Path createNewName(Path subFile, String replacement)
        throws MalformedURLException, UnsupportedEncodingException, URISyntaxException {
    String path = subFile.toUri().toString(); // (currently the entire string)
    String name = subFile.getName();
    int startOfName = path.lastIndexOf(name);
    return new Path(replacement.replace("$name", name).replace("$path", path.substring(0, startOfName - 1)));
}
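
As a hypothetical illustration: with subFile hdfs://nn/in/file.txt and replacement "$path/processed/$name", the substring(0, startOfName - 1) call drops the slash in front of the name to give hdfs://nn/in, and the result is hdfs://nn/in/processed/file.txt.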

From source file:com.inclouds.hbase.utils.TableLocality.java

License:Open Source License

private static void reassignRegions(Map<String, List<Path>> serverMap) throws IOException {
    LOG.info("Reassigning regions");
    //admin.disableTable(table.getBytes());

    Collection<ServerName> servers = admin.getClusterStatus().getServers();
    for (String server : serverMap.keySet()) {
        ServerName sn = find(server, servers);
        List<Path> regions = serverMap.get(server);
        for (Path p : regions) {
            LOG.info("Moving " + p.getName() + " to " + sn);
            admin.move(p.getName().getBytes(), sn.toString().getBytes());
        }//from  w  w  w. j  a va 2 s  .  co m
    }

    //admin.enableTable(table.getBytes());

}
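
A region's on-disk directory is named by its encoded region name, which is why getName() on the region path can be handed straight to admin.move() as the encoded-name argument.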